$data['name'] ]; $isweb = Web::where($where)->first(); if(empty($isweb)){ date_default_timezone_set('Asia/Shanghai'); $time = time(); $catetime = date('Y-m-d H:i:s', $time); $data['created_at'] = $catetime; $web = Web::insert($data); }else{ return Result::error('此网站已存在,不可重复添加!'); } if(empty($web)){ return Result::error('添加失败'); } return Result::success('添加成功'); }

    /**
     * List every website, or search for a website by name.
     *
     * When `keyWord` is present, the first website whose name contains it is
     * returned; otherwise all websites are returned.
     *
     * @param array $data Request data; optional key `keyWord`.
     * @return array|mixed Result of Result::success() / Result::error().
     */
    public function getWeb(array $data): array
    {
        if (isset($data['keyWord'])) {
            $where = [
                ['name', 'like', '%' . $data['keyWord'] . '%'],
            ];
            // NOTE(review): only the first match is returned (single record),
            // while the listing branch below returns a collection - confirm
            // this is what the caller expects.
            $web = Web::where($where)->first();
            if (empty($web)) {
                return Result::error('未查找到相关网站!');
            }

            return Result::success($web);
        }

        $web = Web::get();
        // empty() is always false for an object, so a countable result set is
        // checked by its element count instead.
        $hasNoRows = $web instanceof \Countable ? count($web) === 0 : empty($web);
        if ($hasNoRows) {
            return Result::error('您还未添加网站,请先去添加!');
        }

        return Result::success($web);
    }

    /**
     * Update a website.
     *
     * @param array $data Request data; `id` selects the website and the whole
     *                    array is passed to update().
     * @return array|mixed Result of Result::success() / Result::error().
     */
    public function upWeb(array $data): array
    {
        $webId = $data['id'] ?? null;
        if ($webId === null || empty(Web::where('id', $webId)->first())) {
            return Result::error('请输入正确的网站id!');
        }

        // An empty return value from update() is reported as a failure
        // (presumably the affected-row count - confirm against the model layer).
        $id = Web::where('id', $webId)->update($data);
        if (empty($id)) {
            return Result::error('无法修改!');
        }

        return Result::success($id);
    }

    /**
     * Delete a website.
     *
     * @param array $data Request data; `id` selects the website.
     * @return array|mixed Result of Result::success() / Result::error().
     */
    public function delWeb(array $data): array
    {
        $webId = $data['id'] ?? null;
        if ($webId === null || empty(Web::where('id', $webId)->first())) {
            return Result::error('请输入正确的网站id!');
        }

        $id = Web::where('id', $webId)->delete();
        if (empty($id)) {
            return Result::error('无法删除!');
        }

        return Result::success($id);
    }

    /**
     * Run the crawler for one rule.
     *
     * Loads the rule (left-joined with its website) by rule id and dispatches
     * on the website type:
     *  - type 1: builds the list-page URLs and scrapes each of them, setting
     *    the rule status to 1 before and 2 after the run;
     *  - type 2: pages through a JSON endpoint via foreachCurl().
     *
     * @param array $data Request data; reads `id` (rule id) and `admin_user_id`.
     * @return array Result of Result::success() / Result::error().
     */
    public function sendCrawler(array $data): array
    {
        $ruleId = $data['id'] ?? null;
        if ($ruleId === null) {
            return Result::error('请输入正确的规则id!');
        }

        // Look up the rule together with its website to learn the website type.
        $rule = Rule::where(['rule.id' => $ruleId])
            ->leftJoin('web', 'rule.web_id', 'web.id')
            ->select("rule.*", "web.name as web_name", "web.url as web_url", "web.type as web_type")
            ->first();
        if (empty($rule)) {
            // Previously toArray() was called on null when the rule did not exist.
            return Result::error('请输入正确的规则id!');
        }
        $info = $rule->toArray();
        $adminUserId = $data['admin_user_id'] ?? '';

        // switch compares loosely on purpose: the type may arrive as int or string.
        switch ($info['web_type']) {
            case 1:
                var_dump("wojinlailaile======", $info);
                Rule::where(['id' => $ruleId])->update(['status' => 1]);

                $data = array_merge($data, [
                    'copyfrom' => $info['web_name'],
                    // NOTE(review): author is hard-coded for every scraped article.
                    'author' => '刘德华',
                    'first_url' => $info['first_url'],
                    'second_start' => $info['second_start'],
                    'second_num' => $info['second_num'],
                    'second_end' => $info['second_end'],
                    'end_pagenum' => $info['end_pagenum'],
                    'rule_id' => $ruleId,
                    'admin_user_id' => $adminUserId,
                ]);

                foreach ($this->addUrlArr($data) as $val) {
                    var_dump("单列表地址:", $val);
                    $this->ruleCollection($val, $data);
                }

                Rule::where(['id' => $ruleId])->update(['status' => 2]);
                break;
            case 2:
                $wecUrl = $info['first_url']; // e.g. 'https://www.ndcpa.gov.cn/queryList'
                $parames = json_decode((string) ($info['parameter'] ?? ''), true);
                if (!is_array($parames)) {
                    return Result::error('规则参数格式错误!');
                }
                // Strip surrounding square brackets and wrap the value in an array.
                $parames['webSiteCode'] = [trim($parames['webSiteCode'] ?? '', "[]")]; // e.g. ['jbkzzx']
                $parames['channelCode'] = [trim($parames['channelCode'] ?? '', "[]")]; // e.g. ['c100008']
                $other = [
                    'web_url' => $info['web_url'],
                    'copyfrom' => $info['web_name'],
                    'admin_user_id' => $adminUserId,
                    'rule_id' => $ruleId,
                ];
                var_dump("开始调用接口方法====", $parames);
                $this->foreachCurl($wecUrl, $parames, $other);
                break;
        }

        return Result::success([]);
    }

    /**
     * Collect the list-page URLs to scrape into one array.
     *
     * The result always starts with `first_url`, followed by
     * `second_start . $i . second_end` for $i = 1 .. end_pagenum - 2.
     *
     * @param array $data Reads `first_url`, `second_start`, `second_end`, `end_pagenum`.
     * @return array List of list-page URLs.
     */
    public function addUrlArr($data)
    {
        $arrList = [$data['first_url']];
        $lastPage = intval($data['end_pagenum']) - 1;

        // A bounded loop: the previous while-loop never terminated when
        // end_pagenum was 1 or less, because $i started at 1 and could never
        // equal end_pagenum - 1.
        for ($i = 1; $i <= $lastPage; $i++) {
            $url = $data['second_start'] . $i . $data['second_end'];
            $respon1 = Result::pageExists($url);
            var_dump("采集地址:", $respon1, $url);

            // The page at index $lastPage is probed but not collected, exactly
            // as before. NOTE(review): confirm that excluding it is intended.
            if ($i < $lastPage) {
                $arrList[] = $url;
            }
        }

        return $arrList;
    }

    /**
     * Scrape one list page and store every article it links to.
     *
     * Extracts the links under `.list1 li`, fetches each detail page, reads
     * its title (`h1`) and content (`.TRS_UEDITOR`) inside `.news-details`,
     * and hands the data to insertArticleData().
     *
     * @param string $url  List-page URL.
     * @param array  $data Crawl context (copyfrom, author, rule_id, admin_user_id, ...).
     * @return void
     */
    public function ruleCollection($url,$data)
    {
        var_dump("采集参数:", $data);

        $dataList = QueryList::get($url)
            ->rules([
                'title' => ['a', 'text'],
                'link' => ['a', 'href'],
            ])
            ->range('.list1 li')
            ->query()
            ->getData();
        var_dump("采集的内容:", $dataList);

        // Directory part of the list URL: everything before the last "/".
        $urlParts = explode("/", $url);
        array_pop($urlParts);
        $baseUrl = implode('/', $urlParts);

        // Loop-invariant defaults.
        $data['admin_user_id'] = $data['admin_user_id'] ?? '';
        $data['rule_id'] = $data['rule_id'] ?? '';

        foreach ($dataList->toArray() as $item) {
            if (empty($item['link'])) {
                // List entry without an anchor/href: nothing to fetch.
                continue;
            }

            // Drop the first character of the href before appending it to the
            // base URL (presumably the leading "." of a "./path" link).
            $newUrlStr = $baseUrl . substr($item['link'], 1);

            $detailData = QueryList::get($newUrlStr)
                ->rules([
                    'title' => ['h1', 'text'],
                    'content' => ['.TRS_UEDITOR', 'html'],
                ])
                ->range(".news-details")
                ->query()
                ->getData()
                ->toArray();
            var_dump("内容详情:", $detailData, $newUrlStr);

            foreach ($detailData as $val) {
                var_dump("进没进foreach:", $newUrlStr, $val);
                $title = $val['title'] ?? '';

                $data['fromurl'] = $newUrlStr;
                $data['title'] = $title;
                $data['content'] = $val['content'] ?? '';
                $data['newUrlStr'] = $newUrlStr;
                // Left empty so insertArticleData() falls back to `copyfrom`.
                $data['source'] = '';
                $data['introduce'] = $title;
                $data['keyword'] = $title;

                var_dump("要插入的数据:", $data);
                $this->insertArticleData($data);
            }
        }
    }

    /**
     * Insert one article and its content, unless the title already exists.
     *
     * Runs inside a database transaction; any failure rolls the transaction
     * back and is dumped instead of being rethrown.
     *
     * @param array $data Reads `title`, `newUrlStr`, `copyfrom`, `author`,
     *                    `source`, `admin_user_id`, `rule_id`, `content`.
     * @return void
     */
    public function insertArticleData($data=[])
    {
        if (!$data) {
            var_dump("没有数据可以插入:");
            return;
        }

        Db::beginTransaction();
        try {
            $title = $data['title'] ?? '';
            $articleInfo = Article::where(['title' => $title])->first();
            var_dump("获取详情:", $articleInfo, $data);

            if (empty($articleInfo)) {
                $copyfrom = $data['copyfrom'] ?? '';
                $hasSource = isset($data['source']) && $data['source'] != '';

                $article_id = Article::insertGetId([
                    'fromurl' => $data['newUrlStr'] ?? '',
                    'oldtitle' => $title,
                    'title' => $title,
                    'copyfrom' => $copyfrom,
                    'author' => $data['author'] ?? '',
                    'introduce' => $title,
                    'keyword' => $title,
                    // Fall back to the website name when no source is given.
                    'source' => $hasSource ? $data['source'] : $copyfrom,
                    'admin_user_id' => $data['admin_user_id'] ?? '',
                    'rule_id' => $data['rule_id'] ?? '',
                ]);

                $insertDataDetail = [
                    'article_id' => $article_id,
                    'content' => $data['content'] ?? '',
                ];
                var_dump("插入ArticleData:", $insertDataDetail);
                ArticleData::insertGetId($insertDataDetail);
            }

            Db::commit();
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: a PHP Error (e.g. TypeError)
            // must also roll back, otherwise the transaction stays open.
            Db::rollBack();
            var_dump("插入失败:", $e->getMessage());
        }
    }

    /**
     * Page through a JSON endpoint and store every returned article.
     *
     * Posts `$parames` to `$wecUrl`, inserts each entry of `data.results`,
     * then increments `$parames['current']` and repeats until a page comes
     * back without results.
     *
     * @param string $wecUrl  Endpoint URL.
     * @param array  $parames POST parameters; `current` is the page counter.
     * @param array  $other   Extra context: `web_url`, `copyfrom`, `admin_user_id`, `rule_id`.
     * @param int    $page    Kept for backward compatibility; neither read nor written.
     * @return void
     */
    public function foreachCurl($wecUrl,$parames,$other,&$page=1)
    {
        $options = [
            CURLOPT_HEADER => true, // set to true to include the response headers in the response
            CURLOPT_TIMEOUT => 30,  // request timeout of 30 seconds
        ];

        // Iterative paging: the former recursive version also issued a second,
        // unused request for every page and could recurse without bound.
        do {
            $response = Result::http_post($wecUrl, $parames, $options);
            $result = json_decode((string) ($response['response'] ?? ''), true);
            var_dump("获取数据:", $result);

            $dataList = $result['data']['results'] ?? [];
            if (!is_array($dataList)) {
                $dataList = [];
            }

            foreach ($dataList as $val) {
                $urls = json_decode((string) ($val['source']['urls'] ?? ''), true);
                $newUrlStr = ($other['web_url'] ?? '') . ($urls['common'] ?? '');

                $this->insertArticleData([
                    'newUrlStr' => $newUrlStr,
                    'title' => $val['source']['title'] ?? '',
                    'source' => $val['source']['contentSource'] ?? '',
                    'copyfrom' => $other['copyfrom'] ?? '',
                    'content' => $val['source']['content']['content'] ?? '',
                    'admin_user_id' => $other['admin_user_id'] ?? '',
                    'rule_id' => $other['rule_id'] ?? '',
                    // NOTE(review): author is hard-coded for every fetched article.
                    'author' => '冯蕊',
                ]);
            }

            $parames['current'] = intval($parames['current'] ?? 0) + 1;

            $hasMore = count($dataList) > 0;
            if ($hasMore) {
                var_dump("分页测试:", $parames, $parames['current']);
            }
        } while ($hasMore);
    }
}