$data['name']
        ];
        $isweb = Web::where($where)->first();
        if(empty($isweb)){
            date_default_timezone_set('Asia/Shanghai');
            $web = Web::insert($data);
        }else{
            return Result::error('此网站已存在,不可重复添加!');
        }
        if(empty($web)){
            return Result::error('添加失败');
        }
        return Result::success('添加成功');
    }

    /**
     * List websites one page at a time, optionally filtered by a name keyword.
     *
     * @param array $data Reads 'page' and 'pageSize'; 'keyWord' is optional.
     * @return array|mixed Result::success(['rep' => rows, 'count' => total]) or Result::error(...)
     */
    public function getWeb(array $data): array
    {
        $hasKeyword = isset($data['keyWord']);
        $where = $hasKeyword ? [['name', 'like', '%' . $data['keyWord'] . '%']] : [];

        // Decide "nothing found" from the total row count: empty() on the
        // collection returned by get() is always false, so it can never signal it.
        $count = Web::where($where)->count();
        if ($count == 0) {
            return Result::error($hasKeyword ? '未查找到相关网站!' : '您还未添加网站,请先去添加!');
        }

        $rep = Web::where($where)
            ->orderBy("updated_at", "desc")
            ->limit($data['pageSize'])
            ->offset(($data['page'] - 1) * $data['pageSize'])
            ->get();

        return Result::success([
            'rep' => $rep->toArray(),
            'count' => $count
        ]);
    }

    /**
     * Update a website row.
     *
     * @param array $data Column => value map; 'id' selects the row to update.
     * @return array|mixed Result::success(affected rows) or Result::error(...)
     */
    public function upWeb(array $data): array
    {
        $web = Web::where('id', $data['id'])->first();
        if (empty($web)) {
            return Result::error('请输入正确的网站id!');
        }

        date_default_timezone_set('Asia/Shanghai');
        $affected = Web::where('id', $data['id'])->update($data);
        if (empty($affected)) {
            return Result::error('无法修改!');
        }
        return Result::success($affected);
    }

    /**
     * Delete a website together with its rules.
     *
     * A website without rules is deleted directly. A website that has a rule
     * with status 2 (described as "already executed" by the original comments)
     * is refused. Otherwise the website and its rules are deleted in one
     * transaction.
     *
     * @param array $data Reads 'id' (website id).
     * @return array|mixed Result::success(['web' => n, 'rule' => n]) or Result::error(...)
     */
    public function delWeb(array $data): array
    {
        $web = Web::where('id', $data['id'])->first();
        if (empty($web)) {
            return Result::error('请输入正确的网站id!');
        }

        $where = [
            ['web_id', '=', $data['id']]
        ];
        $result = [];

        // No rules under this website: delete it directly.
        if (Rule::where($where)->count() == 0) {
            $result['web'] = Web::where('id', $data['id'])->delete();
            return Result::success($result);
        }

        // At least one rule has already been executed: refuse.
        if (Rule::where($where)->where('status', 2)->count() > 0) {
            return Result::error('该网站已有成功执行的任务规则,不可删除!');
        }

        try {
            Db::beginTransaction();
            $result['web'] = Web::where('id', $data['id'])->delete();
            $result['rule'] = Rule::where($where)->delete();
            Db::commit();
        } catch (\Throwable $ex) {
            Db::rollBack();
            var_dump($ex->getMessage());
            return Result::error("删除失败",0);
        }
        return Result::success($result);
    }

    /**
     * Create a crawl rule for a website.
     *
     * Only the columns that belong to the given 'type' are copied from $data
     * (type 1, type 2, anything else is treated as the custom "diy_rule" form).
     *
     * @param array $data Reads 'web_id', 'name', 'type' and the type-specific fields.
     * @return array|mixed Result::success(new rule id) or Result::error(...)
     */
    public function addRule(array $data): array
    {
        $web = Web::where('id', $data['web_id'])->first();
        if (empty($web)) {
            return Result::error('请输入正确的网站id!');
        }

        // Rule names must be unique.
        $duplicate = Rule::where('name', $data['name'])->first();
        if (!empty($duplicate)) {
            return Result::error('此任务已存在!');
        }

        // switch (loose comparison) is kept on purpose so '1' and 1 behave alike.
        switch ($data['type']) {
            case 1:
                $rule = [
                    'name' => $data['name'],
                    'web_id' => $data['web_id'],
                    'first_url' => $data['first_url'],
                    'second_start' => $data['second_start'],
                    'second_num' => $data['second_num'],
                    'second_end' => $data['second_end'],
                    'end_pagenum' => $data['end_pagenum'],
                    'start' => $data['start'],
                    'con_url' => $data['con_url'],
                    'title' => $data['title'],
                    'content' => $data['content']
                ];
                break;
            case 2:
                $rule = [
                    'name' => $data['name'],
                    'web_id' => $data['web_id'],
                    'first_url' => $data['first_url'],
                    'parameter' => $data['parameter'],
                    'start' => $data['start'],
                    'title' => $data['title'],
                    'content' => $data['content']
                ];
                break;
            default:
                $rule = [
                    'name' => $data['name'],
                    'web_id' => $data['web_id'],
                    'diy_rule' => $data['diy_rule']
                ];
                break;
        }

        // Optional columns.
        if (!empty($data['con_start']) && $data['type'] == 1) {
            $rule['con_start'] = $data['con_start'];
        }
        if (!empty($data['source']) && $data['type'] != 3) {
            $rule['source'] = $data['source'];
        }
        if (isset($data['writer_class']) && $data['type'] != 3) {
            $rule['writer_class'] = $data['writer_class'];
        }
        if (isset($data['writer']) && $data['type'] != 3) {
            $rule['writer'] = $data['writer'];
        }

        date_default_timezone_set('Asia/Shanghai');
        $result = Rule::insertGetId($rule);
        return Result::success($result);
    }

    /**
     * List crawl rules one page at a time, optionally restricted to one
     * website and/or filtered by a name keyword.
     *
     * @param array $data Reads 'page' and 'pageSize'; 'web_id' and 'keyWord' are optional.
     * @return array|mixed Result::success(['rep' => rows, 'count' => total]) or Result::error(...)
     */
    public function getRule(array $data): array
    {
        $where = [];
        if (isset($data['web_id'])) {
            $web = Web::where('id', $data['web_id'])->first();
            if (empty($web)) {
                return Result::error('请输入正确的网站id!');
            }
            $where[] = ['web_id', '=', $data['web_id']];
        }
        if (isset($data['keyWord'])) {
            // Append, so a keyword search inside one website keeps the web_id filter.
            $where[] = ['name', 'like', '%' . $data['keyWord'] . '%'];
        }

        $count = Rule::where($where)->count();
        if ($count == 0) {
            return Result::error('暂无相关规则任务!');
        }

        $rep = Rule::withCount('arts')
            ->where($where)
            ->orderBy("updated_at", "desc")
            ->limit($data['pageSize'])
            ->offset(($data['page'] - 1) * $data['pageSize'])
            ->get();

        return Result::success([
            'rep' => $rep->toArray(),
            'count' => $count
        ]);
    }

    /**
     * Fetch a single crawl rule.
     *
     * @param array $data Reads 'id' (rule id).
     * @return array|mixed Result::success(rule) or Result::error(...)
     */
    public function getOneRule(array $data): array
    {
        $result = Rule::where('id', $data['id'])->first();
        if (empty($result)) {
            return Result::error('请输入正确的规则任务id!');
        }
        return Result::success($result);
    }

    /**
     * Delete a crawl rule.
     *
     * Refused when any article of the rule has state 1 (imported). Articles
     * with state 0 (collected, not imported) are deleted together with the
     * rule, including their ArticleData rows, inside one transaction.
     *
     * @param array $data Reads 'rule_id'.
     * @return array Result::success(['rule' => n, 'art' => n]) or Result::error(...)
     */
    public function delRule(array $data): array
    {
        $ruleId = $data['rule_id'];
        $where = ['id' => $ruleId];

        $rule = Rule::where($where)->first();
        if (empty($rule)) {
            return Result::error('请输入正确的规则任务id!');
        }

        $importedCount = Article::where('rule_id', $ruleId)->where('state', 1)->count();
        if ($importedCount != 0) {
            return Result::error('此规则任务下的文章已导入,不可删除!');
        }

        $result = [];
        $pendingCount = Article::where('rule_id', $ruleId)->where('state', 0)->count();
        if ($pendingCount == 0) {
            $result['rule'] = Rule::where($where)->delete();
            return Result::success($result);
        }

        try {
            Db::beginTransaction();
            // Collect the ids first so the detail rows can be removed as well;
            // deleting only Article would leave orphaned ArticleData rows.
            $articleIds = Article::where('rule_id', $ruleId)->pluck('id')->toArray();
            $result['rule'] = Rule::where($where)->delete();
            ArticleData::whereIn('article_id', $articleIds)->delete();
            $result['art'] = Article::where('rule_id', $ruleId)->delete();
            Db::commit();
        } catch (\Throwable $ex) {
            Db::rollBack();
            var_dump($ex->getMessage());
            return Result::error("删除失败",0);
        }
        return Result::success($result);
    }

    /**
     * Queue a crawl job: wraps $data in a GatherProducer message and hands it
     * to the Producer resolved from the container.
     *
     * @param array $data Payload passed to GatherProducer unchanged.
     * @return array Always Result::success([]).
     */
    public function sendCrawler(array $data): array
    {
        var_dump("接收到的数据:",$data);
        $message = new GatherProducer($data);
        $producer = ContextApplicationContext::getContainer()->get(Producer::class);
        $a = $producer->produce($message);
        var_dump("生产者:",$a);
        return Result::success([]);
    }

    /**
     * Update a crawl rule. 'type' is stripped from the payload before saving
     * and the new name must not be used by another rule.
     *
     * @param array $data Column => value map; 'id' selects the rule, 'name' is checked for uniqueness.
     * @return array|mixed Result::success(affected rows) or Result::error(...)
     */
    public function upRule(array $data): array
    {
        $rule = Rule::where('id', $data['id'])->select('id')->first();
        unset($data['type']);
        if (empty($rule)) {
            return Result::error('请输入正确的规则任务id!');
        }

        $rulename = Rule::where('id', '!=', $rule['id'])
            ->where('name', $data['name'])
            ->select('name')
            ->first();
        if (!empty($rulename)) {
            return Result::error('已存在此任务规则名称!');
        }

        $result = Rule::where('id', $data['id'])->update($data);
        return Result::success($result);
    }

    /**
     * Run the crawl for one rule.
     *
     * Loads the rule joined with its website and dispatches on the website
     * type: 1 = page scraping via list-page URLs, 2 = paged POST API. The rule
     * status is set to 1 while running and to 2 afterwards, also on failure.
     *
     * @param array $data Reads 'id' (rule id) and 'admin_user_id'.
     * @return array Result::success([]) or Result::error(...) when the rule does not exist.
     */
    public function goCrawler(array $data): array
    {
        $where = [
            'rule.id'=>$data['id']
        ];
        $info = Rule::where($where)->leftJoin('web','rule.web_id','web.id')
            ->select("rule.*","web.name as web_name","web.url as web_url","web.type as web_type")
            ->first();
        // first() returns null for an unknown id; calling toArray() on it would be fatal.
        if (empty($info)) {
            return Result::error('请输入正确的规则任务id!');
        }
        $info = $info->toArray();
        var_dump("规则信息:",$info);

        switch ($info['web_type']) {
            case 1:
                var_dump("===========规则采集======",$info);
                try {
                    Rule::where(['id'=>$data['id']])->update(['status'=>1]);
                    // Extra values needed when the collected articles are stored.
                    $info['admin_user_id'] = $data['admin_user_id'];
                    $info['rule_id'] = $data['id'];
                    $info['copyfrom'] = $info['web_name'];
                    $info['author'] = $info['writer'];
                    $urlList = $this->addUrlArr($info);
                    if ($urlList) {
                        foreach ($urlList as $val) {
                            $this->ruleCollection($val,$info);
                        }
                    }
                } catch (\Exception $e) {
                    var_dump("采集失败报错:",$e->getMessage());
                } finally {
                    // Never leave the rule stuck in the "running" state.
                    Rule::where(['id'=>$data['id']])->update(['status'=>2]);
                }
                break;
            case 2:
                Rule::where(['id'=>$data['id']])->update(['status'=>1]);
                try {
                    $wecUrl = $info['first_url'];
                    $parames = json_decode($info['parameter'],true);
                    // The two code parameters are sent as one-element arrays
                    // with any surrounding brackets stripped.
                    $parames['webSiteCode'] = [trim($parames['webSiteCode'], "[]")];
                    $parames['channelCode'] = [trim($parames['channelCode'], "[]")];
                    $other = [
                        'web_url'=>$info['web_url'],
                        'copyfrom'=>$info['web_name'],
                        'admin_user_id'=>$data['admin_user_id'],
                        'rule_id'=>$data['id'],
                        'writer'=>$info['writer'],
                    ];
                    var_dump("=======开始接口采集====",$parames);
                    $this->foreachCurl($wecUrl,$parames,$other);
                } finally {
                    // Errors still propagate, but the status is always reset.
                    Rule::where(['id'=>$data['id']])->update(['status'=>2]);
                }
                break;
        }
        return Result::success([]);
    }

    /**
     * Build the list of list-page URLs to crawl for a rule.
     *
     * The list starts with 'first_url', followed by
     * second_start . N . second_end for N from 'second_num' up to
     * 'end_pagenum'. When 'end_pagenum' is 1 only 'first_url' is returned.
     *
     * @param array $data Rule row; reads first_url, second_start, second_num, second_end, end_pagenum.
     * @return array List of URLs.
     */
    public function addUrlArr($data)
    {
        $arrList = [$data['first_url']];
        $endPage = intval($data['end_pagenum']);
        $i = $data['second_num'] - 1;

        while (true) {
            $i++;
            $url = $data['second_start'].$i.$data['second_end'];
            // NOTE(review): the result of this probe was never used by the
            // original code; the call is kept only in case pageExists() has
            // side effects. Confirm and drop it, or use it to stop early.
            Result::pageExists($url);
            if ($i > $endPage || $endPage - 1 == 0) {
                break;
            }
            $arrList[] = $url;
        }
        return $arrList;
    }

    /**
     * Scrape one list page and store every article it links to.
     *
     * @param string $url  List-page URL.
     * @param array  $info Rule row plus the extra keys set by goCrawler().
     * @return void
     */
    public function ruleCollection($url,$info)
    {
        $list = QueryList::get($url);
        $dataList = $list->rules([
            'title' => ['a:eq(0)', 'text'],
            'link' => ['a:eq(0)', 'href'],
        ])->range($info['start'])->query()->getData();
        var_dump("采集的列表:",$dataList);

        $dataList = $dataList->toArray();
        if (!$dataList) {
            return;
        }

        // The detail-page rules and range do not depend on the item: build them once.
        $rules = [];
        if ($info['title']) {
            $rules['title'] = [$info['title'],'text'];
        }
        if ($info['content']) {
            $rules['content'] = [$info['content'],'html'];
        }
        $detailRange = $info['con_start'] ?? '';

        foreach ($dataList as $item) {
            // Links that contain the site's own URL are used as they are.
            // Other links containing "http" point to another site and are
            // skipped; the rest are relative and get the con_url prefix.
            $newUrlStr = $item['link'];
            if (!str_contains($item['link'], $info['web_url'])) {
                if (str_contains($item['link'], 'http')) {
                    continue;
                }
                $newUrlStr = $info['con_url'].$item['link'];
            }
            var_dump("详情地址:",$newUrlStr);

            $detailContent = QueryList::get($newUrlStr);
            var_dump("打印规则:",$rules,"详情起始:", $detailRange);
            $detailData = $detailContent->rules($rules)->range($detailRange)->query()->getData();
            $detailData = $detailData->toArray();
            var_dump("内容详情:",$detailData,$newUrlStr);
            if (!$detailData) {
                continue;
            }

            foreach ($detailData as $val) {
                $data = [];
                $data['fromurl'] = $newUrlStr;
                $data['title'] = $val['title'] ?? '';
                $data['content'] = $val['content'] ?? '';
                $data['newUrlStr'] = $newUrlStr;
                $data['introduce'] = $val['title'] ?? '';
                $data['keyword'] = $val['title'] ?? '';
                $data['copyfrom'] = $info['copyfrom'];
                $data['source'] = $info['source'] ?? $info['copyfrom'];
                $data['admin_user_id'] = $info['admin_user_id'] ?? '';
                $data['rule_id'] = $info['rule_id'] ?? '';
                $data['author'] = $info['author'] ?? '';
                $this->insertArticleData($data);
            }
        }
    }

    /**
     * Store one collected article (Article row + ArticleData row) unless an
     * article with the same title already exists. Failures are dumped and
     * swallowed so a single bad article does not abort the crawl.
     *
     * @param array $data Reads title, content, newUrlStr, copyfrom, author, source, admin_user_id, rule_id.
     * @return void
     */
    public function insertArticleData($data=[])
    {
        if (!$data) {
            var_dump("没有数据可以插入:");
            return;
        }

        Db::beginTransaction();
        try {
            $articleInfo = Article::where(['title'=>$data['title']])->first();
            if (empty($articleInfo)) {
                $insertData = [
                    'fromurl' => $data['newUrlStr'],
                    'oldtitle' => $data['title'],
                    'title' => $data['title'],
                    'copyfrom' => $data['copyfrom'],
                    'author' => $data['author'],
                    'introduce' => $data['title'],
                    'keyword' => $data['title'],
                    'source' => isset($data['source']) && $data['source'] != '' ? $data['source'] : $data['copyfrom'],
                    'admin_user_id' => $data['admin_user_id'],
                    'rule_id' => $data['rule_id'],
                ];
                $article_id = Article::insertGetId($insertData);

                ArticleData::insertGetId([
                    'article_id' => $article_id,
                    'content' => $data['content'],
                ]);
            }
            Db::commit();
        } catch (\Throwable $e) {
            // \Throwable, not \Exception: an \Error must not leave the transaction open.
            Db::rollBack();
            var_dump("插入失败:",$e->getMessage());
        }
    }

    /**
     * Crawl a paged POST API: request page after page, store every returned
     * item, and stop at the first page without results.
     *
     * @param string $wecUrl  API endpoint.
     * @param array  $parames Request parameters; 'current' is the page number and is incremented per page.
     * @param array  $other   Reads web_url, copyfrom, admin_user_id, rule_id, writer.
     * @param int    $page    Unused; kept so existing callers keep working.
     * @return void
     */
    public function foreachCurl($wecUrl,$parames,$other,&$page=1)
    {
        $options = [
            CURLOPT_HEADER => true, // set to true to include the header in the response
            CURLOPT_TIMEOUT => 30   // request timeout in seconds
        ];

        // Each page is requested exactly once. The loop replaces the original
        // recursion, which also issued a second, discarded request per page.
        while (true) {
            $result = Result::http_post($wecUrl,$parames,$options);
            $result = json_decode($result['response'] ?? '',true);

            $dataList = $result['data']['results'] ?? [];
            if (!is_array($dataList) || !$dataList) {
                break;
            }

            foreach ($dataList as $val) {
                $newUrlStr = json_decode($val['source']['urls'],true);
                $newUrlStr = $other['web_url'].$newUrlStr['common'];
                $insertData = [
                    'newUrlStr'=>$newUrlStr,
                    'title'=>$val['source']['title']??'',
                    'source'=>$val['source']['contentSource']??'',
                    'copyfrom'=>$other['copyfrom']??'',
                    'content'=>$val['source']['content']['content']??'',
                    'admin_user_id'=>$other['admin_user_id']??'',
                    'rule_id'=>$other['rule_id']??'',
                    'author'=>$other['writer']??''
                ];
                $this->insertArticleData($insertData);
            }

            $parames['current'] = intval($parames['current'] ?? 0) + 1;
        }
    }

    /**
     * List the collected articles of a rule one page at a time, with optional
     * title / source / state filters.
     *
     * @param array $data Reads 'rule_id', 'page', 'pageSize'; 'title', 'source', 'state' are optional.
     * @return array Result::success(['rep' => rows, 'count' => total]) or Result::error(...)
     */
    public function getInfo(array $data): array
    {
        $where = [
            ['rule_id','=',$data['rule_id']]
        ];
        if (!empty($data['title'])) {
            $where[] = ['title','like','%'.$data['title'].'%'];
        }
        if (!empty($data['source'])) {
            $where[] = ['copyfrom','like','%'.$data['source'].'%'];
        }
        if (isset($data['state']) && $data['state']!='') {
            $where[] = ['state',$data['state']];
        }

        $count = Article::where($where)->count();
        if ($count == 0) {
            return Result::error('暂无资讯');
        }

        // The category relation is loaded alongside the articles.
        $info = Article::query()
            ->where($where)
            ->with('category')
            ->orderBy("article.updated_at","desc")
            ->limit($data['pageSize'])
            ->offset(($data['page']-1)*$data['pageSize'])
            ->get();

        return Result::success([
            'rep' => $info->toArray(),
            'count' => $count
        ]);
    }

    /**
     * Fetch one article with its content and, when set, its category name.
     *
     * @param array $data Reads 'art_id'.
     * @return array Result::success(article) or Result::error(...)
     */
    public function getOneInfo(array $data): array
    {
        // Columns are qualified so the join cannot make "id" ambiguous.
        $info = Article::where('article.id', $data['art_id'])
            ->leftJoin('article_data','article_data.article_id','=','article.id')
            ->select('article.*','article_data.content')
            ->first();
        if ($info == null) {
            return Result::error('请输入正确的资讯id!');
        }

        if ($info['catid'] != null) {
            $category = Category::where(['id'=>$info['catid']])->select('name')->first();
            // The category may no longer exist.
            $info['category'] = $category['name'] ?? null;
        }
        return Result::success($info);
    }

    /**
     * Update an article and its content. Articles with state 1 (imported)
     * cannot be edited.
     *
     * @param array $data Reads 'art_id' and 'content'; every other key is written to the Article row.
     * @return array Result::success(['info' => n, 'art_data' => n]) or Result::error(...)
     */
    public function upInfo(array $data): array
    {
        $id = $data['art_id'];
        $content = $data['content'];
        // Neither key is a column of the article table.
        unset($data['art_id'], $data['content']);

        $info = Article::where('id',$id)->first();
        if ($info == null) {
            return Result::error('请输入正确的文章id!');
        }
        if ($info['state'] == 1) {
            return Result::error('此文章已导入 ,不可编辑!');
        }

        Db::beginTransaction();
        try {
            $info = Article::where('id',$id)->update($data);
            $art_data = ArticleData::where('article_id',$id)->update(['content'=>$content]);
            Db::commit();
        } catch (\Throwable $ex) {
            Db::rollBack();
            var_dump($ex->getMessage());
            return Result::error("修改失败",0);
        }

        return Result::success([
            'info' => $info,
            'art_data' => $art_data
        ]);
    }

    /**
     * Delete an article and its content. Articles with state 1 (imported)
     * cannot be deleted.
     *
     * @param array $data Reads 'art_id'.
     * @return array Result::success(['delinfo' => n, 'deldata' => n]) or Result::error(...)
     */
    public function delInfo(array $data): array
    {
        $id = $data['art_id'];
        $info = Article::where('id',$id)->first();
        if ($info == null) {
            return Result::error('请输入正确的文章id!');
        }
        if ($info['state'] == 1) {
            return Result::error('此文章已导入,不可删除!');
        }

        Db::beginTransaction();
        try {
            $delinfo = Article::where('id',$id)->delete();
            $deldata = ArticleData::where('article_id',$id)->delete();
            Db::commit();
        } catch (\Throwable $ex) {
            Db::rollBack();
            var_dump($ex->getMessage());
            return Result::error("删除失败",0);
        }

        return Result::success([
            'delinfo' => $delinfo,
            'deldata' => $deldata
        ]);
    }

    /**
     * Link the not-yet-imported articles of a rule to a navigation category.
     *
     * - If the rule already has imported articles, their category is reused.
     * - Otherwise 'cat_arr_id' from the request is used (last element = catid).
     * - Without 'cat_arr_id' the list of categories is returned for selection.
     *
     * @param array $data Reads 'rule_id'; 'cat_arr_id' (array of category ids) is optional.
     * @return array Result::success(affected rows | category list) or Result::error(...)
     */
    public function addCatid(array $data): array
    {
        $ruleId = $data['rule_id'];

        if (Article::where('rule_id',$ruleId)->count() == 0) {
            return Result::error('还未采集,请采集');
        }

        // Plain id list: safe to pass to whereIn().
        $pendingIds = Article::where('rule_id',$ruleId)->where('state',0)->pluck('id')->toArray();
        if (empty($pendingIds)) {
            return Result::error('所有文章都已导入,不可修改关联的导航池!');
        }

        $imported = Article::where('rule_id',$ruleId)
            ->where('state',1)
            ->select('catid','cat_arr_id')
            ->first();

        if (!empty($imported)) {
            // Reuse the category of the articles that were already imported.
            $unlinked = Article::whereIn('id',$pendingIds)->whereNull('catid')->count();
            if ($unlinked == 0) {
                return Result::error('已全部关联导航池请勿重复关联');
            }
            $result = Article::whereIn('id',$pendingIds)->update([
                'catid' => $imported['catid'] ?? '',
                'cat_arr_id' => $imported['cat_arr_id'],
            ]);
        } elseif (isset($data['cat_arr_id'])) {
            $catArr = $data['cat_arr_id'];
            $result = Article::whereIn('id',$pendingIds)->update([
                'catid' => end($catArr),
                'cat_arr_id' => json_encode($catArr),
            ]);
        } else {
            // Nothing to link with yet: return the categories to choose from.
            $categories = Category::select('id','name')->get();
            if ($categories->isEmpty()) {
                return Result::error('暂无数据');
            }
            return Result::success($categories);
        }

        if (empty($result)) {
            return Result::error('暂无数据');
        }
        return Result::success($result);
    }

    /**
     * Queue an import job: wraps $data in an ImportProducer message and hands
     * it to the Producer resolved from the container.
     *
     * @param array $data Payload passed to ImportProducer unchanged.
     * @return array Always Result::success([]).
     */
    public function addArt(array $data): array
    {
        var_dump("接收到的数据:",$data);
        $message = new ImportProducer($data);
        $producer = ContextApplicationContext::getContainer()->get(Producer::class);
        $a = $producer->produce($message);
        var_dump("生产者:",$a);
        return Result::success([]);
    }

    /**
     * Import job consumer: copies the collected, category-linked articles of a
     * rule (state 0, catid not null) into OldArticle / OldArticleData and marks
     * them as imported (state 1).
     *
     * @param array $data Reads 'rule_id'.
     * @return array Result::success(['articles' => rows, 'art_content' => rows]) or Result::error(...)
     */
    public function goAddArt(array $data): array
    {
        $where = [
            'rule_id' => $data['rule_id'],
            'state' => 0
        ];

        // One query yields both the ids and the rows to copy, so the two can
        // never describe different sets of articles.
        $rows = Article::where($where)
            ->whereNotNull('catid')
            ->select('id','title','catid','level','introduce','keyword','author','copyfrom','fromurl','hits','islink','imgurl','admin_user_id','is_original','cat_arr_id')
            ->orderBy('id')
            ->get()
            ->toArray();
        $arts_id = array_column($rows, 'id');
        // The source id is not copied to the target table.
        $arts = array_map(static function (array $row): array {
            unset($row['id']);
            return $row;
        }, $rows);

        // NOTE(review): the articles are ordered by id ASC but the contents by
        // article_id DESC, and only 'content' is copied, so the inserted
        // content rows carry no link to the inserted articles. Left unchanged
        // because the OldArticleData schema is not visible here; please confirm.
        $arts_data = ArticleData::whereIn('article_id',$arts_id)
            ->select('content')
            ->orderBy('article_id','desc')
            ->get()
            ->toArray();

        $data = [
            'articles' => $arts,
            'art_content' => $arts_data
        ];

        Db::beginTransaction();
        try {
            OldArticle::insert($arts);
            OldArticleData::insert($arts_data);
            // Mark exactly the rows that were copied, not whatever matches now.
            Article::whereIn('id',$arts_id)->update(['state' => 1]);
            Db::commit();
        } catch (\Throwable $ex) {
            Db::rollBack();
            var_dump($ex->getMessage());
            return Result::error($ex->getMessage(),0);
        }
        return Result::success($data);
    }
}