|
@@ -7,16 +7,20 @@ use App\Model\Article;
|
|
use App\Model\Rule;
|
|
use App\Model\Rule;
|
|
use App\Model\Web;
|
|
use App\Model\Web;
|
|
use Hyperf\DbConnection\Db;
|
|
use Hyperf\DbConnection\Db;
|
|
|
|
+use Hyperf\Di\Annotation\Inject;
|
|
use Hyperf\RpcServer\Annotation\RpcService;
|
|
use Hyperf\RpcServer\Annotation\RpcService;
|
|
use App\Tools\Result;
|
|
use App\Tools\Result;
|
|
use QL\QueryList;
|
|
use QL\QueryList;
|
|
use Swoole\Coroutine;
|
|
use Swoole\Coroutine;
|
|
|
|
+use App\Service\GatherQueueService;
|
|
|
|
+
|
|
|
|
|
|
|
|
|
|
#[RpcService(name: "CollectorService", protocol: "jsonrpc-http", server: "jsonrpc-http")]
|
|
#[RpcService(name: "CollectorService", protocol: "jsonrpc-http", server: "jsonrpc-http")]
|
|
class CollectorService implements CollectorServiceInterface
|
|
class CollectorService implements CollectorServiceInterface
|
|
{
|
|
{
|
|
-
|
|
|
|
|
|
+ #[Inject]
|
|
|
|
+ protected GatherQueueService $Gservice;
|
|
/**
|
|
/**
|
|
* 添加网站
|
|
* 添加网站
|
|
* @param array $data
|
|
* @param array $data
|
|
@@ -108,11 +112,23 @@ class CollectorService implements CollectorServiceInterface
|
|
}
|
|
}
|
|
return Result::success($id);
|
|
return Result::success($id);
|
|
}
|
|
}
|
|
|
|
+
|
|
/**
|
|
/**
|
|
|
|
+ * 发送数据
|
|
* @param array $data
|
|
* @param array $data
|
|
* @return array
|
|
* @return array
|
|
*/
|
|
*/
|
|
public function sendCrawler(array $data): array
|
|
public function sendCrawler(array $data): array
|
|
|
|
+ {
|
|
|
|
+ $result = $this->Gservice->push($data,rand(5,20));
|
|
|
|
+ return Result::success([$result]);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * @param array $data
|
|
|
|
+ * @return array
|
|
|
|
+ */
|
|
|
|
+ public function goCrawler(array $data): array
|
|
{
|
|
{
|
|
//通过规则id 查询规则类型
|
|
//通过规则id 查询规则类型
|
|
$where = [
|
|
$where = [
|
|
@@ -125,10 +141,10 @@ class CollectorService implements CollectorServiceInterface
|
|
|
|
|
|
switch ($info['web_type']){
|
|
switch ($info['web_type']){
|
|
case 1:
|
|
case 1:
|
|
- var_dump("wojinlailaile======",$info);
|
|
|
|
|
|
+ var_dump("===========规则采集======",$info);
|
|
Rule::where(['id'=>$data['id']])->update(['status'=>1]);
|
|
Rule::where(['id'=>$data['id']])->update(['status'=>1]);
|
|
$data['copyfrom'] = $info['web_name'];
|
|
$data['copyfrom'] = $info['web_name'];
|
|
- $data['author'] = '刘德华';
|
|
|
|
|
|
+ $data['author'] = $info['writer'];;
|
|
$data['first_url'] = $info['first_url'];
|
|
$data['first_url'] = $info['first_url'];
|
|
$data['second_start'] = $info['second_start'];
|
|
$data['second_start'] = $info['second_start'];
|
|
$data['second_num'] = $info['second_num'];
|
|
$data['second_num'] = $info['second_num'];
|
|
@@ -140,7 +156,7 @@ class CollectorService implements CollectorServiceInterface
|
|
$urlList = $this->addUrlArr($data);
|
|
$urlList = $this->addUrlArr($data);
|
|
if($urlList){
|
|
if($urlList){
|
|
foreach ($urlList as $val){
|
|
foreach ($urlList as $val){
|
|
- var_dump("单列表地址:",$val);
|
|
|
|
|
|
+// var_dump("单列表地址:",$val);
|
|
$this->ruleCollection($val,$data);
|
|
$this->ruleCollection($val,$data);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -158,9 +174,10 @@ class CollectorService implements CollectorServiceInterface
|
|
'web_url'=>$info['web_url'],
|
|
'web_url'=>$info['web_url'],
|
|
'copyfrom'=>$info['web_name'],
|
|
'copyfrom'=>$info['web_name'],
|
|
'admin_user_id'=>$data['admin_user_id'],
|
|
'admin_user_id'=>$data['admin_user_id'],
|
|
- 'rule_id'=>$data['id']
|
|
|
|
|
|
+ 'rule_id'=>$data['id'],
|
|
|
|
+ 'writer'=>$info['writer'],
|
|
];
|
|
];
|
|
- var_dump("开始调用接口方法====",$parames);
|
|
|
|
|
|
+ var_dump("=======开始接口采集====",$parames);
|
|
// die;
|
|
// die;
|
|
$this->foreachCurl($wecUrl,$parames,$other);
|
|
$this->foreachCurl($wecUrl,$parames,$other);
|
|
Rule::where(['id'=>$data['id']])->update(['status'=>2]);
|
|
Rule::where(['id'=>$data['id']])->update(['status'=>2]);
|
|
@@ -183,7 +200,7 @@ class CollectorService implements CollectorServiceInterface
|
|
$i++;
|
|
$i++;
|
|
$url = $data['second_start'].$i.$data['second_end'];
|
|
$url = $data['second_start'].$i.$data['second_end'];
|
|
$respon1 = Result::pageExists($url);
|
|
$respon1 = Result::pageExists($url);
|
|
- var_dump("采集地址:",$respon1,$url);
|
|
|
|
|
|
+// var_dump("采集地址:",$respon1,$url);
|
|
// Coroutine::sleep(2);
|
|
// Coroutine::sleep(2);
|
|
if ($i==intval($data['end_pagenum'])-1) {
|
|
if ($i==intval($data['end_pagenum'])-1) {
|
|
$exit = true;
|
|
$exit = true;
|
|
@@ -203,13 +220,13 @@ class CollectorService implements CollectorServiceInterface
|
|
*/
|
|
*/
|
|
public function ruleCollection($url,$data)
|
|
public function ruleCollection($url,$data)
|
|
{
|
|
{
|
|
- var_dump("采集参数:",$data);
|
|
|
|
|
|
+// var_dump("采集参数:",$data);
|
|
$list = QueryList::get($url);
|
|
$list = QueryList::get($url);
|
|
$dataList = $list->rules([
|
|
$dataList = $list->rules([
|
|
'title' => ['a', 'text'],
|
|
'title' => ['a', 'text'],
|
|
'link' => ['a', 'href'],
|
|
'link' => ['a', 'href'],
|
|
])->range('.list1 li')->query()->getData();
|
|
])->range('.list1 li')->query()->getData();
|
|
- var_dump("采集的内容:",$dataList);
|
|
|
|
|
|
+// var_dump("采集的内容:",$dataList);
|
|
// var_dump("====",$dataList);die;
|
|
// var_dump("====",$dataList);die;
|
|
$firstUrlArr = explode("/", $url);
|
|
$firstUrlArr = explode("/", $url);
|
|
array_pop($firstUrlArr);
|
|
array_pop($firstUrlArr);
|
|
@@ -227,10 +244,10 @@ class CollectorService implements CollectorServiceInterface
|
|
])->range(".news-details")->query()->getData();
|
|
])->range(".news-details")->query()->getData();
|
|
|
|
|
|
$detailData = $detailData->toArray();
|
|
$detailData = $detailData->toArray();
|
|
- var_dump("内容详情:",$detailData,$newUrlStr);
|
|
|
|
|
|
+// var_dump("内容详情:",$detailData,$newUrlStr);
|
|
if($detailData){
|
|
if($detailData){
|
|
foreach ($detailData as $val){
|
|
foreach ($detailData as $val){
|
|
- var_dump("进没进foreach:",$newUrlStr,$val);
|
|
|
|
|
|
+// var_dump("进没进foreach:",$newUrlStr,$val);
|
|
$data['fromurl'] = $newUrlStr;
|
|
$data['fromurl'] = $newUrlStr;
|
|
$data['title'] = $val['title'];
|
|
$data['title'] = $val['title'];
|
|
$data['content'] = $val['content'];
|
|
$data['content'] = $val['content'];
|
|
@@ -243,7 +260,7 @@ class CollectorService implements CollectorServiceInterface
|
|
$data['admin_user_id'] = $data['admin_user_id']??'';
|
|
$data['admin_user_id'] = $data['admin_user_id']??'';
|
|
$data['rule_id'] = $data['rule_id']??'';
|
|
$data['rule_id'] = $data['rule_id']??'';
|
|
// $data['copyfrom'] = $data['copyfrom'];
|
|
// $data['copyfrom'] = $data['copyfrom'];
|
|
- var_dump("要插入的数据:",$data);
|
|
|
|
|
|
+// var_dump("要插入的数据:",$data);
|
|
$this->insertArticleData($data);
|
|
$this->insertArticleData($data);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -264,7 +281,7 @@ class CollectorService implements CollectorServiceInterface
|
|
Db::beginTransaction();
|
|
Db::beginTransaction();
|
|
try{
|
|
try{
|
|
$articleInfo = Article::where(['title'=>$data['title']])->first();
|
|
$articleInfo = Article::where(['title'=>$data['title']])->first();
|
|
- var_dump("获取详情:",$articleInfo,$data);
|
|
|
|
|
|
+// var_dump("获取详情:",$articleInfo,$data);
|
|
if(empty($articleInfo)){
|
|
if(empty($articleInfo)){
|
|
$insertData = [];
|
|
$insertData = [];
|
|
$insertData['fromurl'] =$data['newUrlStr'];
|
|
$insertData['fromurl'] =$data['newUrlStr'];
|
|
@@ -282,7 +299,7 @@ class CollectorService implements CollectorServiceInterface
|
|
$insertDataDetail = [];
|
|
$insertDataDetail = [];
|
|
$insertDataDetail['article_id'] = $article_id;
|
|
$insertDataDetail['article_id'] = $article_id;
|
|
$insertDataDetail['content'] = $data['content'];
|
|
$insertDataDetail['content'] = $data['content'];
|
|
- var_dump("插入ArticleData:",$insertDataDetail);
|
|
|
|
|
|
+// var_dump("插入ArticleData:",$insertDataDetail);
|
|
ArticleData::insertGetId($insertDataDetail);
|
|
ArticleData::insertGetId($insertDataDetail);
|
|
// Coroutine::sleep(2);
|
|
// Coroutine::sleep(2);
|
|
// var_dump("插入成功一次:",$article_id,$insertDataDetail);
|
|
// var_dump("插入成功一次:",$article_id,$insertDataDetail);
|
|
@@ -311,7 +328,7 @@ class CollectorService implements CollectorServiceInterface
|
|
];
|
|
];
|
|
$result = Result::http_post($wecUrl,$parames,$options);
|
|
$result = Result::http_post($wecUrl,$parames,$options);
|
|
$result = json_decode($result['response'],true);
|
|
$result = json_decode($result['response'],true);
|
|
- var_dump("获取数据:",$result);
|
|
|
|
|
|
+// var_dump("获取数据:",$result);
|
|
if($result['data'] && $result['data']['results']){
|
|
if($result['data'] && $result['data']['results']){
|
|
$dataList = $result['data']['results'];
|
|
$dataList = $result['data']['results'];
|
|
// var_dump("取数据结构体:",$dataList);
|
|
// var_dump("取数据结构体:",$dataList);
|
|
@@ -328,7 +345,7 @@ class CollectorService implements CollectorServiceInterface
|
|
'content'=>$val['source']['content']['content']??'',
|
|
'content'=>$val['source']['content']['content']??'',
|
|
'admin_user_id'=>$other['admin_user_id']??'',
|
|
'admin_user_id'=>$other['admin_user_id']??'',
|
|
'rule_id'=>$other['rule_id']??'',
|
|
'rule_id'=>$other['rule_id']??'',
|
|
- 'author'=>'冯蕊'
|
|
|
|
|
|
+ 'author'=>$other['writer']??''
|
|
];
|
|
];
|
|
// var_dump("调用插入数据方法,组装数据:",$insertData);
|
|
// var_dump("调用插入数据方法,组装数据:",$insertData);
|
|
$this->insertArticleData($insertData);
|
|
$this->insertArticleData($insertData);
|
|
@@ -339,7 +356,7 @@ class CollectorService implements CollectorServiceInterface
|
|
$parames['current'] = $pages;
|
|
$parames['current'] = $pages;
|
|
$twoResult = Result::http_post($wecUrl,$parames,$options);
|
|
$twoResult = Result::http_post($wecUrl,$parames,$options);
|
|
if($result['data'] && $result['data']['results'] && count($result['data']['results'])>0){
|
|
if($result['data'] && $result['data']['results'] && count($result['data']['results'])>0){
|
|
- var_dump("分页测试:",$parames,$parames['current']);
|
|
|
|
|
|
+// var_dump("分页测试:",$parames,$parames['current']);
|
|
$this->foreachCurl($wecUrl,$parames,$other,$pages);
|
|
$this->foreachCurl($wecUrl,$parames,$other,$pages);
|
|
}
|
|
}
|
|
// var_dump("正确的数据:",$result);
|
|
// var_dump("正确的数据:",$result);
|