|
@@ -324,8 +324,13 @@ class CollectorService implements CollectorServiceInterface
|
|
$data['end_pagenum']= $info['end_pagenum'];
|
|
$data['end_pagenum']= $info['end_pagenum'];
|
|
$data['rule_id']= $data['id'];
|
|
$data['rule_id']= $data['id'];
|
|
$data['admin_user_id']= $data['admin_user_id'];
|
|
$data['admin_user_id']= $data['admin_user_id'];
|
|
|
|
+ $data['start'] = $info['start']??'';
|
|
|
|
+ $data['title'] = $info['title']??'';
|
|
|
|
+ $data['content'] = $info['content']??'';
|
|
|
|
+ var_dump("++++++++++++++++++");
|
|
// $data['newUrlStr'] =
|
|
// $data['newUrlStr'] =
|
|
$urlList = $this->addUrlArr($data);
|
|
$urlList = $this->addUrlArr($data);
|
|
|
|
+ var_dump("采集列表:",$urlList);
|
|
if($urlList){
|
|
if($urlList){
|
|
foreach ($urlList as $val){
|
|
foreach ($urlList as $val){
|
|
// var_dump("单列表地址:",$val);
|
|
// var_dump("单列表地址:",$val);
|
|
@@ -372,9 +377,9 @@ class CollectorService implements CollectorServiceInterface
|
|
$i++;
|
|
$i++;
|
|
$url = $data['second_start'].$i.$data['second_end'];
|
|
$url = $data['second_start'].$i.$data['second_end'];
|
|
$respon1 = Result::pageExists($url);
|
|
$respon1 = Result::pageExists($url);
|
|
-// var_dump("采集地址:",$respon1,$url);
|
|
|
|
|
|
+ var_dump("采集地址:",$respon1,$url);
|
|
// Coroutine::sleep(2);
|
|
// Coroutine::sleep(2);
|
|
- if ($i==intval($data['end_pagenum'])-1) {
|
|
|
|
|
|
+ if ($i==intval($data['end_pagenum'])-1 || intval($data['end_pagenum'])-1==0) {
|
|
$exit = true;
|
|
$exit = true;
|
|
// Coroutine::exit(); // 退出循环
|
|
// Coroutine::exit(); // 退出循环
|
|
}else{
|
|
}else{
|
|
@@ -392,31 +397,39 @@ class CollectorService implements CollectorServiceInterface
|
|
*/
|
|
*/
|
|
public function ruleCollection($url,$data)
|
|
public function ruleCollection($url,$data)
|
|
{
|
|
{
|
|
-// var_dump("采集参数:",$data);
|
|
|
|
|
|
+ var_dump("采集参数:",$url,$data);
|
|
$list = QueryList::get($url);
|
|
$list = QueryList::get($url);
|
|
$dataList = $list->rules([
|
|
$dataList = $list->rules([
|
|
'title' => ['a', 'text'],
|
|
'title' => ['a', 'text'],
|
|
'link' => ['a', 'href'],
|
|
'link' => ['a', 'href'],
|
|
- ])->range('.list1 li')->query()->getData();
|
|
|
|
-// var_dump("采集的内容:",$dataList);
|
|
|
|
|
|
+ ])->range($data['start'])->query()->getData();
|
|
|
|
+ var_dump("采集的内容:",$dataList);
|
|
// var_dump("====",$dataList);die;
|
|
// var_dump("====",$dataList);die;
|
|
$firstUrlArr = explode("/", $url);
|
|
$firstUrlArr = explode("/", $url);
|
|
array_pop($firstUrlArr);
|
|
array_pop($firstUrlArr);
|
|
$firstUrlArr = implode('/',$firstUrlArr);
|
|
$firstUrlArr = implode('/',$firstUrlArr);
|
|
|
|
|
|
$dataList = $dataList->toArray();
|
|
$dataList = $dataList->toArray();
|
|
|
|
+// var_dump($dataList);die;
|
|
if($dataList){
|
|
if($dataList){
|
|
foreach ($dataList as $tiem){
|
|
foreach ($dataList as $tiem){
|
|
$newUrl = substr($tiem['link'], 1);
|
|
$newUrl = substr($tiem['link'], 1);
|
|
$newUrlStr = $firstUrlArr.$newUrl;
|
|
$newUrlStr = $firstUrlArr.$newUrl;
|
|
$detailContent = QueryList::get($newUrlStr);
|
|
$detailContent = QueryList::get($newUrlStr);
|
|
- $detailData = $detailContent->rules([
|
|
|
|
- 'title'=>['h1','text'],
|
|
|
|
- 'content'=>['.TRS_UEDITOR','html'],
|
|
|
|
- ])->range(".news-details")->query()->getData();
|
|
|
|
|
|
+ $rules = [];
|
|
|
|
+ if($data['title']){
|
|
|
|
+ $rules['title'] = [$data['title'],'text'];
|
|
|
|
+ }
|
|
|
|
+ if($data['content']){
|
|
|
|
+ $rules['content'] = [$data['content'],'html'];
|
|
|
|
+ }
|
|
|
|
+ //详情页范围
|
|
|
|
+ $detailRange = '.news-details';
|
|
|
|
+ var_dump("打印规则:",$rules);
|
|
|
|
+ $detailData = $detailContent->rules($rules)->range($detailRange)->query()->getData();
|
|
|
|
|
|
$detailData = $detailData->toArray();
|
|
$detailData = $detailData->toArray();
|
|
-// var_dump("内容详情:",$detailData,$newUrlStr);
|
|
|
|
|
|
+ var_dump("内容详情:",$detailData,$newUrlStr);
|
|
if($detailData){
|
|
if($detailData){
|
|
foreach ($detailData as $val){
|
|
foreach ($detailData as $val){
|
|
// var_dump("进没进foreach:",$newUrlStr,$val);
|
|
// var_dump("进没进foreach:",$newUrlStr,$val);
|
|
@@ -424,7 +437,6 @@ class CollectorService implements CollectorServiceInterface
|
|
$data['title'] = $val['title'];
|
|
$data['title'] = $val['title'];
|
|
$data['content'] = $val['content'];
|
|
$data['content'] = $val['content'];
|
|
$data['newUrlStr'] = $newUrlStr;
|
|
$data['newUrlStr'] = $newUrlStr;
|
|
- $data['source'] = '';
|
|
|
|
$data['introduce'] = $val['title']??'';
|
|
$data['introduce'] = $val['title']??'';
|
|
$data['keyword'] = $val['title']??'';
|
|
$data['keyword'] = $val['title']??'';
|
|
$data['copyfrom'] = $data['copyfrom'];
|
|
$data['copyfrom'] = $data['copyfrom'];
|