rkljw 4 ay önce
ebeveyn
işleme
1d473d6b3c

+ 23 - 11
app/JsonRpc/CollectorService.php

@@ -324,8 +324,13 @@ class CollectorService implements CollectorServiceInterface
                 $data['end_pagenum']= $info['end_pagenum'];
                 $data['end_pagenum']= $info['end_pagenum'];
                 $data['rule_id']= $data['id'];
                 $data['rule_id']= $data['id'];
                 $data['admin_user_id']= $data['admin_user_id'];
                 $data['admin_user_id']= $data['admin_user_id'];
+                $data['start'] = $info['start']??'';
+                $data['title'] = $info['title']??'';
+                $data['content'] = $info['content']??'';
+                var_dump("++++++++++++++++++");
 //                $data['newUrlStr'] =
 //                $data['newUrlStr'] =
                 $urlList = $this->addUrlArr($data);
                 $urlList = $this->addUrlArr($data);
+                var_dump("采集列表:",$urlList);
                 if($urlList){
                 if($urlList){
                     foreach ($urlList as $val){
                     foreach ($urlList as $val){
 //                        var_dump("单列表地址:",$val);
 //                        var_dump("单列表地址:",$val);
@@ -372,9 +377,9 @@ class CollectorService implements CollectorServiceInterface
                 $i++;
                 $i++;
                 $url = $data['second_start'].$i.$data['second_end'];
                 $url = $data['second_start'].$i.$data['second_end'];
                 $respon1 = Result::pageExists($url);
                 $respon1 = Result::pageExists($url);
-//                var_dump("采集地址:",$respon1,$url);
+                var_dump("采集地址:",$respon1,$url);
 //                Coroutine::sleep(2);
 //                Coroutine::sleep(2);
-                if ($i==intval($data['end_pagenum'])-1) {
+                if ($i==intval($data['end_pagenum'])-1 || intval($data['end_pagenum'])-1==0) {
                     $exit = true;
                     $exit = true;
 //                    Coroutine::exit(); // 退出循环
 //                    Coroutine::exit(); // 退出循环
                 }else{
                 }else{
@@ -392,31 +397,39 @@ class CollectorService implements CollectorServiceInterface
      */
      */
     public function ruleCollection($url,$data)
     public function ruleCollection($url,$data)
     {
     {
-//        var_dump("采集参数:",$data);
+        var_dump("采集参数:",$url,$data);
         $list = QueryList::get($url);
         $list = QueryList::get($url);
         $dataList = $list->rules([
         $dataList = $list->rules([
             'title' => ['a', 'text'],
             'title' => ['a', 'text'],
             'link'  => ['a', 'href'],
             'link'  => ['a', 'href'],
-        ])->range('.list1 li')->query()->getData();
-//        var_dump("采集的内容:",$dataList);
+        ])->range($data['start'])->query()->getData();
+        var_dump("采集的内容:",$dataList);
 //        var_dump("====",$dataList);die;
 //        var_dump("====",$dataList);die;
         $firstUrlArr =  explode("/", $url);
         $firstUrlArr =  explode("/", $url);
         array_pop($firstUrlArr);
         array_pop($firstUrlArr);
         $firstUrlArr = implode('/',$firstUrlArr);
         $firstUrlArr = implode('/',$firstUrlArr);
 
 
         $dataList = $dataList->toArray();
         $dataList = $dataList->toArray();
+//        var_dump($dataList);die;
         if($dataList){
         if($dataList){
             foreach ($dataList as $tiem){
             foreach ($dataList as $tiem){
                 $newUrl =  substr($tiem['link'], 1);
                 $newUrl =  substr($tiem['link'], 1);
                 $newUrlStr = $firstUrlArr.$newUrl;
                 $newUrlStr = $firstUrlArr.$newUrl;
                 $detailContent = QueryList::get($newUrlStr);
                 $detailContent = QueryList::get($newUrlStr);
-                $detailData = $detailContent->rules([
-                    'title'=>['h1','text'],
-                    'content'=>['.TRS_UEDITOR','html'],
-                ])->range(".news-details")->query()->getData();
+                $rules = [];
+                if($data['title']){
+                    $rules['title'] = [$data['title'],'text'];
+                }
+                if($data['content']){
+                    $rules['content'] = [$data['content'],'html'];
+                }
+                //详情页范围
+                $detailRange = '.news-details';
+                var_dump("打印规则:",$rules);
+                $detailData = $detailContent->rules($rules)->range($detailRange)->query()->getData();
 
 
                 $detailData = $detailData->toArray();
                 $detailData = $detailData->toArray();
-//                var_dump("内容详情:",$detailData,$newUrlStr);
+                var_dump("内容详情:",$detailData,$newUrlStr);
                 if($detailData){
                 if($detailData){
                     foreach ($detailData as $val){
                     foreach ($detailData as $val){
 //                        var_dump("进没进foreach:",$newUrlStr,$val);
 //                        var_dump("进没进foreach:",$newUrlStr,$val);
@@ -424,7 +437,6 @@ class CollectorService implements CollectorServiceInterface
                         $data['title'] = $val['title'];
                         $data['title'] = $val['title'];
                         $data['content'] = $val['content'];
                         $data['content'] = $val['content'];
                         $data['newUrlStr'] = $newUrlStr;
                         $data['newUrlStr'] = $newUrlStr;
-                        $data['source'] = '';
                         $data['introduce'] = $val['title']??'';
                         $data['introduce'] = $val['title']??'';
                         $data['keyword'] = $val['title']??'';
                         $data['keyword'] = $val['title']??'';
                         $data['copyfrom'] = $data['copyfrom'];
                         $data['copyfrom'] = $data['copyfrom'];

Dosya farkı çok büyük olduğundan ihmal edildi
+ 0 - 0
runtime/container/scan.cache


+ 1 - 1
runtime/hyperf.pid

@@ -1 +1 @@
-69072
+14350

+ 28 - 0
runtime/logs/hyperf.log

@@ -13076,3 +13076,31 @@
 [2024-12-04T07:06:17.586691+00:00] sql.INFO: [66.89] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '127') limit 1 [] []
 [2024-12-04T07:06:17.586691+00:00] sql.INFO: [66.89] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '127') limit 1 [] []
 [2024-12-04T07:06:17.854610+00:00] sql.INFO: [266.54] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-04 07:06:17' where (`id` = '127') [] []
 [2024-12-04T07:06:17.854610+00:00] sql.INFO: [266.54] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-04 07:06:17' where (`id` = '127') [] []
 [2024-12-04T07:06:18.207523+00:00] sql.INFO: [245.67] update `col_rule` set `status` = '2', `col_rule`.`updated_at` = '2024-12-04 07:06:17' where (`id` = '127') [] []
 [2024-12-04T07:06:18.207523+00:00] sql.INFO: [245.67] update `col_rule` set `status` = '2', `col_rule`.`updated_at` = '2024-12-04 07:06:17' where (`id` = '127') [] []
+[2024-12-05T08:14:57.772941+00:00] sql.INFO: [200.93] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '127') limit 1 [] []
+[2024-12-05T08:15:03.080816+00:00] sql.INFO: [101.08] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:15:02' where (`id` = '127') [] []
+[2024-12-05T08:16:40.149125+00:00] sql.INFO: [737.24] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:16:40.350192+00:00] sql.INFO: [199.57] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:16:40' where (`id` = '131') [] []
+[2024-12-05T08:18:36.913527+00:00] sql.INFO: [211.98] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:18:37.544634+00:00] sql.INFO: [111.64] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:18:37' where (`id` = '131') [] []
+[2024-12-05T08:19:57.341101+00:00] sql.INFO: [356.63] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:20:00.419648+00:00] sql.INFO: [109.23] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:20:00' where (`id` = '131') [] []
+[2024-12-05T08:21:04.971910+00:00] sql.INFO: [104.13] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:21:05.468009+00:00] sql.INFO: [61.32] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:21:05' where (`id` = '131') [] []
+[2024-12-05T08:21:58.277938+00:00] sql.INFO: [100.02] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:21:58.744967+00:00] sql.INFO: [83.46] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:21:58' where (`id` = '131') [] []
+[2024-12-05T08:28:39.679076+00:00] sql.INFO: [103.26] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:28:40.454400+00:00] sql.INFO: [427.62] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:28:40' where (`id` = '131') [] []
+[2024-12-05T08:29:54.591895+00:00] sql.INFO: [126.54] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:29:55.690159+00:00] sql.INFO: [127.24] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:29:55' where (`id` = '131') [] []
+[2024-12-05T08:30:42.336405+00:00] sql.INFO: [86.16] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:30:43.144645+00:00] sql.INFO: [117.86] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:30:43' where (`id` = '131') [] []
+[2024-12-05T08:36:37.313059+00:00] sql.INFO: [507.48] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:36:37.745027+00:00] sql.INFO: [267.01] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:36:37' where (`id` = '131') [] []
+[2024-12-05T08:38:13.978218+00:00] sql.INFO: [76.88] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:38:14.486348+00:00] sql.INFO: [171.69] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:38:14' where (`id` = '131') [] []
+[2024-12-05T08:44:18.079554+00:00] sql.INFO: [71.16] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:44:18.668624+00:00] sql.INFO: [122.51] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:44:18' where (`id` = '131') [] []
+[2024-12-05T08:44:20.533739+00:00] sql.INFO: [47.6] update `col_rule` set `status` = '2', `col_rule`.`updated_at` = '2024-12-05 08:44:20' where (`id` = '131') [] []
+[2024-12-05T08:52:11.464176+00:00] sql.INFO: [316.28] select `col_rule`.*, `col_web`.`name` as `web_name`, `col_web`.`url` as `web_url`, `col_web`.`type` as `web_type` from `col_rule` left join `col_web` on `col_rule`.`web_id` = `col_web`.`id` where (`col_rule`.`id` = '131') limit 1 [] []
+[2024-12-05T08:52:12.757551+00:00] sql.INFO: [155.8] update `col_rule` set `status` = '1', `col_rule`.`updated_at` = '2024-12-05 08:52:12' where (`id` = '131') [] []
+[2024-12-05T08:52:18.320962+00:00] sql.INFO: [63.79] update `col_rule` set `status` = '2', `col_rule`.`updated_at` = '2024-12-05 08:52:18' where (`id` = '131') [] []

Bu fark içinde çok fazla dosya değişikliği olduğu için bazı dosyalar gösterilmiyor