Browse Source

修复使用不同登录用户的 cookie 抓取同一条微博评论的不同分页时,被识别为爬虫而导致抓取失败的问题

pull/86/head
longchao 2 days ago
parent
commit
08da801d0b
  1. 25
      queue/deal/spider_weibo_comment.php

25
queue/deal/spider_weibo_comment.php

@ -23,13 +23,6 @@ class spiderWeiboComment extends dealBase {
exit; exit;
} }
$cookie_data = json_decode($cookie_json, true);
$cookie = $cookie_data[PROC_CODE];
if(empty($cookie)) {
if($rdobj->llen(_RQ_SPIDER_WEIBO_COMMENT) + 0 > 0) $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '未设置该进程cookie:'.$baselog.'|'.$cookie_json);
exit;
}
$ipinfo = $rdobj->get(_RC_SPIDER_WEIBO_COMMENT_USE_IP); $ipinfo = $rdobj->get(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
$ipdata = json_decode($ipinfo, true); $ipdata = json_decode($ipinfo, true);
if (!$ipinfo && PROC_CODE == 0 && ($rdobj->llen(_RQ_SPIDER_WEIBO_COMMENT) + 0 > 0 || !$this->is_while)) $ipdata = $obj->getZmhttpIp(_RC_SPIDER_WEIBO_COMMENT_USE_IP); if (!$ipinfo && PROC_CODE == 0 && ($rdobj->llen(_RQ_SPIDER_WEIBO_COMMENT) + 0 > 0 || !$this->is_while)) $ipdata = $obj->getZmhttpIp(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
@ -46,7 +39,21 @@ class spiderWeiboComment extends dealBase {
$obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取开始:'.$baselog.'|'.$rq_data); $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取开始:'.$baselog.'|'.$rq_data);
$cookie_data = json_decode($cookie_json, true);
$rq_data = json_decode($rq_data, true); $rq_data = json_decode($rq_data, true);
if(!empty($rq_data['cookie'])) {
$cookie = $rq_data['cookie'];
if(!in_array($cookie, $cookie_data)) $cookie = '';
} else {
$cookie = $cookie_data[PROC_CODE];
}
if(empty($cookie)) {
$obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '未设置该进程cookie:'.$baselog.'|'.$cookie_json);
exit;
}
$rq_data['cookie'] = $cookie;
$weibo_id = $rq_data['weibo_id']+0; $weibo_id = $rq_data['weibo_id']+0;
$max_id = $rq_data['max_id']+0; $max_id = $rq_data['max_id']+0;
@ -80,7 +87,9 @@ class spiderWeiboComment extends dealBase {
$obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '队列评论抓取失败,重试中:'.$error.'|'.$baselog.'|redis:'.$rds); $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '队列评论抓取失败,重试中:'.$error.'|'.$baselog.'|redis:'.$rds);
unset($cookie_data[PROC_CODE]); $key = array_search($cookie, $cookie_data);
if ($key !== false) unset($cookie_data[$key]);
$rdobj->set(_RC_WEIBO_COMMENT_LOGIN_COOKIE2, json_encode($cookie_data)); $rdobj->set(_RC_WEIBO_COMMENT_LOGIN_COOKIE2, json_encode($cookie_data));
if ($obj->is_change_ip && PROC_CODE == 0) $rdobj->del(_RC_SPIDER_WEIBO_COMMENT_USE_IP); if ($obj->is_change_ip && PROC_CODE == 0) $rdobj->del(_RC_SPIDER_WEIBO_COMMENT_USE_IP);

Loading…
Cancel
Save