You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

156 lines
6.3 KiB

<?php
include_once dirname(dirname(__FILE__)).'/base/dealBase.php';
// PROC_CODE is this worker's process slot, taken verbatim from the second CLI
// argument. The class below uses it to pick this process's cookie from the
// shared cookie list and lets only slot 0 refresh/clear the shared proxy IP.
// NOTE(review): there is no check that $argv[2] was actually supplied — confirm
// the launcher always passes it.
define('PROC_CODE', $argv[2]);
/**
 * Daemon worker that crawls Weibo comments one page at a time.
 *
 * Each call to deal() pops one JSON task from the Redis list named by
 * _RQ_SPIDER_WEIBO_COMMENT, fetches one page of comments through mSpider,
 * stores it through mWeiboComments, and then either queues the next page or
 * marks the weibo as finished.
 *
 * The task fields read here are:
 *   - weibo_id (required, > 0)
 *   - max_id   (optional paging cursor, >= 0)
 *   - cookie   (optional; when present the task is pinned to that cookie)
 */
class spiderWeiboComment extends dealBase {
    /** @var string Name of the Redis list that holds pending crawl tasks (JSON strings). */
    private $task_key = _RQ_SPIDER_WEIBO_COMMENT;

    /**
     * Set the daemon parameters for this worker: the process count comes from
     * $GLOBALS['DAEMON_NUMLIMIT'] and is_while is switched on.
     *
     * NOTE(review): presumably dealBase reads these to decide how many
     * processes to run and whether to call deal() in a loop — confirm.
     */
    public function setPara() {
        $this->processnum = $GLOBALS['DAEMON_NUMLIMIT'][SPIDER_WEIBO_COMMENT];
        $this->is_while = true;
    }

    /**
     * Process a single task from the comment crawl queue.
     *
     * The process is terminated with exit when there is no cookie, the queue
     * is empty, the task is malformed, the weibo is unknown, or the cookie
     * was reported as expired by the spider.
     *
     * @return bool true when a page was stored (next page queued, or the
     *              weibo marked as finished); false when nothing was
     *              completed in this call (no proxy, crawl failure, storage
     *              failure, status update failure).
     */
    public function deal() {
        $obj = new mSpider();
        $rdobj = $obj->initRedis();
        $cookie_json = $rdobj->get(_RC_WEIBO_COMMENT_LOGIN_COOKIE2);
        $baselog = PROC_CODE . "|";
        if (empty($cookie_json)) {
            // Only log when there is queued work that cannot be processed.
            if ((int) $rdobj->llen($this->task_key) > 0) {
                $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, 'cookie为空:'.$baselog);
            }
            exit;
        }

        // Proxy IP is shared through Redis; only process slot 0 may fetch a new one.
        $ipinfo = $rdobj->get(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        $ipdata = json_decode($ipinfo, true);
        if (!$ipinfo && PROC_CODE == 0 && ((int) $rdobj->llen($this->task_key) > 0 || !$this->is_while)) {
            $ipdata = $obj->getZmhttpIp(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        }
        if (empty($ipdata)) {
            sleep(2);
            return false;
        }
        $obj->proxy_ip = $ipdata['ip'];
        $obj->proxy_port = $ipdata['port'];

        $rq_data = $rdobj->rpop($this->task_key);
        if (empty($rq_data)) exit();
        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取开始:'.$baselog.'|'.$rq_data);

        // json_decode() yields null/scalars for malformed input; normalise to
        // arrays so the lookups below cannot warn or fail on a non-array.
        $cookie_data = json_decode($cookie_json, true);
        if (!is_array($cookie_data)) $cookie_data = array();
        $rq_data = json_decode($rq_data, true);
        if (!is_array($rq_data)) $rq_data = array();

        if (!empty($rq_data['cookie'])) {
            // A pinned cookie is only honoured while it is still in the shared list.
            $cookie = in_array($rq_data['cookie'], $cookie_data, true) ? $rq_data['cookie'] : '';
        } else {
            $cookie = isset($cookie_data[PROC_CODE]) ? $cookie_data[PROC_CODE] : '';
        }
        if (empty($cookie)) {
            // NOTE(review): the task has already been popped and is not pushed
            // back here, so it is dropped — confirm this is intended.
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '未设置该进程cookie:'.$baselog.'|'.$cookie_json);
            exit;
        }
        $rq_data['cookie'] = $cookie;

        // Missing or non-numeric ids become 0 and are rejected/defaulted below.
        $weibo_id = (isset($rq_data['weibo_id']) && is_numeric($rq_data['weibo_id'])) ? $rq_data['weibo_id'] + 0 : 0;
        $max_id = (isset($rq_data['max_id']) && is_numeric($rq_data['max_id'])) ? $rq_data['max_id'] + 0 : 0;
        if ($weibo_id <= 0 || $max_id < 0) {
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, 'queue:数据格式不正确:'.json_encode($rq_data));
            exit;
        }
        $baselog = $weibo_id . "|" .$max_id . "|" . $ipdata['ip'] . "|" . $ipdata['port'] . "|" . PROC_CODE . "|";

        $wobj = new mWeibo();
        $weibo_info = $wobj->getWeiboById($weibo_id);
        if (empty($weibo_info)) {
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博信息不存在:'.$baselog);
            exit;
        }

        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取就绪开始:'.$baselog.'|'.json_encode($rq_data));
        $res = $obj->spiderComments($cookie, $weibo_info['uid'], $weibo_info['wid'], $weibo_info['mblogid'], $max_id);
        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取准备数据完成:'.$baselog.'|'.json_encode($rq_data));
        if (!$res) {
            $error = $obj->getError();
            $cookie_expired = ($error === 'cookie失效');
            if ($cookie_expired) {
                // The cookie is removed from the shared list below. Leaving it
                // pinned on the task would make the retry fail the cookie check
                // above and drop the task, so let the next worker choose its own.
                unset($rq_data['cookie']);
            }

            // Back off, then put the task back on the queue for another attempt.
            sleep(30);
            $rdobj = $obj->initRedis();
            $rds = $rdobj->rpush($this->task_key, json_encode($rq_data));
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '队列评论抓取失败,重试中:'.$error.'|'.$baselog.'|redis:'.$rds);

            if ($cookie_expired) {
                // NOTE(review): this writes back the list read at the start of
                // deal(); changes made to the key in the meantime are overwritten.
                $key = array_search($cookie, $cookie_data, true);
                if ($key !== false) unset($cookie_data[$key]);
                $rdobj->set(_RC_WEIBO_COMMENT_LOGIN_COOKIE2, json_encode($cookie_data));
            }
            if ($obj->is_change_ip && PROC_CODE == 0) $rdobj->del(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
            if ($cookie_expired) {
                $obj->sendMail(array('1432334894@qq.com'), date('Y年m月d日') . '-微博评论数据抓取', $error);
                exit;
            }
            return false;
        }

        $cobj = new mWeiboComments();
        $res = $cobj->addComment($weibo_id, $res);
        if (!$res) {
            $error = $cobj->getError().'|评论json数据保存到数据库失败~';
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, $baselog.'|'.$error);
            $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_ERROR));
            return false;
        }
        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博评论分页抓取成功:'.json_encode($res).'|'.$baselog);
        $rdobj = $obj->initRedis();

        // Nothing stored although a further cursor exists: treated as a failed crawl.
        if ($res['count'] == 0 && $res['max_id'] > 0) {
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取所有评论失败:'.$baselog.'|'.$res['max_id']);
            $ures = $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_ERROR));
            if ($ures) $rdobj->srem(_RS_SPIDER_WEIBO_COMMENT, $weibo_id);
            sleep(10);
            return false;
        }

        if (!$res['is_load_all']) {
            // More pages remain: the addComment() result itself is queued as the next task.
            sleep(10);
            $rdobj = $obj->initRedis();
            $rds = $rdobj->rpush($this->task_key, json_encode($res));
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取下一页:'.$baselog.'|result:'.json_encode($res).'|redis:'.$rds);
            return true;
        }

        // All pages loaded: mark the weibo as done and remove it from the tracking set.
        $res = $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_SUCCESS));
        if (!$res) {
            $error = $wobj->getError();
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '更新微博状态失败:'.$error.'|'.$baselog);
            sleep(2);
            return false;
        }
        $rdobj = $obj->initRedis();
        $rds = $rdobj->srem(_RS_SPIDER_WEIBO_COMMENT, $weibo_id);
        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取成功:'.$baselog.'|redis:'.$rds);
        sleep(4);
        return true;
    }
}
// Instantiate the worker. The instance is not kept, so any work must be
// started from the constructor.
// NOTE(review): presumably dealBase's constructor calls setPara() and drives
// deal() — confirm in base/dealBase.php.
new spiderWeiboComment();