9 changed files with 246 additions and 23 deletions
@ -0,0 +1,38 @@ |
|||
<?php |
|||
/**
 * Task for crawling Weibo comments.
 *
 * @package crontab
 */
|||
|
|||
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php"); |
|||
|
|||
/**
 * Cron task that queues Weibo posts for comment crawling.
 *
 * All of the work happens in the constructor, so instantiating the class
 * performs one enqueue pass.
 */
class addSpiderWeiboCommentTask
{
    /**
     * Push Weibo ids that are not yet tracked onto the comment crawl queue.
     *
     * Returns early when the Redis list _RQ_SPIDER_WEIBO_COMMENT already holds
     * more than 50 entries. Otherwise one page (up to 500 rows) is requested
     * from mWeibo::getWeiboList() with a uid / spider_comment_status filter,
     * and every row whose id is not a member of the Redis set
     * _RS_SPIDER_WEIBO_COMMENT is pushed to the list as a JSON task
     * {"weibo_id": <id>, "max_id": 0} and then added to that set.
     *
     * @return bool Always true (the value is discarded when invoked via `new`).
     */
    public function __construct()
    {
        $filter = array(
            'uid' => WEIBO_USER_ZHANG,
            'spider_comment_status' => SPIDER_COMMENT_STATUS_NO,
        );
        $per_page = 500;
        // Only the first page is processed on each run.
        $last_page = 1;

        $weibo = new mWeibo();
        $redis = $weibo->initRedis();

        // Skip this run while the queue still has a backlog of more than 50 tasks.
        $backlog = $redis->llen(_RQ_SPIDER_WEIBO_COMMENT) + 0;
        if ($backlog > 50) {
            return true;
        }

        for ($page = 1; $page <= $last_page; ++$page) {
            $rows = $weibo->getWeiboList($filter, $page, $per_page);
            if (empty($rows)) {
                break;
            }

            foreach ($rows as $row) {
                // The set records ids that are already queued, so each post is queued once.
                if (!$redis->sIsMember(_RS_SPIDER_WEIBO_COMMENT, $row['id'])) {
                    $task = array('weibo_id' => $row['id'], 'max_id' => 0);
                    $redis->lpush(_RQ_SPIDER_WEIBO_COMMENT, json_encode($task));
                    $redis->sAdd(_RS_SPIDER_WEIBO_COMMENT, $row['id']);
                }
            }
        }

        return true;
    }
}
|||
|
|||
// Script entry point: the constructor of addSpiderWeiboCommentTask performs the whole enqueue pass.
new addSpiderWeiboCommentTask(); |
@ -0,0 +1,103 @@ |
|||
<?php |
|||
include_once dirname(dirname(__FILE__)).'/base/dealBase.php'; |
|||
|
|||
// Process code of this worker, taken from the second CLI argument.
// Falls back to '0' when the argument is missing so that PROC_CODE is never
// null and no undefined-offset notice is raised; the code below compares it
// loosely against 0, so the fallback behaves like the previous null value.
define('PROC_CODE', isset($argv[2]) ? $argv[2] : '0');
|||
|
|||
/**
 * Worker that consumes the Weibo comment crawl queue.
 *
 * Each deal() call handles at most one task popped from the Redis list
 * _RQ_SPIDER_WEIBO_COMMENT: it crawls one batch of comments through a proxy,
 * stores it, and either re-queues a follow-up task or marks the post as done.
 *
 * NOTE(review): presumably dealBase invokes setPara() and then deal()
 * (repeatedly while is_while is true) — confirm in base/dealBase.php.
 */
class spiderWeiboComment extends dealBase
{
    /** Redis list key that crawl tasks are popped from and re-queued to. */
    private $task_key = _RQ_SPIDER_WEIBO_COMMENT;

    /**
     * Configure the worker: take the process limit from
     * $GLOBALS['DAEMON_NUMLIMIT'][SPIDER_WEIBO_COMMENT] and enable is_while.
     */
    public function setPara()
    {
        $this->processnum = $GLOBALS['DAEMON_NUMLIMIT'][SPIDER_WEIBO_COMMENT];
        $this->is_while = true;
    }

    /**
     * Process one queued comment crawl task.
     *
     * The process exits (instead of returning) when the queue is empty, when
     * the task payload is invalid, when the Weibo row cannot be found, or when
     * the crawler reports an expired cookie.
     *
     * @return bool True when the batch was stored (either more batches were
     *              re-queued or the post was marked successful); false when no
     *              proxy is available, the crawl failed and was re-queued, or a
     *              database write failed.
     */
    public function deal()
    {
        $obj = new mSpider();
        $rdobj = $obj->initRedis();

        // Proxy address shared through Redis. Skip decoding when nothing is cached
        // so json_decode() is never handed a false/null value.
        $ipinfo = $rdobj->get(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        $ipdata = $ipinfo ? json_decode($ipinfo, true) : null;

        // Only the worker whose PROC_CODE is 0 requests a new proxy, and only when
        // there is queued work (or the worker is not in loop mode).
        if (!$ipinfo && PROC_CODE == 0 && ($rdobj->llen($this->task_key) + 0 > 0 || !$this->is_while)) {
            $ipdata = $obj->getZmhttpIp(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        }
        if (empty($ipdata)) {
            sleep(2);
            return false;
        }

        $obj->proxy_ip = $ipdata['ip'];
        $obj->proxy_port = $ipdata['port'];

        $raw_task = $rdobj->rpop($this->task_key);
        if (empty($raw_task)) {
            exit();
        }

        $rq_data = json_decode($raw_task, true);

        // Missing or non-numeric fields fall back to 0, so a malformed payload is
        // rejected by the validation below instead of raising warnings/TypeErrors.
        $weibo_id = (isset($rq_data['weibo_id']) && is_numeric($rq_data['weibo_id'])) ? $rq_data['weibo_id'] + 0 : 0;
        $max_id = (isset($rq_data['max_id']) && is_numeric($rq_data['max_id'])) ? $rq_data['max_id'] + 0 : 0;
        if ($weibo_id <= 0 || $max_id < 0) {
            // Log the raw queue payload. The previous code passed the decoded array
            // to json_decode(), which cannot produce a loggable string.
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, 'queue:数据格式不正确:' . $raw_task);
            exit;
        }

        // Common suffix for log lines: weibo id, max id, proxy ip, proxy port, process code.
        $baselog = $weibo_id . "|" . $max_id . "|" . $ipdata['ip'] . "|" . $ipdata['port'] . "|" . PROC_CODE . "|";

        $wobj = new mWeibo();
        $weibo_info = $wobj->getWeiboById($weibo_id);
        if (empty($weibo_info)) {
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博信息不存在:' . $baselog);
            exit;
        }

        // Prefer the cookie stored in Redis; fall back to the configured constant.
        $cookie = $rdobj->get(_RC_WEIBO_LOGIN_COOKIE2);
        if (empty($cookie)) {
            $cookie = WEIBO_LOGIN_COOKIE2;
        }

        $res = $obj->spiderComments($cookie, $weibo_info['uid'], $weibo_info['wid'], $weibo_info['mblogid'], $max_id);
        if (!$res) {
            $error = $obj->getError();
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '队列评论抓取失败,重试中:' . $error . '|' . $baselog);

            if ($error == 'cookie失效') {
                // An expired cookie blocks every task: notify by mail, then drop the
                // queue and the de-duplication set before stopping this process.
                $obj->sendMail(array('1432334894@qq.com'), date('Y年m月d日', time()) . '-微博评论数据抓取', $error);
                $rdobj->del($this->task_key);
                $rdobj->del(_RS_SPIDER_WEIBO_COMMENT);
                exit;
            }

            // Put the task back for a retry; the PROC_CODE 0 worker also clears the
            // cached proxy when the crawler sets is_change_ip.
            $rdobj->lpush($this->task_key, json_encode($rq_data));
            if ($obj->is_change_ip && PROC_CODE == 0) {
                $rdobj->del(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
            }
            sleep(30);

            return false;
        }

        $cobj = new mWeiboComments();
        $res = $cobj->addComment($weibo_id, $res);
        if (!$res) {
            $error = $cobj->getError() . '|评论json数据保存到数据库失败~';
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, $baselog . '|' . $error);
            $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_ERROR));
            return false;
        }

        if (!$res['is_load_all']) {
            // More comments remain: the addComment() result is queued as the next task.
            // NOTE(review): presumably it carries weibo_id and the next max_id — confirm
            // against mWeiboComments::addComment().
            $rdobj->lpush($this->task_key, json_encode($res));
            return true;
        }

        $res = $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_SUCCESS));
        if (!$res) {
            $error = $wobj->getError();
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '更新微博状态失败:' . $error . '|' . $baselog);
            sleep(2);
            return false;
        }

        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取成功:' . $baselog);
        // The post is finished, so remove it from the de-duplication set.
        $rdobj->sRem(_RS_SPIDER_WEIBO_COMMENT, $weibo_id);
        sleep(3);

        return true;
    }
}
|||
|
|||
// Script entry point: instantiate the worker.
// NOTE(review): presumably the dealBase constructor drives setPara()/deal() — confirm in base/dealBase.php.
new spiderWeiboComment(); |
File diff suppressed because one or more lines are too long
Loading…
Reference in new issue