You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

52 lines
1.6 KiB

4 days ago
<?php
/**
* 抓取微博行为数据
* @package crontab
*/
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
/**
 * Cron job that refreshes the stored Weibo behavior data.
 *
 * All of the work is done by the constructor: it first asks mWeiboBehavior to
 * delete behavior data older than six months, then, for every user id found
 * in the keys of $GLOBALS['WEIBO_USER_LIST'], crawls that user's original
 * posts followed by their forwarded posts through mSpider.
 *
 * The first crawl failure is reported by e-mail and aborts the whole run,
 * including all users that have not been processed yet.
 */
class spiderBehaviorData {
    /** Maximum number of pages requested per user for each kind of data. */
    const MAX_PAGES = 1000;

    /** Address that receives the failure notification e-mail. */
    const ALERT_RECIPIENT = '1026652509@qq.com';

    /**
     * Runs the complete job: purge expired data, then crawl every user.
     */
    public function __construct() {
        // Delete data older than six months.
        $bobj = new mWeiboBehavior();
        $six_month_ago = date('Y-m-d', strtotime('-6 month'));
        $bobj->deleteExpireBehaviorData($six_month_ago);

        // A missing or malformed user list means there is nothing to crawl;
        // passing a non-array to array_keys() would raise an error instead.
        $user_list = (isset($GLOBALS['WEIBO_USER_LIST']) && is_array($GLOBALS['WEIBO_USER_LIST']))
            ? array_keys($GLOBALS['WEIBO_USER_LIST'])
            : array();

        $obj = new mSpider();
        foreach ($user_list as $uid) {
            // Collect original posts first, then forwarded posts. A failure in
            // either step stops the run for all remaining users as well.
            if (!$this->crawlAllPages($obj, 'spiderBehaviorOriginalData', $uid, '-微博原创数据抓取')) {
                break;
            }
            if (!$this->crawlAllPages($obj, 'spiderBehaviorForwardData', $uid, '-微博转发数据抓取')) {
                break;
            }
        }
    }

    /**
     * Requests pages 1..MAX_PAGES of one kind of data for one user.
     *
     * Stops early when the spider returns the string 'done' (collection
     * finished). A falsy result is treated as a failure and reported by mail.
     *
     * @param object $spider         Spider instance exposing $method, getError() and sendMail().
     * @param string $method         Name of the spider method to call with ($uid, $page).
     * @param mixed  $uid            User id taken from the keys of the user list.
     * @param string $subject_suffix Text appended to the date in the failure mail subject.
     * @return bool False when a page failed (the caller must abort), true otherwise.
     */
    private function crawlAllPages($spider, $method, $uid, $subject_suffix) {
        for ($page = 1; $page <= self::MAX_PAGES; $page++) {
            $res = $spider->$method($uid, $page);
            // Collection finished.
            if ($res === 'done') {
                return true;
            }
            if (!$res) {
                $this->reportFailure($spider, $subject_suffix);
                return false;
            }
        }
        // Page cap reached without a 'done' marker: not an error, just move on.
        return true;
    }

    /**
     * Mails the spider's last error to the alert recipient.
     *
     * The error code is looked up in $GLOBALS['spider_error']; when the code
     * has no entry there, a generic message naming the code is sent instead
     * of an empty body.
     *
     * @param object $spider         Spider instance exposing getError() and sendMail().
     * @param string $subject_suffix Text appended to the date in the mail subject.
     * @return void
     */
    private function reportFailure($spider, $subject_suffix) {
        $error_code = $spider->getError();

        // Only int/string codes are valid array offsets for the lookup.
        $is_known = (is_int($error_code) || is_string($error_code))
            && isset($GLOBALS['spider_error'])
            && is_array($GLOBALS['spider_error'])
            && isset($GLOBALS['spider_error'][$error_code]);

        $message = $is_known
            ? $GLOBALS['spider_error'][$error_code]
            : ('Unknown spider error code: ' . var_export($error_code, true));

        $spider->sendMail(array(self::ALERT_RECIPIENT), date('Y年m月d日') . $subject_suffix, $message);
    }
}
// Script entry point: constructing the object runs the whole job, because all of the work is done inside the constructor.
new spiderBehaviorData();