From 02ebdc01fe919ca3aade50214bbe49f4c0d77080 Mon Sep 17 00:00:00 2001 From: pengda <10266652509@qq.com> Date: Tue, 20 May 2025 14:52:51 +0800 Subject: [PATCH] =?UTF-8?q?=E9=82=AE=E4=BB=B6=E4=BE=9D=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/define.php | 48 +++++ control/weibo.php | 22 ++- data/dWeiboBehavior.php | 36 ++++ model/mBase.php | 20 ++- model/mSpider.php | 301 +++++++++++++++++++++++++++++++- model/mWeibo.php | 12 ++ model/mWeiboBehavior.php | 95 ++++++++++ queue/config/daemonconf.php | 2 +- queue/crontab/spider_behavior_data.php | 51 ++++++ queue/crontab/spider_behavior_delta.php | 57 ++++++ 10 files changed, 632 insertions(+), 12 deletions(-) create mode 100644 data/dWeiboBehavior.php create mode 100644 model/mWeiboBehavior.php create mode 100644 queue/crontab/spider_behavior_data.php create mode 100644 queue/crontab/spider_behavior_delta.php diff --git a/config/define.php b/config/define.php index 3bafc3b..46087ff 100644 --- a/config/define.php +++ b/config/define.php @@ -49,4 +49,52 @@ define('WEIBO_IMG_URL', 'https://wx4.sinaimg.cn/mw690/%s.jpg'); + define('WEIBO_LOGIN_COOKIE', '_2A25FItemDeRhGeFH41MV8ibNyjSIHXVmXlVurDV8PUNbmtAYLWP9kW9NekLV22FUUuTM0k2WTJS1xyONMvh_V90k'); + + define('WEIBO_USER_ZHANG',2282201403); + define('WEIBO_USER_XU',1285478591); + define('WEIBO_USER_CHENG',5031299688); + define('WEIBO_USER_LUO',1547596314); + $GLOBALS['WEIBO_USER_LIST'] = array( + WEIBO_USER_ZHANG => '针灸匠张宝旬', +// WEIBO_USER_XU => '徐文兵', +// WEIBO_USER_CHENG => '在下程程', +// WEIBO_USER_LUO => '罗大伦', + ); + + define('_RC_SPIDER_IP', 'rc_spider_ip'); + + define('SPIDER_REQUEST_MAX_NUMS', 5); + + define('SPIDER_ERROR_IP_GET_EMPTY', 101); + define('SPIDER_ERROR_IP_GET_OUT', 102); + define('SPIDER_ERROR_IP_GET_FAIL', 103); + define('SPIDER_ERROR_IP_EXPIERD', 104); + define('SPIDER_ERROR_IP_ASTABLE', 105); + define('SPIDER_ERROR_IP_UNKNOWN', 199); + define('SPIDER_ERROR_URL_GET_EMPTY', 201); + define('SPIDER_ERROR_URL_COOKIE_EXPIRED', 202); + define('SPIDER_ERROR_URL_GET_UNKNOWN', 299); + $GLOBALS['spider_error'] = array( + SPIDER_ERROR_IP_GET_EMPTY => '代理ip获取为空', + SPIDER_ERROR_IP_GET_OUT => '代理ip获取超额', + SPIDER_ERROR_IP_GET_FAIL => '代理ip获取失败', + SPIDER_ERROR_IP_EXPIERD => '代理ip失效', + SPIDER_ERROR_IP_ASTABLE => '代理ip不稳定', + SPIDER_ERROR_IP_UNKNOWN => '代理ip未知错误', + SPIDER_ERROR_URL_GET_EMPTY => '请求为空', + SPIDER_ERROR_URL_COOKIE_EXPIRED => 'cookie失效', + SPIDER_ERROR_URL_GET_UNKNOWN => '请求未知错误', + ); + + $GLOBALS['notice_mail_list'] = array( + 'mouzhi@qq.com' => array( + 'host' => 'smtp.qq.com', + 'password' => 'gtjywrxamasibjch' + ), + ); + + define('BEHAVIOR_TYPE_REPOSTS', 0); + define('BEHAVIOR_TYPE_COMMENTS', 1); + define('BEHAVIOR_TYPE_ATTITUDES', 2); diff --git a/control/weibo.php b/control/weibo.php index 8377d3e..376b851 100644 --- a/control/weibo.php +++ b/control/weibo.php @@ -85,5 +85,25 @@ class weibo extends publicBase { $this->ajax_json(true, '保存成功'); } - public function ajax_upload_file() {} + public function ajax_hot_list() { + $type = $this->post('type') + 0; + $sdate = trim($this->post('sdate')); + $edate = trim($this->post('edate')); + $cur_page = $this->post('currentPage') ? $this->post('currentPage') : 1; + $page_size = $this->post('pageSize') ? $this->post('pageSize') : 20; + + $obj = new mWeiboBehavior(); + $list = $obj->getHotBehavior($type, $sdate, $edate, $cur_page, $page_size); + $total = $obj->getHotBehaviorTotal($sdate, $edate); + + $rdata = array( + 'total' => $total, + 'per_page' => $page_size, + 'last_page' => ceil($total / $page_size), + 'cur_page' => $cur_page, + 'list' => $list, + ); + + $this->ajax_json(true, '获取成功', $rdata); + } } diff --git a/data/dWeiboBehavior.php b/data/dWeiboBehavior.php new file mode 100644 index 0000000..a2b7a52 --- /dev/null +++ b/data/dWeiboBehavior.php @@ -0,0 +1,36 @@ + array( + 'id', + 'uid', + 'wid', + 'reposts_count', + 'comments_count', + 'attitudes_count', + 'date', + 'created_at', + ), + 'spider_behavior_delta' => array( + 'id', + 'uid', + 'wid', + 'reposts_delta', + 'comments_delta', + 'attitudes_delta', + 'date', + 'created_at', + ), + ); + + protected $primary_keys = array( + 'spider_behavior_data' => 'id', + 'spider_behavior_delta' => 'id', + ); +} + diff --git a/model/mBase.php b/model/mBase.php index 97b9d84..306b0cf 100644 --- a/model/mBase.php +++ b/model/mBase.php @@ -114,17 +114,19 @@ class mBase extends publicBase { * @param string $title 邮件标题 * @param string $content 邮件内容 */ - public static function sendMail($emails, $title, $content) { + public static function sendMail($emails, $title, $content,$attr='') { include_once SERVER_ROOT . '/library/mail/class.phpmailer.php'; - $mail_name = array_rand($GLOBALS['notice_qqmail_list']); + $mail_name = array_rand($GLOBALS['notice_mail_list']); $mail = new PHPMailer(); $mail->IsSMTP(); - $mail->Host = 'smtp.163.com'; - $mail->Port = 465; + $mail->Host = $GLOBALS['notice_mail_list'][$mail_name]['host']; + $mail->Port = 465; // TCP 端口 + $mail->SMTPSecure = 'ssl'; // 启用 TLS 加密 + //$mail->SMTPDebug = 2; // 是否开启调试 $mail->Username = $mail_name; - $mail->Password = $GLOBALS['notice_qqmail_list'][$mail_name]; + $mail->Password = $GLOBALS['notice_mail_list'][$mail_name]['password']; $mail->From = $mail_name; $mail->CharSet = "utf-8"; $mail->IsHTML(true); @@ -132,9 +134,13 @@ class mBase extends publicBase { $mail->ClearAddresses(); $mail->SetLanguage('en', SERVER_ROOT . '/library/mail/language/'); - $mail->FromName = "快乐论文"; + if($attr){ + $mail->addAttachment($attr); + } + + $mail->FromName = "知识库"; foreach($emails as $email) { - $mail->AddAddress($email); + $mail->AddAddress($email); } $mail->Subject = $title; $mail->Body = $content; diff --git a/model/mSpider.php b/model/mSpider.php index 9e3af5d..d2fc5cc 100644 --- a/model/mSpider.php +++ b/model/mSpider.php @@ -7,6 +7,8 @@ include_once(SERVER_ROOT . "/model/mBase.php"); class mSpider extends mBase { + private $error_nums = 0; + public function __construct() { $this->obj = new dWeibo(); $this->tbl = 'spider_weibo'; @@ -37,12 +39,49 @@ class mSpider extends mBase { return array('ip' => $ip, 'port' => $port); } - public function getRequest($url, $headers = array(), $timeout = 60) { - if (empty($this->proxy_ip) || empty($this->proxy_port)) { - $this->setError('配置代理后再来请求吧'); + public function getNewHttpIp($task_key, $times = 1) { + $url = "http://proxy.siyetian.com/apis_get.html?token=AesJWLNp2a65kaJdXTqFFeNpWT35ERNpnTn1STqFUeORUR31kaNh3TUl0dPRUQy4ERJdXT6lVN.QMxkTO0MjM0cTM&limit=1&type=1&time=&data_format=json&showTimeEnd=true"; + $jsoninfo = $this->getCUrl($url); + + $this->writeLog('spider', 'getNewHttpIp.log', $task_key . '|' . $jsoninfo); + + //请求为空 重试第五次还是为空 则抛出异常 + if (empty($jsoninfo)) { + if ($times >= SPIDER_REQUEST_MAX_NUMS) { + $this->setError(SPIDER_ERROR_IP_GET_EMPTY); + return false; + } + + //请求为空 5秒后重试 + sleep(5); + return $this->getNewHttpIp($task_key, $times + 1); + } + + $data = json_decode($jsoninfo, true); + //套餐用完 需要换套餐了 + if ($data['code'] == 10019 || $data['code'] == 10005) { + $this->setError(SPIDER_ERROR_IP_GET_OUT); + return false; + } + + $ip = $data['data'][0]['ip']; + $port = $data['data'][0]['port']; + $end_time = $data['data'][0]['end_time']; + //其他异常错误 + if (empty($ip) || empty($port)) { + $this->setError(SPIDER_ERROR_IP_GET_FAIL); return false; } + $expire_time = strtotime($end_time) - time(); + + $rdobj = $this->initRedis(); + $rdobj->setex($task_key, $expire_time, json_encode(array('ip' => $ip, 'port' => $port, 'end_time' => $end_time))); + + return array('ip' => $ip, 'port' => $port); + } + + public function getRequest($url, $headers = array(), $timeout = 60) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); @@ -206,4 +245,260 @@ class mSpider extends mBase { return $this->obj->selectAll('spider_weibo', array('sql' => $where, 'vals' => array()), 'id desc ', array($offset, $page_size)); } + + //原创微博 用户行为数据 + public function spiderBehaviorOriginalData($uid, $page, $times = 1) { + $this->writeLog('spider', 'getData.log', "{$uid}|{$page}|{$times}"); + + $rdobj = $this->initRedis(); + + $ipinfo = $rdobj->get(_RC_SPIDER_IP); + $ipdata = json_decode($ipinfo, true); + if (!$ipinfo) { + $ipdata = $this->getNewHttpIp(_RC_SPIDER_IP); + } + + //获取代理ip失败 + if (empty($ipdata)) { + $this->setError($this->getError()); + return false; + } + + $this->proxy_ip = $ipdata['ip']; + $this->proxy_port = $ipdata['port']; + + $cookie = WEIBO_LOGIN_COOKIE; + $url = "https://weibo.com/ajax/statuses/searchProfile?uid={$uid}&page={$page}&hasori=1"; + $headers = array( + "Cookie: SUB={$cookie}", + "x-requested-with: XMLHttpRequest", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + "Referer: https://weibo.com/u/{$uid}?tabtype=feed" + ); + + $res = $this->getRequest($url, $headers); + //请求错误 + if (empty($res)) { + //请求为空 + $error_code = SPIDER_ERROR_URL_GET_EMPTY; + //curl错误 + if ($this->curl_err) { + $this->error_nums++; + $error_code = $this->getSpiderErrors(); + } + //ip不稳定 请求不作数 + if ($error_code == SPIDER_ERROR_IP_ASTABLE) $times--; + //代理失效 + if ($error_code == SPIDER_ERROR_IP_EXPIERD) $rdobj->del(_RC_SPIDER_IP); + + //连续五次请求错误 则抛出异常 + if ($times >= SPIDER_REQUEST_MAX_NUMS) { + $this->setError($error_code); + return false; + } + + sleep(2); + return $this->spiderBehaviorOriginalData($uid, $page, $times + 1); + } + $this->error_nums = 0; + + if (strpos($res, '登录 - 微博')) { + $this->setError(SPIDER_ERROR_URL_COOKIE_EXPIRED); + return false; + } + + $data = json_decode($res, 1); + if (!$data) { + $this->writeLog('spider', 'errorData.log', $res); + $this->setError(SPIDER_ERROR_URL_GET_UNKNOWN); + return false; + } + + //获取完毕 + if (empty($data['data']['list'])) return 'done'; + + $date = date('Y-m-d'); + $bobj = new mWeiboBehavior(); + $wobj = new mWeibo(); + foreach ($data['data']['list'] as $da) { + //博主设置不可见 + if (!isset($da['reposts_count'])) continue; + + $temp = array( + 'uid' => $uid, + 'wid' => $da['id'], + 'reposts_count' => $da['reposts_count'], + 'comments_count' => $da['comments_count'], + 'attitudes_count' => $da['attitudes_count'], + 'date' => $date, + ); + $res = $bobj->saveBehaviorData($temp); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode($temp)); + } + + $created_at = new DateTime($da['created_at']); // 解析时间 + $weibo = array( + 'mblogid' => $da['mblogid'], + 'text' => strip_tags($da['text']), + 'refer' => '', + 'pic_ids' => $da['pic_num'] > 0 ? json_encode($da['pic_ids']) : '', + 'video_url' => isset($da['page_info']['media_info']['stream_url']) ? $da['page_info']['media_info']['stream_url'] : '', + 'video_cover' => isset($da['page_info']['page_pic']) ? pathinfo(basename($da['page_info']['page_pic']), PATHINFO_FILENAME) : '', + 'created_at' => $created_at->format('Y-m-d H:i:s') + ); + $res = $wobj->saveWeiboData($temp, $weibo); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode(array_merge($temp, $weibo))); + } + } + + $this->writeLog('spider', 'getDone.log', "{$uid}|{$page}|{$times}"); + return true; + } + + //转发微博 用户行为数据 + public function spiderBehaviorForwardData($uid, $page, $times = 1) { + $this->writeLog('spider', 'getData.log', "{$uid}|{$page}|{$times}"); + + $rdobj = $this->initRedis(); + + $ipinfo = $rdobj->get(_RC_SPIDER_IP); + $ipdata = json_decode($ipinfo, true); + if (!$ipinfo) { + $ipdata = $this->getNewHttpIp(_RC_SPIDER_IP); + } + + //获取代理ip失败 + if (empty($ipdata)) { + $this->setError($this->getError()); + return false; + } + + $this->proxy_ip = $ipdata['ip']; + $this->proxy_port = $ipdata['port']; + + $cookie = WEIBO_LOGIN_COOKIE; + $endtime = strtotime("tomorrow"); + $url = "https://weibo.com/ajax/statuses/searchProfile?uid={$uid}&page={$page}&endtime={$endtime}&hasret=1&hastext=1&haspic=1&hasvideo=1&hasmusic=1"; + $headers = array( + "Cookie: SUB={$cookie}", + "x-requested-with: XMLHttpRequest", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + "Referer: https://weibo.com/u/{$uid}?is_text=1&is_pic=1&is_video=1&is_music=1&is_forward=1&end_time={$endtime}" + ); + $res = $this->getRequest($url, $headers); + + //请求错误 + if (empty($res)) { + //请求为空 + $error_code = SPIDER_ERROR_URL_GET_EMPTY; + //curl错误 + if ($this->curl_err) { + $this->error_nums++; + $error_code = $this->getSpiderErrors(); + } + //ip不稳定 请求不作数 + if ($error_code == SPIDER_ERROR_IP_ASTABLE) $times--; + //代理失效 + if ($error_code == SPIDER_ERROR_IP_EXPIERD) $rdobj->del(_RC_SPIDER_IP); + + //连续五次请求错误 则抛出异常 + if ($times >= SPIDER_REQUEST_MAX_NUMS) { + $this->setError($error_code); + return false; + } + + sleep(2); + return $this->spiderBehaviorForwardData($uid, $page, $times + 1); + } + $this->error_nums = 0; + + if (strpos($res, '登录 - 微博')) { + $this->setError(SPIDER_ERROR_URL_COOKIE_EXPIRED); + return false; + } + + $data = json_decode($res, 1); + if (!$data) { + $this->writeLog('spider', 'errorData.log', $res); + $this->setError(SPIDER_ERROR_URL_GET_UNKNOWN); + return false; + } + + //获取完毕 + if (empty($data['data']['list'])) return 'done'; + + $date = date('Y-m-d'); + $bobj = new mWeiboBehavior(); + $wobj = new mWeibo(); + foreach ($data['data']['list'] as $item) { + if (!isset($item['user']['id']) || $item['user']['id'] != $uid) continue; + + $da = $item['retweeted_status']; + //博主设置不可见 + if (!isset($da['reposts_count'])) continue; + + $temp = array( + 'uid' => $uid, + 'wid' => $da['id'], + 'reposts_count' => $da['reposts_count'], + 'comments_count' => $da['comments_count'], + 'attitudes_count' => $da['attitudes_count'], + 'date' => $date, + ); + $res = $bobj->saveBehaviorData($temp); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode($temp)); + } + + $created_at = new DateTime($da['created_at']); // 解析时间 + $weibo = array( + 'mblogid' => $da['mblogid'], + 'text' => strip_tags($da['text']), + 'refer' => '', + 'pic_ids' => $da['pic_num'] > 0 ? json_encode($da['pic_ids']) : '', + 'video_url' => isset($da['page_info']['media_info']['stream_url']) ? $da['page_info']['media_info']['stream_url'] : '', + 'video_cover' => isset($da['page_info']['page_pic']) ? pathinfo(basename($da['page_info']['page_pic']), PATHINFO_FILENAME) : '', + 'created_at' => $created_at->format('Y-m-d H:i:s') + ); + $res = $wobj->saveWeiboData($temp, $weibo); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode(array_merge($temp, $weibo))); + } + } + + $this->writeLog('spider', 'getDone.log', "{$uid}|{$page}|{$times}"); + return true; + } + + private function getSpiderErrors() { + preg_match('/Unable to receive initial SOCKS5 response./', $this->curl_err, $match); + if ($match[0]) { + return SPIDER_ERROR_IP_EXPIERD; + } + + preg_match('/SOCKS5 read timeout/', $this->curl_err, $match); + if ($match[0]) { + return SPIDER_ERROR_IP_EXPIERD; + } + + $this->writeLog('spider', 'curl_error.log', $this->curl_err); + + //ip不稳定 + preg_match("/Can't complete SOCKS5 connection to 0.0.0.0:0/", $this->curl_err, $match); + if ($match[0]) { + if ($this->error_nums < 5) return SPIDER_ERROR_IP_ASTABLE; + if ($this->error_nums == 5) return SPIDER_ERROR_IP_EXPIERD; + } + + //ip不稳定 + preg_match("/Connection refused/", $this->curl_err, $match); + if ($match[0]) { + if ($this->error_nums < 5) return SPIDER_ERROR_IP_ASTABLE; + if ($this->error_nums == 5) return SPIDER_ERROR_IP_EXPIERD; + } + + return SPIDER_ERROR_IP_UNKNOWN; + } } \ No newline at end of file diff --git a/model/mWeibo.php b/model/mWeibo.php index e8498f1..6c61c72 100644 --- a/model/mWeibo.php +++ b/model/mWeibo.php @@ -109,4 +109,16 @@ class mWeibo extends mBase { public function getVideoOssUrl($picid, $created_at) { return ALIYUN_OSS_URI . date("Y-m", strtotime($created_at)) . '/' . $picid . '.mp4'; } + + public function saveWeiboData($behavior, $data) { + $weibo = $this->obj->select($this->tbl, array('sql' => '`wid`=?', 'vals' => array($behavior['wid']))); + if ($weibo) { + return $this->updateWeibo($weibo['id'], $behavior); + } + return $this->addWeibo(array_merge($behavior, $data)); + } + + public function getWeiboByWids($wids) { + return $this->obj->selectIn($this->tbl, array('wid'=>$wids)); + } } \ No newline at end of file diff --git a/model/mWeiboBehavior.php b/model/mWeiboBehavior.php new file mode 100644 index 0000000..c47246a --- /dev/null +++ b/model/mWeiboBehavior.php @@ -0,0 +1,95 @@ +obj = new dWeiboBehavior(); + $this->tbl_data = 'spider_behavior_data'; + $this->tbl_delta = 'spider_behavior_delta'; + } + + public function saveBehaviorData($data) { + return $this->obj->replace($this->tbl_data, $data); + } + + public function getBehaviorByDate($date) { + return $this->obj->selectAll($this->tbl_data, array('sql' => "`date`=?", 'vals' => array($date)), 'id desc', array(0, 10000)); + } + + public function deleteExpireBehaviorData($date) { + return $this->obj->delete($this->tbl_data, array('sql' => "`date` array($date))); + } + + public function saveBehaviorDelta($data) { + return $this->obj->replace($this->tbl_delta, $data); + } + + public function deleteExpireBehaviorDelta($date) { + return $this->obj->delete($this->tbl_delta, array('sql' => "`date` array($date))); + } + + public function getHotBehavior($type, $sdate, $edate, $page_num, $page_size) { + $offset = ($page_num - 1) * $page_size; + + if (!$sdate) { + $this->setError('参数错误'); + return false; + } + + $sdate = date('Y-m-d', strtotime($sdate)); + $where = " date = '{$sdate}' "; + if ($edate) { + $edate = date('Y-m-d', strtotime($edate)); + $where = " date >= '{$sdate}' AND date <= '{$edate}' "; + } + + $row = ""; + if ($type == BEHAVIOR_TYPE_REPOSTS) $row = " SUM(reposts_delta) as num"; + if ($type == BEHAVIOR_TYPE_COMMENTS) $row = " SUM(comments_delta) as num"; + if ($type == BEHAVIOR_TYPE_ATTITUDES) $row = " SUM(attitudes_delta) as num"; + + $sql = "SELECT wid,{$row} FROM spider_behavior_delta WHERE {$where} GROUP BY wid ORDER BY num DESC LIMIT {$offset}, {$page_size}"; + $res = $this->obj->execute($sql, true, true); + + $mobj = new mWeibo(); + $wids = array_column($res, 'wid'); + + $weibos = $mobj->getWeiboByWids($wids); + $weibo_list = array_column($weibos, null, 'wid'); + + $weibo_user = $GLOBALS['WEIBO_USER_LIST']; + foreach ($res as &$re){ + $weibo = isset($weibo_list[$re['wid']]) ? $weibo_list[$re['wid']] : array(); + + $re['uname'] = empty($weibo)? $weibo_user[WEIBO_USER_ZHANG] : $weibo_user[$weibo['uid']]; + $re['title'] = empty($weibo)? '无文字展示' : $weibo['text']; + } + return $res; + } + + public function getHotBehaviorTotal($sdate, $edate) { + if (!$sdate) { + $this->setError('参数错误'); + return false; + } + + $sdate = date('Y-m-d', strtotime($sdate)); + $where = " date = '{$sdate}' "; + if ($edate) { + $edate = date('Y-m-d', strtotime($edate)); + $where = " date >= '{$sdate}' AND date <= '{$edate}' "; + } + + $sql = "SELECT count(*) as total FROM spider_behavior_delta WHERE {$where}"; + $res = $this->obj->execute($sql, false, true); + return $res['total']; + } +} \ No newline at end of file diff --git a/queue/config/daemonconf.php b/queue/config/daemonconf.php index d3c6191..f023c83 100644 --- a/queue/config/daemonconf.php +++ b/queue/config/daemonconf.php @@ -1,6 +1,6 @@ deleteExpireBehaviorData($six_month_ago); + + $user_list = array_keys($GLOBALS['WEIBO_USER_LIST']); + + $obj = new mSpider(); + foreach ($user_list as $uid) { + //采集原创数据 + for ($page = 1; $page <= 1000; $page++) { + $res = $obj->spiderBehaviorOriginalData($uid, $page); + //采集完毕 + if($res === 'done') break; + + if (!$res) { + $error_code = $obj->getError(); + $obj->sendMail(array('1026652509@qq.com'), date('Y年m月d日', time()) . '-微博原创数据抓取', $GLOBALS['spider_error'][$error_code]); + break 2; + } + } + + //采集转发数据 + for ($page = 1; $page <= 1000; $page++) { + $res = $obj->spiderBehaviorForwardData($uid, $page); + //采集完毕 + if($res === 'done') break; + + if (!$res) { + $error_code = $obj->getError(); + $obj->sendMail(array('1026652509@qq.com'), date('Y年m月d日', time()) . '-微博转发数据抓取', $GLOBALS['spider_error'][$error_code]); + break 2; + } + } + } + + return true; + } +} + +new spiderBehaviorData(); diff --git a/queue/crontab/spider_behavior_delta.php b/queue/crontab/spider_behavior_delta.php new file mode 100644 index 0000000..7d8b32a --- /dev/null +++ b/queue/crontab/spider_behavior_delta.php @@ -0,0 +1,57 @@ +deleteExpireBehaviorDelta($six_month_ago); + + //统计今日增量 + $today = date('Y-m-d'); + $today_data = $obj->getBehaviorByDate($today); + $today_data = array_column($today_data, null, 'wid'); + + $yesterday = date('Y-m-d', strtotime('-1 day')); + $yesterday_data = $obj->getBehaviorByDate($yesterday); + $yesterday_data = array_column($yesterday_data, null, 'wid'); + + if (empty($yesterday_data)) return true; + + foreach ($today_data as $k => $v) { + $yesterday_reposts_count = 0; + $yesterday_comments_count = 0; + $yesterday_attitudes_count = 0; + + if (isset($yesterday_data[$k])) { + $yesterday_reposts_count = $yesterday_data[$k]['reposts_count']; + $yesterday_comments_count = $yesterday_data[$k]['comments_count']; + $yesterday_attitudes_count = $yesterday_data[$k]['attitudes_count']; + } + + $delta = array( + 'uid' => $v['uid'], + 'wid' => $v['wid'], + 'reposts_delta' => $v['reposts_count'] - $yesterday_reposts_count, + 'comments_delta' => $v['comments_count'] - $yesterday_comments_count, + 'attitudes_delta' => $v['attitudes_count'] - $yesterday_attitudes_count, + 'date' => $v['date'], + ); + + $res = $obj->saveBehaviorDelta($delta); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode($delta)); + } + } + + return true; + } +} + +new spiderBehaviorDelta();