Browse Source

邮件依赖

pull/39/head
pengda 4 days ago
parent
commit
02ebdc01fe
  1. 48
      config/define.php
  2. 22
      control/weibo.php
  3. 36
      data/dWeiboBehavior.php
  4. 20
      model/mBase.php
  5. 301
      model/mSpider.php
  6. 12
      model/mWeibo.php
  7. 95
      model/mWeiboBehavior.php
  8. 2
      queue/config/daemonconf.php
  9. 51
      queue/crontab/spider_behavior_data.php
  10. 57
      queue/crontab/spider_behavior_delta.php

48
config/define.php

@ -49,4 +49,52 @@
// sprintf-style template for a Weibo image URL; %s is the placeholder to be filled in.
define('WEIBO_IMG_URL', 'https://wx4.sinaimg.cn/mw690/%s.jpg');
// Sent as the value of the SUB cookie on spider requests to weibo.com.
// NOTE(review): this is a login credential committed to source control — consider loading it from the environment and rotating it.
define('WEIBO_LOGIN_COOKIE', '_2A25FItemDeRhGeFH41MV8ibNyjSIHXVmXlVurDV8PUNbmtAYLWP9kW9NekLV22FUUuTM0k2WTJS1xyONMvh_V90k');
// Numeric Weibo uids of the accounts known to the spider.
define('WEIBO_USER_ZHANG',2282201403);
define('WEIBO_USER_XU',1285478591);
define('WEIBO_USER_CHENG',5031299688);
define('WEIBO_USER_LUO',1547596314);
// uid => display name. The keys of this list are the accounts the behavior crontab iterates over;
// the commented-out entries below are currently disabled.
$GLOBALS['WEIBO_USER_LIST'] = array(
WEIBO_USER_ZHANG => '针灸匠张宝旬',
// WEIBO_USER_XU => '徐文兵',
// WEIBO_USER_CHENG => '在下程程',
// WEIBO_USER_LUO => '罗大伦',
);
// Redis key under which the current proxy ip/port is cached.
define('_RC_SPIDER_IP', 'rc_spider_ip');
// Maximum number of attempts before a spider request is reported as failed.
define('SPIDER_REQUEST_MAX_NUMS', 5);
// Spider error codes: 1xx concern the proxy ip, 2xx concern the target URL request.
// NOTE(review): "EXPIERD" and "ASTABLE" look like misspellings (expired / unstable); they are
// referenced by name elsewhere, so renaming needs a coordinated change.
define('SPIDER_ERROR_IP_GET_EMPTY', 101);
define('SPIDER_ERROR_IP_GET_OUT', 102);
define('SPIDER_ERROR_IP_GET_FAIL', 103);
define('SPIDER_ERROR_IP_EXPIERD', 104);
define('SPIDER_ERROR_IP_ASTABLE', 105);
define('SPIDER_ERROR_IP_UNKNOWN', 199);
define('SPIDER_ERROR_URL_GET_EMPTY', 201);
define('SPIDER_ERROR_URL_COOKIE_EXPIRED', 202);
define('SPIDER_ERROR_URL_GET_UNKNOWN', 299);
// Error code => human-readable message; used as the body of the failure notification mail.
$GLOBALS['spider_error'] = array(
SPIDER_ERROR_IP_GET_EMPTY => '代理ip获取为空',
SPIDER_ERROR_IP_GET_OUT => '代理ip获取超额',
SPIDER_ERROR_IP_GET_FAIL => '代理ip获取失败',
SPIDER_ERROR_IP_EXPIERD => '代理ip失效',
SPIDER_ERROR_IP_ASTABLE => '代理ip不稳定',
SPIDER_ERROR_IP_UNKNOWN => '代理ip未知错误',
SPIDER_ERROR_URL_GET_EMPTY => '请求为空',
SPIDER_ERROR_URL_COOKIE_EXPIRED => 'cookie失效',
SPIDER_ERROR_URL_GET_UNKNOWN => '请求未知错误',
);
// Sender mailboxes for notification mail, keyed by address; each entry carries its SMTP host and password.
// NOTE(review): the SMTP password is committed in plain text — consider moving it to untracked configuration.
$GLOBALS['notice_mail_list'] = array(
'mouzhi@qq.com' => array(
'host' => 'smtp.qq.com',
'password' => 'gtjywrxamasibjch'
),
);
// Selects which delta column the hot-list query sums: reposts, comments or attitudes.
define('BEHAVIOR_TYPE_REPOSTS', 0);
define('BEHAVIOR_TYPE_COMMENTS', 1);
define('BEHAVIOR_TYPE_ATTITUDES', 2);

22
control/weibo.php

@ -85,5 +85,25 @@ class weibo extends publicBase {
$this->ajax_json(true, '保存成功');
}
public function ajax_upload_file() {}
/**
 * AJAX endpoint: paginated "hot" ranking of weibos by summed behavior delta.
 *
 * POST parameters:
 *  - type        BEHAVIOR_TYPE_* selector (defaults to 0 when absent)
 *  - sdate       start date (required by the model)
 *  - edate       optional end date; when given the range sdate..edate is used
 *  - currentPage 1-based page number (default 1)
 *  - pageSize    rows per page (default 20)
 *
 * Responds through ajax_json() with total / per_page / last_page / cur_page / list.
 */
public function ajax_hot_list() {
    // Cast everything numeric up front: the page values end up inside a SQL
    // LIMIT clause and in a division below, so raw request strings must not
    // be passed through.
    $type = (int)$this->post('type');
    $sdate = trim((string)$this->post('sdate'));
    $edate = trim((string)$this->post('edate'));
    $cur_page = max(1, (int)$this->post('currentPage'));
    $page_size = (int)$this->post('pageSize');
    if ($page_size < 1) {
        $page_size = 20;
    }

    $obj = new mWeiboBehavior();
    $list = $obj->getHotBehavior($type, $sdate, $edate, $cur_page, $page_size);
    if ($list === false) {
        // The model rejected the parameters (e.g. missing sdate); report it
        // instead of returning a "success" payload that carries list=false.
        // NOTE(review): assumes ajax_json(false, $msg) is the failure form — confirm.
        $this->ajax_json(false, $obj->getError());
        return;
    }
    $total = (int)$obj->getHotBehaviorTotal($sdate, $edate);

    $rdata = array(
        'total' => $total,
        'per_page' => $page_size,
        'last_page' => ceil($total / $page_size),
        'cur_page' => $cur_page,
        'list' => $list,
    );
    $this->ajax_json(true, '获取成功', $rdata);
}
}

36
data/dWeiboBehavior.php

@ -0,0 +1,36 @@
<?php
/**
*
*/
include_once SERVER_ROOT . '/data/dBase.php';
/**
 * Data-layer description of the two behavior tables.
 *
 * Declares, per table, the list of columns and the primary-key column.
 * NOTE(review): presumably consumed by dBase to whitelist fields and build
 * queries — confirm against dBase.
 */
class dWeiboBehavior extends dBase {
// Table name => column names.
protected $fieldlist = array(
// Daily snapshot of absolute counters per weibo (wid).
'spider_behavior_data' => array(
'id',
'uid',
'wid',
'reposts_count',
'comments_count',
'attitudes_count',
'date',
'created_at',
),
// Day-over-day change of those counters per weibo (wid).
'spider_behavior_delta' => array(
'id',
'uid',
'wid',
'reposts_delta',
'comments_delta',
'attitudes_delta',
'date',
'created_at',
),
);
// Table name => primary-key column.
protected $primary_keys = array(
'spider_behavior_data' => 'id',
'spider_behavior_delta' => 'id',
);
}

20
model/mBase.php

@ -114,17 +114,19 @@ class mBase extends publicBase {
* @param string $title 邮件标题
* @param string $content 邮件内容
*/
public static function sendMail($emails, $title, $content) {
public static function sendMail($emails, $title, $content,$attr='') {
include_once SERVER_ROOT . '/library/mail/class.phpmailer.php';
$mail_name = array_rand($GLOBALS['notice_qqmail_list']);
$mail_name = array_rand($GLOBALS['notice_mail_list']);
$mail = new PHPMailer();
$mail->IsSMTP();
$mail->Host = 'smtp.163.com';
$mail->Port = 465;
$mail->Host = $GLOBALS['notice_mail_list'][$mail_name]['host'];
$mail->Port = 465; // TCP 端口
$mail->SMTPSecure = 'ssl'; // 启用 TLS 加密
//$mail->SMTPDebug = 2; // 是否开启调试
$mail->Username = $mail_name;
$mail->Password = $GLOBALS['notice_qqmail_list'][$mail_name];
$mail->Password = $GLOBALS['notice_mail_list'][$mail_name]['password'];
$mail->From = $mail_name;
$mail->CharSet = "utf-8";
$mail->IsHTML(true);
@ -132,9 +134,13 @@ class mBase extends publicBase {
$mail->ClearAddresses();
$mail->SetLanguage('en', SERVER_ROOT . '/library/mail/language/');
$mail->FromName = "快乐论文";
if($attr){
$mail->addAttachment($attr);
}
$mail->FromName = "知识库";
foreach($emails as $email) {
$mail->AddAddress($email);
$mail->AddAddress($email);
}
$mail->Subject = $title;
$mail->Body = $content;

301
model/mSpider.php

@ -7,6 +7,8 @@ include_once(SERVER_ROOT . "/model/mBase.php");
class mSpider extends mBase {
private $error_nums = 0;
public function __construct() {
$this->obj = new dWeibo();
$this->tbl = 'spider_weibo';
@ -37,12 +39,49 @@ class mSpider extends mBase {
return array('ip' => $ip, 'port' => $port);
}
public function getRequest($url, $headers = array(), $timeout = 60) {
if (empty($this->proxy_ip) || empty($this->proxy_port)) {
$this->setError('配置代理后再来请求吧');
/**
 * Fetch a fresh proxy ip/port from the provider API and cache it in Redis.
 *
 * An empty response is retried every 5 seconds until the attempt counter
 * reaches SPIDER_REQUEST_MAX_NUMS. On failure the error code is stored via
 * setError() and false is returned.
 *
 * @param string $task_key Redis key the proxy is cached under
 * @param int    $times    attempt number to start counting from
 * @return array|false array('ip' => ..., 'port' => ...) or false on failure
 */
public function getNewHttpIp($task_key, $times = 1) {
    // NOTE(review): the provider token is embedded in this URL; consider moving it to configuration.
    $url = "http://proxy.siyetian.com/apis_get.html?token=AesJWLNp2a65kaJdXTqFFeNpWT35ERNpnTn1STqFUeORUR31kaNh3TUl0dPRUQy4ERJdXT6lVN.QMxkTO0MjM0cTM&limit=1&type=1&time=&data_format=json&showTimeEnd=true";

    // Retry loop (replaces the former self-recursion, same attempts and delays).
    for ($attempt = $times; ; $attempt++) {
        $jsoninfo = $this->getCUrl($url);
        $this->writeLog('spider', 'getNewHttpIp.log', $task_key . '|' . $jsoninfo);
        if (!empty($jsoninfo)) {
            break;
        }
        // Still empty on the last allowed attempt: give up.
        if ($attempt >= SPIDER_REQUEST_MAX_NUMS) {
            $this->setError(SPIDER_ERROR_IP_GET_EMPTY);
            return false;
        }
        sleep(5);
    }

    $data = json_decode($jsoninfo, true);
    // Response body was not a JSON object/array.
    if (!is_array($data)) {
        $this->setError(SPIDER_ERROR_IP_GET_FAIL);
        return false;
    }

    // Quota of the proxy plan is used up; a new plan is needed.
    $code = isset($data['code']) ? $data['code'] : null;
    if ($code == 10019 || $code == 10005) {
        $this->setError(SPIDER_ERROR_IP_GET_OUT);
        return false;
    }

    $first = (isset($data['data'][0]) && is_array($data['data'][0])) ? $data['data'][0] : array();
    $ip = isset($first['ip']) ? $first['ip'] : '';
    $port = isset($first['port']) ? $first['port'] : '';
    $end_time = isset($first['end_time']) ? $first['end_time'] : '';

    // Any other malformed answer.
    if (empty($ip) || empty($port)) {
        $this->setError(SPIDER_ERROR_IP_GET_FAIL);
        return false;
    }

    // Cache only with a positive TTL: a missing/unparsable or already past
    // end_time yields a non-positive value, which SETEX rejects.
    $end_ts = strtotime($end_time);
    $expire_time = ($end_ts === false) ? 0 : $end_ts - time();
    if ($expire_time > 0) {
        $rdobj = $this->initRedis();
        $rdobj->setex($task_key, $expire_time, json_encode(array('ip' => $ip, 'port' => $port, 'end_time' => $end_time)));
    }

    return array('ip' => $ip, 'port' => $port);
}
public function getRequest($url, $headers = array(), $timeout = 60) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
@ -206,4 +245,260 @@ class mSpider extends mBase {
return $this->obj->selectAll('spider_weibo', array('sql' => $where, 'vals' => array()), 'id desc ', array($offset, $page_size));
}
/**
 * Scrape one result page of a user's ORIGINAL weibos and store, per weibo,
 * the behavior counters (reposts / comments / attitudes) for today plus the
 * weibo record itself.
 *
 * Failed requests are retried (2 s apart) until $times reaches
 * SPIDER_REQUEST_MAX_NUMS; an "unstable proxy" failure does not consume an
 * attempt, an "expired proxy" failure drops the cached proxy first.
 *
 * @param int|string $uid   Weibo user id
 * @param int        $page  page number requested from the search endpoint
 * @param int        $times current attempt number (used by the retry recursion)
 * @return bool|string true when the page was processed, 'done' when the page
 *                     has no entries, false on failure (code via getError())
 */
public function spiderBehaviorOriginalData($uid, $page, $times = 1) {
    $this->writeLog('spider', 'getData.log', "{$uid}|{$page}|{$times}");

    // Use the cached proxy; fetch a new one when nothing usable is cached
    // (missing key or a payload that does not decode to ip + port).
    $rdobj = $this->initRedis();
    $ipinfo = $rdobj->get(_RC_SPIDER_IP);
    $ipdata = $ipinfo ? json_decode($ipinfo, true) : null;
    if (empty($ipdata['ip']) || empty($ipdata['port'])) {
        $ipdata = $this->getNewHttpIp(_RC_SPIDER_IP);
    }
    // Could not obtain a proxy.
    if (empty($ipdata)) {
        $this->setError($this->getError());
        return false;
    }
    $this->proxy_ip = $ipdata['ip'];
    $this->proxy_port = $ipdata['port'];

    $cookie = WEIBO_LOGIN_COOKIE;
    $url = "https://weibo.com/ajax/statuses/searchProfile?uid={$uid}&page={$page}&hasori=1";
    $headers = array(
        "Cookie: SUB={$cookie}",
        "x-requested-with: XMLHttpRequest",
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "Referer: https://weibo.com/u/{$uid}?tabtype=feed"
    );
    $res = $this->getRequest($url, $headers);

    // Request failed.
    if (empty($res)) {
        // Default: empty response.
        $error_code = SPIDER_ERROR_URL_GET_EMPTY;
        // A curl error was recorded: classify it.
        if ($this->curl_err) {
            $this->error_nums++;
            $error_code = $this->getSpiderErrors();
        }
        // Unstable proxy: this attempt does not count.
        if ($error_code == SPIDER_ERROR_IP_ASTABLE) $times--;
        // Expired proxy: drop it so the retry fetches a new one.
        if ($error_code == SPIDER_ERROR_IP_EXPIERD) $rdobj->del(_RC_SPIDER_IP);
        // Attempt budget exhausted.
        if ($times >= SPIDER_REQUEST_MAX_NUMS) {
            $this->setError($error_code);
            return false;
        }
        sleep(2);
        return $this->spiderBehaviorOriginalData($uid, $page, $times + 1);
    }
    $this->error_nums = 0;

    // Login page returned instead of JSON: the cookie is no longer valid.
    // Compare against false explicitly; strpos() returns 0 for a match at offset 0.
    if (strpos($res, '登录 - 微博') !== false) {
        $this->setError(SPIDER_ERROR_URL_COOKIE_EXPIRED);
        return false;
    }

    $data = json_decode($res, 1);
    if (!$data) {
        $this->writeLog('spider', 'errorData.log', $res);
        $this->setError(SPIDER_ERROR_URL_GET_UNKNOWN);
        return false;
    }

    // No entries on this page: everything has been fetched.
    if (empty($data['data']['list'])) return 'done';

    $date = date('Y-m-d');
    $bobj = new mWeiboBehavior();
    $wobj = new mWeibo();
    foreach ($data['data']['list'] as $da) {
        // Counters are absent when the author hid the post.
        if (!isset($da['reposts_count'])) continue;

        $temp = array(
            'uid' => $uid,
            'wid' => $da['id'],
            'reposts_count' => $da['reposts_count'],
            'comments_count' => isset($da['comments_count']) ? $da['comments_count'] : 0,
            'attitudes_count' => isset($da['attitudes_count']) ? $da['attitudes_count'] : 0,
            'date' => $date,
        );
        if (!$bobj->saveBehaviorData($temp)) {
            $this->writeLog('spider', 'insert_error.log', json_encode($temp));
        }

        // An unparsable timestamp must not abort the whole crawl.
        try {
            $created_at = new DateTime($da['created_at']);
        } catch (Exception $e) {
            $this->writeLog('spider', 'insert_error.log', json_encode($temp));
            continue;
        }
        $has_pics = isset($da['pic_num']) && $da['pic_num'] > 0 && isset($da['pic_ids']);
        $weibo = array(
            'mblogid' => isset($da['mblogid']) ? $da['mblogid'] : '',
            'text' => isset($da['text']) ? strip_tags($da['text']) : '',
            'refer' => '',
            'pic_ids' => $has_pics ? json_encode($da['pic_ids']) : '',
            'video_url' => isset($da['page_info']['media_info']['stream_url']) ? $da['page_info']['media_info']['stream_url'] : '',
            'video_cover' => isset($da['page_info']['page_pic']) ? pathinfo(basename($da['page_info']['page_pic']), PATHINFO_FILENAME) : '',
            'created_at' => $created_at->format('Y-m-d H:i:s')
        );
        if (!$wobj->saveWeiboData($temp, $weibo)) {
            $this->writeLog('spider', 'insert_error.log', json_encode(array_merge($temp, $weibo)));
        }
    }

    $this->writeLog('spider', 'getDone.log', "{$uid}|{$page}|{$times}");
    return true;
}
/**
 * Scrape one result page of a user's FORWARDED (retweeted) weibos and store,
 * for each retweeted original, today's behavior counters plus the weibo
 * record itself.
 *
 * Failed requests are retried (2 s apart) until $times reaches
 * SPIDER_REQUEST_MAX_NUMS; an "unstable proxy" failure does not consume an
 * attempt, an "expired proxy" failure drops the cached proxy first.
 *
 * @param int|string $uid   Weibo user id
 * @param int        $page  page number requested from the search endpoint
 * @param int        $times current attempt number (used by the retry recursion)
 * @return bool|string true when the page was processed, 'done' when the page
 *                     has no entries, false on failure (code via getError())
 */
public function spiderBehaviorForwardData($uid, $page, $times = 1) {
    $this->writeLog('spider', 'getData.log', "{$uid}|{$page}|{$times}");

    // Use the cached proxy; fetch a new one when nothing usable is cached
    // (missing key or a payload that does not decode to ip + port).
    $rdobj = $this->initRedis();
    $ipinfo = $rdobj->get(_RC_SPIDER_IP);
    $ipdata = $ipinfo ? json_decode($ipinfo, true) : null;
    if (empty($ipdata['ip']) || empty($ipdata['port'])) {
        $ipdata = $this->getNewHttpIp(_RC_SPIDER_IP);
    }
    // Could not obtain a proxy.
    if (empty($ipdata)) {
        $this->setError($this->getError());
        return false;
    }
    $this->proxy_ip = $ipdata['ip'];
    $this->proxy_port = $ipdata['port'];

    $cookie = WEIBO_LOGIN_COOKIE;
    $endtime = strtotime("tomorrow");
    $url = "https://weibo.com/ajax/statuses/searchProfile?uid={$uid}&page={$page}&endtime={$endtime}&hasret=1&hastext=1&haspic=1&hasvideo=1&hasmusic=1";
    $headers = array(
        "Cookie: SUB={$cookie}",
        "x-requested-with: XMLHttpRequest",
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "Referer: https://weibo.com/u/{$uid}?is_text=1&is_pic=1&is_video=1&is_music=1&is_forward=1&end_time={$endtime}"
    );
    $res = $this->getRequest($url, $headers);

    // Request failed.
    if (empty($res)) {
        // Default: empty response.
        $error_code = SPIDER_ERROR_URL_GET_EMPTY;
        // A curl error was recorded: classify it.
        if ($this->curl_err) {
            $this->error_nums++;
            $error_code = $this->getSpiderErrors();
        }
        // Unstable proxy: this attempt does not count.
        if ($error_code == SPIDER_ERROR_IP_ASTABLE) $times--;
        // Expired proxy: drop it so the retry fetches a new one.
        if ($error_code == SPIDER_ERROR_IP_EXPIERD) $rdobj->del(_RC_SPIDER_IP);
        // Attempt budget exhausted.
        if ($times >= SPIDER_REQUEST_MAX_NUMS) {
            $this->setError($error_code);
            return false;
        }
        sleep(2);
        return $this->spiderBehaviorForwardData($uid, $page, $times + 1);
    }
    $this->error_nums = 0;

    // Login page returned instead of JSON: the cookie is no longer valid.
    // Compare against false explicitly; strpos() returns 0 for a match at offset 0.
    if (strpos($res, '登录 - 微博') !== false) {
        $this->setError(SPIDER_ERROR_URL_COOKIE_EXPIRED);
        return false;
    }

    $data = json_decode($res, 1);
    if (!$data) {
        $this->writeLog('spider', 'errorData.log', $res);
        $this->setError(SPIDER_ERROR_URL_GET_UNKNOWN);
        return false;
    }

    // No entries on this page: everything has been fetched.
    if (empty($data['data']['list'])) return 'done';

    $date = date('Y-m-d');
    $bobj = new mWeiboBehavior();
    $wobj = new mWeibo();
    foreach ($data['data']['list'] as $item) {
        // Only entries posted by the requested user are considered.
        if (!isset($item['user']['id']) || $item['user']['id'] != $uid) continue;
        // Entries without a retweeted original carry nothing to record.
        if (!isset($item['retweeted_status'])) continue;
        $da = $item['retweeted_status'];
        // Counters are absent when the author hid the post.
        if (!isset($da['reposts_count'])) continue;

        $temp = array(
            'uid' => $uid,
            'wid' => $da['id'],
            'reposts_count' => $da['reposts_count'],
            'comments_count' => isset($da['comments_count']) ? $da['comments_count'] : 0,
            'attitudes_count' => isset($da['attitudes_count']) ? $da['attitudes_count'] : 0,
            'date' => $date,
        );
        if (!$bobj->saveBehaviorData($temp)) {
            $this->writeLog('spider', 'insert_error.log', json_encode($temp));
        }

        // An unparsable timestamp must not abort the whole crawl.
        try {
            $created_at = new DateTime($da['created_at']);
        } catch (Exception $e) {
            $this->writeLog('spider', 'insert_error.log', json_encode($temp));
            continue;
        }
        $has_pics = isset($da['pic_num']) && $da['pic_num'] > 0 && isset($da['pic_ids']);
        $weibo = array(
            'mblogid' => isset($da['mblogid']) ? $da['mblogid'] : '',
            'text' => isset($da['text']) ? strip_tags($da['text']) : '',
            'refer' => '',
            'pic_ids' => $has_pics ? json_encode($da['pic_ids']) : '',
            'video_url' => isset($da['page_info']['media_info']['stream_url']) ? $da['page_info']['media_info']['stream_url'] : '',
            'video_cover' => isset($da['page_info']['page_pic']) ? pathinfo(basename($da['page_info']['page_pic']), PATHINFO_FILENAME) : '',
            'created_at' => $created_at->format('Y-m-d H:i:s')
        );
        if (!$wobj->saveWeiboData($temp, $weibo)) {
            $this->writeLog('spider', 'insert_error.log', json_encode(array_merge($temp, $weibo)));
        }
    }

    $this->writeLog('spider', 'getDone.log', "{$uid}|{$page}|{$times}");
    return true;
}
/**
 * Map the last curl error message ($this->curl_err) to a SPIDER_ERROR_IP_* code.
 *
 * - SOCKS5 handshake/read-timeout messages  => proxy expired
 * - SOCKS5 connect failure / refused        => unstable while fewer than 5
 *   consecutive errors were counted, expired at exactly 5
 * - anything else                           => unknown proxy error
 *
 * Messages that are not one of the two "expired" handshake errors are also
 * written to curl_error.log.
 *
 * @return int one of the SPIDER_ERROR_IP_* constants
 */
private function getSpiderErrors() {
    $message = (string)$this->curl_err;

    // Decide on preg_match()'s return value; reading $match[0] after a failed
    // match is an undefined offset.
    if (preg_match('/Unable to receive initial SOCKS5 response./', $message)
        || preg_match('/SOCKS5 read timeout/', $message)) {
        return SPIDER_ERROR_IP_EXPIERD;
    }

    $this->writeLog('spider', 'curl_error.log', $this->curl_err);

    // Symptoms of an unstable proxy.
    $unstable = preg_match("/Can't complete SOCKS5 connection to 0.0.0.0:0/", $message)
        || preg_match("/Connection refused/", $message);
    if ($unstable) {
        if ($this->error_nums < 5) return SPIDER_ERROR_IP_ASTABLE;
        if ($this->error_nums == 5) return SPIDER_ERROR_IP_EXPIERD;
    }

    return SPIDER_ERROR_IP_UNKNOWN;
}
}

12
model/mWeibo.php

@ -109,4 +109,16 @@ class mWeibo extends mBase {
public function getVideoOssUrl($picid, $created_at) {
return ALIYUN_OSS_URI . date("Y-m", strtotime($created_at)) . '/' . $picid . '.mp4';
}
/**
 * Upsert a weibo keyed by its wid.
 *
 * When a row with the same wid already exists it is updated with the
 * behavior fields only; otherwise a new row is created from the behavior
 * fields merged with the weibo content fields.
 *
 * @param array $behavior behavior fields, must contain 'wid'
 * @param array $data     weibo content fields used only on insert
 * @return mixed result of updateWeibo() or addWeibo()
 */
public function saveWeiboData($behavior, $data) {
    $condition = array('sql' => '`wid`=?', 'vals' => array($behavior['wid']));
    $existing = $this->obj->select($this->tbl, $condition);

    if (!$existing) {
        $row = array_merge($behavior, $data);
        return $this->addWeibo($row);
    }

    return $this->updateWeibo($existing['id'], $behavior);
}
/**
 * Load the weibo rows whose wid is contained in the given list.
 *
 * @param array $wids weibo ids to look up
 * @return mixed whatever the data layer's selectIn() returns
 */
public function getWeiboByWids($wids) {
    $criteria = array('wid'=>$wids);
    $rows = $this->obj->selectIn($this->tbl, $criteria);
    return $rows;
}
}

95
model/mWeiboBehavior.php

@ -0,0 +1,95 @@
<?php
/**
*
*/
include_once(SERVER_ROOT . "/model/mBase.php");
/**
 * Model for weibo behavior statistics.
 *
 * Works on two tables: spider_behavior_data (daily absolute counters per
 * weibo) and spider_behavior_delta (day-over-day change per weibo).
 */
class mWeiboBehavior extends mBase {

    private $tbl_data;
    private $tbl_delta;

    public function __construct() {
        $this->obj = new dWeiboBehavior();
        $this->tbl_data = 'spider_behavior_data';
        $this->tbl_delta = 'spider_behavior_delta';
    }

    /** Insert or replace one daily counter snapshot row. */
    public function saveBehaviorData($data) {
        return $this->obj->replace($this->tbl_data, $data);
    }

    /** Fetch the snapshot rows of one date (at most 10000, newest id first). */
    public function getBehaviorByDate($date) {
        return $this->obj->selectAll($this->tbl_data, array('sql' => "`date`=?", 'vals' => array($date)), 'id desc', array(0, 10000));
    }

    /** Delete snapshot rows dated strictly before $date. */
    public function deleteExpireBehaviorData($date) {
        return $this->obj->delete($this->tbl_data, array('sql' => "`date`<?", 'vals' => array($date)));
    }

    /** Insert or replace one delta row. */
    public function saveBehaviorDelta($data) {
        return $this->obj->replace($this->tbl_delta, $data);
    }

    /** Delete delta rows dated strictly before $date. */
    public function deleteExpireBehaviorDelta($date) {
        return $this->obj->delete($this->tbl_delta, array('sql' => "`date`<?", 'vals' => array($date)));
    }

    /**
     * Ranked page of weibos ordered by the summed delta of one behavior type.
     *
     * @param int    $type      BEHAVIOR_TYPE_REPOSTS / _COMMENTS / _ATTITUDES
     * @param string $sdate     start date (required)
     * @param string $edate     optional end date; when set the inclusive range is used
     * @param int    $page_num  1-based page number
     * @param int    $page_size rows per page
     * @return array|false rows with wid, num, uname, title; false on invalid parameters
     */
    public function getHotBehavior($type, $sdate, $edate, $page_num, $page_size) {
        $where = $this->buildDateWhere($sdate, $edate);
        $column = $this->deltaColumn($type);
        // An unknown type would otherwise produce "SELECT wid, FROM ...".
        if ($where === false || $column === false) {
            $this->setError('参数错误');
            return false;
        }

        // These values are interpolated into LIMIT, so force them to integers.
        $page_num = max(1, (int)$page_num);
        $page_size = max(1, (int)$page_size);
        $offset = ($page_num - 1) * $page_size;

        $sql = "SELECT wid, SUM({$column}) as num FROM spider_behavior_delta WHERE {$where} GROUP BY wid ORDER BY num DESC LIMIT {$offset}, {$page_size}";
        $res = $this->obj->execute($sql, true, true);
        if (empty($res) || !is_array($res)) {
            return array();
        }

        // Attach author name and text of each ranked weibo.
        $mobj = new mWeibo();
        $weibos = $mobj->getWeiboByWids(array_column($res, 'wid'));
        $weibo_list = is_array($weibos) ? array_column($weibos, null, 'wid') : array();
        $weibo_user = $GLOBALS['WEIBO_USER_LIST'];
        foreach ($res as $idx => $re) {
            $weibo = isset($weibo_list[$re['wid']]) ? $weibo_list[$re['wid']] : array();
            if (empty($weibo)) {
                // No stored weibo: keep the original defaults.
                $res[$idx]['uname'] = $weibo_user[WEIBO_USER_ZHANG];
                $res[$idx]['title'] = '无文字展示';
                continue;
            }
            // The uid may belong to an account that is not (or no longer) in the list.
            $res[$idx]['uname'] = isset($weibo_user[$weibo['uid']]) ? $weibo_user[$weibo['uid']] : '';
            $res[$idx]['title'] = $weibo['text'];
        }
        return $res;
    }

    /**
     * Number of distinct weibos in the date selection, i.e. the row count of
     * the grouped hot list, for pagination.
     *
     * @param string $sdate start date (required)
     * @param string $edate optional end date
     * @return int|false total, or false on invalid parameters
     */
    public function getHotBehaviorTotal($sdate, $edate) {
        $where = $this->buildDateWhere($sdate, $edate);
        if ($where === false) {
            $this->setError('参数错误');
            return false;
        }
        // The list is grouped by wid, so count distinct wids rather than rows.
        $sql = "SELECT COUNT(DISTINCT wid) as total FROM spider_behavior_delta WHERE {$where}";
        $res = $this->obj->execute($sql, false, true);
        return isset($res['total']) ? (int)$res['total'] : 0;
    }

    /**
     * Build the date condition shared by the hot-list queries.
     * Dates are normalised through date('Y-m-d'), so only digits and hyphens
     * reach the SQL string. Returns false when a date is missing/unparsable.
     */
    private function buildDateWhere($sdate, $edate) {
        $start = $sdate ? strtotime($sdate) : false;
        if ($start === false) {
            return false;
        }
        $sdate = date('Y-m-d', $start);
        if (!$edate) {
            return " date = '{$sdate}' ";
        }
        $end = strtotime($edate);
        if ($end === false) {
            return false;
        }
        $edate = date('Y-m-d', $end);
        return " date >= '{$sdate}' AND date <= '{$edate}' ";
    }

    /** Map a BEHAVIOR_TYPE_* value to its delta column name, or false when unknown. */
    private function deltaColumn($type) {
        switch ((int)$type) {
            case BEHAVIOR_TYPE_REPOSTS:
                return 'reposts_delta';
            case BEHAVIOR_TYPE_COMMENTS:
                return 'comments_delta';
            case BEHAVIOR_TYPE_ATTITUDES:
                return 'attitudes_delta';
        }
        return false;
    }
}

2
queue/config/daemonconf.php

@ -1,6 +1,6 @@
<?php
define('_RQ_SPIDER_USE_IP', 'rq_spider_use_ip');
define('_RC_SPIDER_USE_IP', 'rc_spider_use_ip');
define('_RQ_SPIDER_WEIBO_DATA', 'rq_spider_weibo_data');
define('SPIDER_WEIBO_DATA', 'spider_weibo_data');

51
queue/crontab/spider_behavior_data.php

@ -0,0 +1,51 @@
<?php
/**
* 抓取微博行为数据
* @package crontab
*/
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
/**
 * Crontab job: prune behavior snapshots older than six months, then crawl the
 * original and forwarded weibos of every configured user. The first failure
 * sends a notification mail and stops the whole run.
 */
class spiderBehaviorData {

    public function __construct() {
        // Remove snapshots older than half a year.
        $bobj = new mWeiboBehavior();
        $bobj->deleteExpireBehaviorData(date('Y-m-d', strtotime('-6 month')));

        $obj = new mSpider();
        foreach (array_keys($GLOBALS['WEIBO_USER_LIST']) as $uid) {
            // Original posts first, then forwarded ones; any failure ends the run.
            if (!$this->crawlPages($obj, $uid, 'spiderBehaviorOriginalData', '-微博原创数据抓取')) {
                break;
            }
            if (!$this->crawlPages($obj, $uid, 'spiderBehaviorForwardData', '-微博转发数据抓取')) {
                break;
            }
        }
    }

    /**
     * Walk pages 1..1000 of one spider method until it reports 'done'.
     *
     * @param mSpider    $spider
     * @param int|string $uid
     * @param string     $method         spider method to call per page
     * @param string     $subject_suffix appended to the date in the mail subject
     * @return bool false when a page failed (mail has been sent), true otherwise
     */
    private function crawlPages($spider, $uid, $method, $subject_suffix) {
        for ($page = 1; $page <= 1000; $page++) {
            $res = $spider->$method($uid, $page);
            // Nothing left to fetch.
            if ($res === 'done') {
                return true;
            }
            if (!$res) {
                $error_code = $spider->getError();
                // Fall back to a generic text when the code has no mapped message.
                $known = is_scalar($error_code) && isset($GLOBALS['spider_error'][$error_code]);
                $message = $known ? $GLOBALS['spider_error'][$error_code] : '未知错误: ' . json_encode($error_code);
                $spider->sendMail(array('1026652509@qq.com'), date('Y年m月d日', time()) . $subject_suffix, $message);
                return false;
            }
        }
        return true;
    }
}
new spiderBehaviorData();

57
queue/crontab/spider_behavior_delta.php

@ -0,0 +1,57 @@
<?php
/**
* 汇总用户行为数据日增量
* @package crontab
*/
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
/**
 * Crontab job: prune delta rows older than six months, then compute today's
 * per-weibo increase of reposts / comments / attitudes relative to
 * yesterday's snapshot and store it in the delta table.
 */
class spiderBehaviorDelta {

    public function __construct() {
        // Remove deltas older than half a year.
        $obj = new mWeiboBehavior();
        $obj->deleteExpireBehaviorDelta(date('Y-m-d', strtotime('-6 month')));

        $today_data = $this->indexByWid($obj->getBehaviorByDate(date('Y-m-d')));
        $yesterday_data = $this->indexByWid($obj->getBehaviorByDate(date('Y-m-d', strtotime('-1 day'))));
        // Without a baseline from yesterday no delta can be computed.
        if (empty($yesterday_data)) {
            return;
        }

        $zero = array('reposts_count' => 0, 'comments_count' => 0, 'attitudes_count' => 0);
        foreach ($today_data as $wid => $row) {
            // Weibos first seen today are compared against zero counters.
            $prev = isset($yesterday_data[$wid]) ? $yesterday_data[$wid] : $zero;
            $delta = array(
                'uid' => $row['uid'],
                'wid' => $row['wid'],
                'reposts_delta' => $row['reposts_count'] - $prev['reposts_count'],
                'comments_delta' => $row['comments_count'] - $prev['comments_count'],
                'attitudes_delta' => $row['attitudes_count'] - $prev['attitudes_count'],
                'date' => $row['date'],
            );
            if (!$obj->saveBehaviorDelta($delta)) {
                $this->logInsertError($obj, $delta);
            }
        }
    }

    /** Re-key snapshot rows by wid; a non-array result is treated as empty. */
    private function indexByWid($rows) {
        return is_array($rows) ? array_column($rows, null, 'wid') : array();
    }

    /**
     * Record a failed delta insert.
     * This class has no writeLog() of its own, so the model's logger is used
     * when it is callable from here; otherwise fall back to error_log().
     */
    private function logInsertError($model, $delta) {
        $payload = json_encode($delta);
        if (is_callable(array($model, 'writeLog'))) {
            $model->writeLog('spider', 'insert_error.log', $payload);
            return;
        }
        error_log('spider insert_error: ' . $payload);
    }
}
new spiderBehaviorDelta();
Loading…
Cancel
Save