You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
80 lines
3.0 KiB
80 lines
3.0 KiB
<?php
|
|
/**
 * Aggregates the daily increments of user behavior data.
 * @package crontab
 */
|
|
|
|
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
|
|
|
|
// The date to process is taken from the first command-line argument,
// e.g. `php <this script> 2020-01-03`.
// Fail fast when it is missing or cannot be parsed: strtotime() would return
// false, date() would then format timestamp 0, and the job would silently run
// for 1970-01-01.
if (!isset($argv[1]) || strtotime($argv[1]) === false) {
    file_put_contents(
        'php://stderr',
        'Usage: php ' . basename(__FILE__) . " <date, e.g. 2020-01-03>\n"
    );
    exit(1);
}

define('DATE', $argv[1]);
|
|
|
|
/**
 * Computes, for every behavior row of the date held in the DATE constant,
 * the change in repost/comment/attitude counts relative to the previous day
 * (or, when no row exists for the previous day, the day before that) and
 * stores the result through mWeiboBehavior::saveBehaviorDelta().
 *
 * All work is done in the constructor, so instantiating the class runs the job.
 */
class spiderBehaviorDelta {

    /**
     * Purges expired delta rows, then walks the rows for DATE page by page
     * and saves one delta row per item.
     */
    public function __construct() {
        $obj = new mWeiboBehavior();

        // NOTE(review): the original comment here said "delete data older than
        // half a year", but the cutoff is computed as one month ago. The
        // one-month cutoff is kept unchanged; confirm the intended retention.
        $expire_before = date('Y-m-d', strtotime('-1 month'));
        $obj->deleteExpireBehaviorDelta($expire_before);

        // The three dates do not change between pages, so compute them once.
        $today        = date('Y-m-d', strtotime(DATE));
        $today_ts     = strtotime($today);
        $yesterday    = date('Y-m-d', strtotime('-1 day', $today_ts));
        $two_days_ago = date('Y-m-d', strtotime('-2 day', $today_ts));

        $page_size = 2000;
        for ($page_num = 1; ; $page_num++) {
            $today_data = $obj->getSpiderBehavior($today, 0, 0, $page_num, $page_size);
            if (empty($today_data)) {
                break;
            }

            // Re-key today's rows by wid so baselines can be looked up directly.
            $today_data = array_column($today_data, null, 'wid');
            $wids       = array_column($today_data, 'wid');

            $yesterday_data = $this->indexByWid($obj->getBehaviorByWids($wids, $yesterday));
            // The day before yesterday is only a fallback baseline for items
            // that have no row for yesterday.
            $two_days_ago_data = $this->indexByWid($obj->getBehaviorByWids($wids, $two_days_ago));

            // No baseline at all for this page: skip it, but keep going.
            // (Returning here would abandon every remaining page.)
            if (empty($yesterday_data) && empty($two_days_ago_data)) {
                continue;
            }

            foreach ($today_data as $wid => $row) {
                $baseline = null;
                if (isset($yesterday_data[$wid])) {
                    $baseline = $yesterday_data[$wid];
                } elseif (isset($two_days_ago_data[$wid])) {
                    $baseline = $two_days_ago_data[$wid];
                }

                $delta = $this->buildDelta($row, $baseline);
                if (!$obj->saveBehaviorDelta($delta)) {
                    $this->logInsertError($delta);
                }
            }
        }
    }

    /**
     * Re-keys a list of rows by their 'wid' column.
     *
     * @param mixed $rows Rows returned by the data layer; anything that is not an array is treated as empty.
     * @return array Rows keyed by wid.
     */
    private function indexByWid($rows) {
        return is_array($rows) ? array_column($rows, null, 'wid') : array();
    }

    /**
     * Builds the delta row for one item.
     *
     * @param array      $row      Today's row (uid, wid, *_count, date).
     * @param array|null $baseline Earlier row to subtract, or null when none exists (all deltas are then 0).
     * @return array Row to pass to saveBehaviorDelta().
     */
    private function buildDelta(array $row, $baseline = null) {
        $reposts_delta   = 0;
        $comments_delta  = 0;
        $attitudes_delta = 0;

        if ($baseline !== null) {
            $reposts_delta   = $row['reposts_count'] - $baseline['reposts_count'];
            $comments_delta  = $row['comments_count'] - $baseline['comments_count'];
            $attitudes_delta = $row['attitudes_count'] - $baseline['attitudes_count'];
        }

        return array(
            'uid'             => $row['uid'],
            'wid'             => $row['wid'],
            'reposts_delta'   => $reposts_delta,
            'comments_delta'  => $comments_delta,
            'attitudes_delta' => $attitudes_delta,
            'date'            => $row['date'],
        );
    }

    /**
     * Records a delta row that could not be saved.
     *
     * The class has no writeLog() method and no parent class, so calling
     * $this->writeLog() would raise a fatal error on the first failed insert;
     * PHP's error_log() is used instead.
     *
     * @param array $delta The row that failed to save.
     */
    private function logInsertError(array $delta) {
        error_log('[spider/insert_error.log] ' . json_encode($delta));
    }
}
|
|
|
|
// Instantiating the class runs the whole job: all work happens in its constructor.
new spiderBehaviorDelta();
|
|
|