Browse Source

数据增量逻辑修正

pull/57/head
pengda 4 days ago
parent
commit
9ff88ed5bd
  1. 6
      model/mWeiboBehavior.php
  2. 2
      queue/crontab/spider_behavior_data.php
  3. 81
      queue/crontab/spider_behavior_delta.php

6
model/mWeiboBehavior.php

@ -20,8 +20,8 @@ class mWeiboBehavior extends mBase {
return $this->obj->replace($this->tbl_data, $data);
}
public function getBehaviorByDate($date) {
return $this->obj->selectAll($this->tbl_data, array('sql' => "`date`=?", 'vals' => array($date)), 'id desc', array(0, 10000));
/**
 * Look up rows in the behavior data table for a set of wids on one date.
 *
 * @param array  $wids Values handed to selectIn() under the 'wid' key
 *                     (presumably an IN-list on the `wid` column — confirm
 *                     against the selectIn() implementation).
 * @param string $date Value bound to the `date` placeholder.
 * @return mixed Whatever $this->obj->selectIn() returns for these filters.
 */
public function getBehaviorByWids($wids, $date) {
    // Filter handed to selectIn() as its "in" argument.
    $in_filter = array('wid' => $wids);
    // Extra condition with the date passed as a bound value.
    $date_filter = array(
        'sql' => "`date`= ?",
        'vals' => array($date),
    );
    return $this->obj->selectIn($this->tbl_data, $in_filter, $date_filter);
}
public function deleteExpireBehaviorData($date) {
@ -53,7 +53,7 @@ class mWeiboBehavior extends mBase {
$offset = ($page_num - 1) * $page_size;
$orderby = "created_at desc";
if(!$date) $orderby = "wid desc";
if (!$date) $orderby = "wid desc";
return $this->obj->selectAll($this->tbl_data, $where, $orderby, array($offset, $page_size));
}

2
queue/crontab/spider_behavior_data.php

@ -10,7 +10,7 @@ class spiderBehaviorData {
public function __construct() {
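//Delete expired data. Note: the effective cutoff is one month ago (the later '-1 month' assignment to $six_month_ago wins), not half a year.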
$bobj = new mWeiboBehavior();
$six_month_ago = date('Y-m-d', strtotime('-6 month'));
$six_month_ago = date('Y-m-d', strtotime('-1 month'));
$bobj->deleteExpireBehaviorData($six_month_ago);
$user_list = array_keys($GLOBALS['WEIBO_USER_LIST']);

81
queue/crontab/spider_behavior_delta.php

@ -6,47 +6,70 @@
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
define('DATE', $argv[1]);
class spiderBehaviorDelta {
public function __construct() {
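//Delete expired data. Note: the effective cutoff is one month ago (the later '-1 month' assignment to $six_month_ago wins), not half a year.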
$obj = new mWeiboBehavior();
$six_month_ago = date('Y-m-d', strtotime('-6 month'));
$six_month_ago = date('Y-m-d', strtotime('-1 month'));
$obj->deleteExpireBehaviorDelta($six_month_ago);
//统计今日增量
$today = date('Y-m-d');
$today_data = $obj->getBehaviorByDate($today);
$today_data = array_column($today_data, null, 'wid');
$today = date('Y-m-d',strtotime(DATE));
$yesterday = date('Y-m-d', strtotime('-1 day'));
$yesterday_data = $obj->getBehaviorByDate($yesterday);
$yesterday_data = array_column($yesterday_data, null, 'wid');
$page_num = 1;
$page_size = 2000;
while (true) {
$today_data = $obj->getSpiderBehavior($today, 0, 0, $page_num, $page_size);
if (empty($today_data)) break;
$page_num++;
if (empty($yesterday_data)) return true;
$today_data = array_column($today_data, null, 'wid');
foreach ($today_data as $k => $v) {
$yesterday_reposts_count = 0;
$yesterday_comments_count = 0;
$yesterday_attitudes_count = 0;
$wids = array_column($today_data, 'wid');
if (isset($yesterday_data[$k])) {
$yesterday_reposts_count = $yesterday_data[$k]['reposts_count'];
$yesterday_comments_count = $yesterday_data[$k]['comments_count'];
$yesterday_attitudes_count = $yesterday_data[$k]['attitudes_count'];
}
$yesterday = date('Y-m-d', strtotime('-1 day', strtotime($today)));
$yesterday_data = $obj->getBehaviorByWids($wids, $yesterday);
$yesterday_data = array_column($yesterday_data, null, 'wid');
$three_day_ago = date('Y-m-d', strtotime('-2 day', strtotime($today)));
$three_day_ago_data = $obj->getBehaviorByWids($wids, $three_day_ago);
$three_day_ago_data = array_column($three_day_ago_data, null, 'wid');
//将前天的数据查询出来做兼容
if (empty($yesterday_data) && empty($three_day_ago_data)) return true;
foreach ($today_data as $k => $v) {
$reposts_delta = 0;
$comments_delta = 0;
$attitudes_delta = 0;
if (isset($yesterday_data[$k])) {
$reposts_delta = $v['reposts_count'] - $yesterday_data[$k]['reposts_count'];
$comments_delta = $v['comments_count'] - $yesterday_data[$k]['comments_count'];
$attitudes_delta = $v['attitudes_count'] - $yesterday_data[$k]['attitudes_count'];
}
if (isset($three_day_ago_data[$k]) && !isset($yesterday_data[$k])) {
$reposts_delta = $v['reposts_count'] - $three_day_ago_data[$k]['reposts_count'];
$comments_delta = $v['comments_count'] - $three_day_ago_data[$k]['comments_count'];
$attitudes_delta = $v['attitudes_count'] - $three_day_ago_data[$k]['attitudes_count'];
}
$delta = array(
'uid' => $v['uid'],
'wid' => $v['wid'],
'reposts_delta' => $reposts_delta,
'comments_delta' => $comments_delta,
'attitudes_delta' => $attitudes_delta,
'date' => $v['date'],
);
$delta = array(
'uid' => $v['uid'],
'wid' => $v['wid'],
'reposts_delta' => $v['reposts_count'] - $yesterday_reposts_count,
'comments_delta' => $v['comments_count'] - $yesterday_comments_count,
'attitudes_delta' => $v['attitudes_count'] - $yesterday_attitudes_count,
'date' => $v['date'],
);
$res = $obj->saveBehaviorDelta($delta);
if (!$res) {
$this->writeLog('spider', 'insert_error.log', json_encode($delta));
$res = $obj->saveBehaviorDelta($delta);
if (!$res) {
$this->writeLog('spider', 'insert_error.log', json_encode($delta));
}
}
}

Loading…
Cancel
Save