Browse Source

数据增量逻辑修正

pull/57/head
pengda 6 days ago
parent
commit
9ff88ed5bd
  1. 6
      model/mWeiboBehavior.php
  2. 2
      queue/crontab/spider_behavior_data.php
  3. 81
      queue/crontab/spider_behavior_delta.php

6
model/mWeiboBehavior.php

@ -20,8 +20,8 @@ class mWeiboBehavior extends mBase {
return $this->obj->replace($this->tbl_data, $data); return $this->obj->replace($this->tbl_data, $data);
} }
public function getBehaviorByDate($date) { public function getBehaviorByWids($wids, $date) {
return $this->obj->selectAll($this->tbl_data, array('sql' => "`date`=?", 'vals' => array($date)), 'id desc', array(0, 10000)); return $this->obj->selectIn($this->tbl_data, array('wid' => $wids), array('sql' => "`date`= ?", 'vals' => array($date)));
} }
public function deleteExpireBehaviorData($date) { public function deleteExpireBehaviorData($date) {
@ -53,7 +53,7 @@ class mWeiboBehavior extends mBase {
$offset = ($page_num - 1) * $page_size; $offset = ($page_num - 1) * $page_size;
$orderby = "created_at desc"; $orderby = "created_at desc";
if(!$date) $orderby = "wid desc"; if (!$date) $orderby = "wid desc";
return $this->obj->selectAll($this->tbl_data, $where, $orderby, array($offset, $page_size)); return $this->obj->selectAll($this->tbl_data, $where, $orderby, array($offset, $page_size));
} }

2
queue/crontab/spider_behavior_data.php

@ -10,7 +10,7 @@ class spiderBehaviorData {
public function __construct() { public function __construct() {
//删除半年前的数据 //删除半年前的数据
$bobj = new mWeiboBehavior(); $bobj = new mWeiboBehavior();
$six_month_ago = date('Y-m-d', strtotime('-6 month')); $six_month_ago = date('Y-m-d', strtotime('-1 month'));
$bobj->deleteExpireBehaviorData($six_month_ago); $bobj->deleteExpireBehaviorData($six_month_ago);
$user_list = array_keys($GLOBALS['WEIBO_USER_LIST']); $user_list = array_keys($GLOBALS['WEIBO_USER_LIST']);

81
queue/crontab/spider_behavior_delta.php

@ -6,47 +6,70 @@
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php"); include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php");
define('DATE', $argv[1]);
class spiderBehaviorDelta { class spiderBehaviorDelta {
public function __construct() { public function __construct() {
//删除半年前的数据 //删除半年前的数据
$obj = new mWeiboBehavior(); $obj = new mWeiboBehavior();
$six_month_ago = date('Y-m-d', strtotime('-6 month')); $six_month_ago = date('Y-m-d', strtotime('-1 month'));
$obj->deleteExpireBehaviorDelta($six_month_ago); $obj->deleteExpireBehaviorDelta($six_month_ago);
//统计今日增量 //统计今日增量
$today = date('Y-m-d'); $today = date('Y-m-d',strtotime(DATE));
$today_data = $obj->getBehaviorByDate($today);
$today_data = array_column($today_data, null, 'wid');
$yesterday = date('Y-m-d', strtotime('-1 day')); $page_num = 1;
$yesterday_data = $obj->getBehaviorByDate($yesterday); $page_size = 2000;
$yesterday_data = array_column($yesterday_data, null, 'wid'); while (true) {
$today_data = $obj->getSpiderBehavior($today, 0, 0, $page_num, $page_size);
if (empty($today_data)) break;
$page_num++;
if (empty($yesterday_data)) return true; $today_data = array_column($today_data, null, 'wid');
foreach ($today_data as $k => $v) { $wids = array_column($today_data, 'wid');
$yesterday_reposts_count = 0;
$yesterday_comments_count = 0;
$yesterday_attitudes_count = 0;
if (isset($yesterday_data[$k])) { $yesterday = date('Y-m-d', strtotime('-1 day', strtotime($today)));
$yesterday_reposts_count = $yesterday_data[$k]['reposts_count']; $yesterday_data = $obj->getBehaviorByWids($wids, $yesterday);
$yesterday_comments_count = $yesterday_data[$k]['comments_count']; $yesterday_data = array_column($yesterday_data, null, 'wid');
$yesterday_attitudes_count = $yesterday_data[$k]['attitudes_count'];
} $three_day_ago = date('Y-m-d', strtotime('-2 day', strtotime($today)));
$three_day_ago_data = $obj->getBehaviorByWids($wids, $three_day_ago);
$three_day_ago_data = array_column($three_day_ago_data, null, 'wid');
//将前天的数据查询出来做兼容
if (empty($yesterday_data) && empty($three_day_ago_data)) return true;
foreach ($today_data as $k => $v) {
$reposts_delta = 0;
$comments_delta = 0;
$attitudes_delta = 0;
if (isset($yesterday_data[$k])) {
$reposts_delta = $v['reposts_count'] - $yesterday_data[$k]['reposts_count'];
$comments_delta = $v['comments_count'] - $yesterday_data[$k]['comments_count'];
$attitudes_delta = $v['attitudes_count'] - $yesterday_data[$k]['attitudes_count'];
}
if (isset($three_day_ago_data[$k]) && !isset($yesterday_data[$k])) {
$reposts_delta = $v['reposts_count'] - $three_day_ago_data[$k]['reposts_count'];
$comments_delta = $v['comments_count'] - $three_day_ago_data[$k]['comments_count'];
$attitudes_delta = $v['attitudes_count'] - $three_day_ago_data[$k]['attitudes_count'];
}
$delta = array(
'uid' => $v['uid'],
'wid' => $v['wid'],
'reposts_delta' => $reposts_delta,
'comments_delta' => $comments_delta,
'attitudes_delta' => $attitudes_delta,
'date' => $v['date'],
);
$delta = array( $res = $obj->saveBehaviorDelta($delta);
'uid' => $v['uid'], if (!$res) {
'wid' => $v['wid'], $this->writeLog('spider', 'insert_error.log', json_encode($delta));
'reposts_delta' => $v['reposts_count'] - $yesterday_reposts_count, }
'comments_delta' => $v['comments_count'] - $yesterday_comments_count,
'attitudes_delta' => $v['attitudes_count'] - $yesterday_attitudes_count,
'date' => $v['date'],
);
$res = $obj->saveBehaviorDelta($delta);
if (!$res) {
$this->writeLog('spider', 'insert_error.log', json_encode($delta));
} }
} }

Loading…
Cancel
Save