diff --git a/model/mWeiboBehavior.php b/model/mWeiboBehavior.php index df276dc..bfbb2d0 100644 --- a/model/mWeiboBehavior.php +++ b/model/mWeiboBehavior.php @@ -20,8 +20,8 @@ class mWeiboBehavior extends mBase { return $this->obj->replace($this->tbl_data, $data); } - public function getBehaviorByDate($date) { - return $this->obj->selectAll($this->tbl_data, array('sql' => "`date`=?", 'vals' => array($date)), 'id desc', array(0, 10000)); + public function getBehaviorByWids($wids, $date) { + return $this->obj->selectIn($this->tbl_data, array('wid' => $wids), array('sql' => "`date`= ?", 'vals' => array($date))); } public function deleteExpireBehaviorData($date) { @@ -53,7 +53,7 @@ class mWeiboBehavior extends mBase { $offset = ($page_num - 1) * $page_size; $orderby = "created_at desc"; - if(!$date) $orderby = "wid desc"; + if (!$date) $orderby = "wid desc"; return $this->obj->selectAll($this->tbl_data, $where, $orderby, array($offset, $page_size)); } diff --git a/queue/crontab/spider_behavior_data.php b/queue/crontab/spider_behavior_data.php index a0d5c2e..c45e265 100644 --- a/queue/crontab/spider_behavior_data.php +++ b/queue/crontab/spider_behavior_data.php @@ -10,7 +10,7 @@ class spiderBehaviorData { public function __construct() { //删除半年前的数据 $bobj = new mWeiboBehavior(); - $six_month_ago = date('Y-m-d', strtotime('-6 month')); + $six_month_ago = date('Y-m-d', strtotime('-1 month')); $bobj->deleteExpireBehaviorData($six_month_ago); $user_list = array_keys($GLOBALS['WEIBO_USER_LIST']); diff --git a/queue/crontab/spider_behavior_delta.php b/queue/crontab/spider_behavior_delta.php index 7d8b32a..e3110bc 100644 --- a/queue/crontab/spider_behavior_delta.php +++ b/queue/crontab/spider_behavior_delta.php @@ -6,47 +6,70 @@ include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php"); +define('DATE', $argv[1]); + class spiderBehaviorDelta { public function __construct() { //删除半年前的数据 $obj = new mWeiboBehavior(); - $six_month_ago = date('Y-m-d', strtotime('-6 month')); + $six_month_ago = date('Y-m-d', strtotime('-1 month')); $obj->deleteExpireBehaviorDelta($six_month_ago); //统计今日增量 - $today = date('Y-m-d'); - $today_data = $obj->getBehaviorByDate($today); - $today_data = array_column($today_data, null, 'wid'); + $today = date('Y-m-d',strtotime(DATE)); - $yesterday = date('Y-m-d', strtotime('-1 day')); - $yesterday_data = $obj->getBehaviorByDate($yesterday); - $yesterday_data = array_column($yesterday_data, null, 'wid'); + $page_num = 1; + $page_size = 2000; + while (true) { + $today_data = $obj->getSpiderBehavior($today, 0, 0, $page_num, $page_size); + if (empty($today_data)) break; + $page_num++; - if (empty($yesterday_data)) return true; + $today_data = array_column($today_data, null, 'wid'); - foreach ($today_data as $k => $v) { - $yesterday_reposts_count = 0; - $yesterday_comments_count = 0; - $yesterday_attitudes_count = 0; + $wids = array_column($today_data, 'wid'); - if (isset($yesterday_data[$k])) { - $yesterday_reposts_count = $yesterday_data[$k]['reposts_count']; - $yesterday_comments_count = $yesterday_data[$k]['comments_count']; - $yesterday_attitudes_count = $yesterday_data[$k]['attitudes_count']; - } + $yesterday = date('Y-m-d', strtotime('-1 day', strtotime($today))); + $yesterday_data = $obj->getBehaviorByWids($wids, $yesterday); + $yesterday_data = array_column($yesterday_data, null, 'wid'); + + $three_day_ago = date('Y-m-d', strtotime('-2 day', strtotime($today))); + $three_day_ago_data = $obj->getBehaviorByWids($wids, $three_day_ago); + $three_day_ago_data = array_column($three_day_ago_data, null, 'wid'); + + //将前天的数据查询出来做兼容 + if (empty($yesterday_data) && empty($three_day_ago_data)) return true; + + foreach ($today_data as $k => $v) { + $reposts_delta = 0; + $comments_delta = 0; + $attitudes_delta = 0; + + if (isset($yesterday_data[$k])) { + $reposts_delta = $v['reposts_count'] - $yesterday_data[$k]['reposts_count']; + $comments_delta = $v['comments_count'] - $yesterday_data[$k]['comments_count']; + $attitudes_delta = $v['attitudes_count'] - $yesterday_data[$k]['attitudes_count']; + } + + if (isset($three_day_ago_data[$k]) && !isset($yesterday_data[$k])) { + $reposts_delta = $v['reposts_count'] - $three_day_ago_data[$k]['reposts_count']; + $comments_delta = $v['comments_count'] - $three_day_ago_data[$k]['comments_count']; + $attitudes_delta = $v['attitudes_count'] - $three_day_ago_data[$k]['attitudes_count']; + } + + $delta = array( + 'uid' => $v['uid'], + 'wid' => $v['wid'], + 'reposts_delta' => $reposts_delta, + 'comments_delta' => $comments_delta, + 'attitudes_delta' => $attitudes_delta, + 'date' => $v['date'], + ); - $delta = array( - 'uid' => $v['uid'], - 'wid' => $v['wid'], - 'reposts_delta' => $v['reposts_count'] - $yesterday_reposts_count, - 'comments_delta' => $v['comments_count'] - $yesterday_comments_count, - 'attitudes_delta' => $v['attitudes_count'] - $yesterday_attitudes_count, - 'date' => $v['date'], - ); - - $res = $obj->saveBehaviorDelta($delta); - if (!$res) { - $this->writeLog('spider', 'insert_error.log', json_encode($delta)); + $res = $obj->saveBehaviorDelta($delta); + if (!$res) { + $this->writeLog('spider', 'insert_error.log', json_encode($delta)); + } } }