You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
119 lines
4.5 KiB
119 lines
4.5 KiB
<?php
|
|
/**
|
|
*
|
|
*/
|
|
include_once(SERVER_ROOT . "/model/mBase.php");
|
|
|
|
|
|
/**
 * Model for comments scraped from Weibo, backed by the `spider_weibo_comments` table.
 *
 * Reads go through a dWeiboComments data-access object. addComment() persists a
 * scraped comment payload twice: one JSON file per comment on disk, and one row
 * per comment in the table.
 */
class mWeiboComments extends mBase {
    /** Data-access object every query is delegated to. */
    private $obj;
    /** Name of the comments table. */
    private $tbl;

    public function __construct() {
        $this->obj = new dWeiboComments();
        $this->tbl = 'spider_weibo_comments';
    }

    /**
     * Fetch the comment rows whose `weibo_id` column matches.
     *
     * Pagination is applied only when both $page and $limit are positive;
     * otherwise an empty limit descriptor is handed to the data layer.
     *
     * @param mixed  $weibo_id value matched against the `weibo_id` column
     * @param int    $page     1-based page number (0 disables pagination)
     * @param int    $limit    page size (0 disables pagination)
     * @param string $order    order clause forwarded unchanged to selectAll();
     *                         NOTE(review): presumably interpolated into SQL, so
     *                         callers should only pass trusted values - confirm
     * @return mixed whatever dWeiboComments::selectAll() returns
     */
    public function getCommentByWeiboId($weibo_id, $page = 0, $limit = 0, $order = 'id asc') {
        $limit_info = array();
        if($page > 0 && $limit > 0) {
            // Offset of the first row on the requested page, then the page size.
            $limit_info = array(($page - 1) * $limit, $limit);
        }

        $where = array('sql' => '`weibo_id`=?', 'vals' => array($weibo_id));
        return $this->obj->selectAll($this->tbl, $where, $order, $limit_info);
    }

    /**
     * Fetch the comment whose `weibo_data_id` column matches.
     *
     * @param mixed $weibo_data_id value matched against the `weibo_data_id` column
     * @return mixed whatever dWeiboComments::select() returns
     */
    public function getCommentByWeiboDataId($weibo_data_id) {
        $where = array('sql' => '`weibo_data_id`=?', 'vals' => array($weibo_data_id));
        return $this->obj->select($this->tbl, $where);
    }

    /**
     * Report whether comment data needs to be inserted.
     *
     * NOTE(review): this method returns true on every path, so the lookup below
     * has no influence on the result. It also passes $max_weibo_data_id to a
     * lookup keyed on `weibo_id`, and never reads $weibo_id or $comment_count.
     * The intended rule cannot be derived from this file, so the behaviour is
     * deliberately left unchanged - confirm the intent before relying on it.
     *
     * @param mixed $weibo_id          unused
     * @param mixed $max_weibo_data_id id used for the lookup
     * @param mixed $comment_count     unused
     * @return bool always true
     */
    public function isNeedInsertData($weibo_id, $max_weibo_data_id, $comment_count) {
        $max_weibo_data_id_info = $this->getCommentByWeiboId($max_weibo_data_id, 1, 1, 'id desc');
        if(empty($max_weibo_data_id_info) || $max_weibo_data_id_info['spider_comment_status']) return true;
        return true;
    }

    /**
     * Persist one scraped page of comments for a weibo.
     *
     * Steps, in order:
     *  1. verify the weibo exists;
     *  2. decode the payload and require `ok` == 1;
     *  3. write each comment to its own JSON file (existing files are kept);
     *  4. upsert every comment into the table in a single execTrans2() call.
     *
     * Files are all written before the database is touched, so a file failure
     * aborts without inserting any row.
     *
     * @param mixed  $weibo_id    id handed to mWeibo::getWeiboById() and stored in `weibo_id`
     * @param string $source_json raw JSON payload; expected keys are `ok`, `data`
     *                            (list of comments with `id`, `text`, `created_at`)
     *                            and `total_number`
     * @return array|false false on any failure (see setError()/log), otherwise an
     *                     array with keys `total_number`, `max_id`, `weibo_id`, `is_load_all`
     */
    public function addComment($weibo_id, $source_json) {
        $obj = new mWeibo();
        $weibo_info = $obj->getWeiboById($weibo_id);
        if(empty($weibo_info)) {
            $this->setError('微博不存在');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博不存在:'.$weibo_id);
            return false;
        }

        $source_data = json_decode($source_json, true);
        // json_decode() gives null for malformed JSON and a scalar for scalar
        // JSON; neither may be indexed, so treat both as a failed fetch.
        if(!is_array($source_data) || !isset($source_data['ok']) || $source_data['ok'] != 1) {
            $this->setError('抓取失败');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取评论失败:'.$source_json);
            return false;
        }

        // A successful payload without a `data` list is handled as an empty page
        // instead of iterating/counting a non-array.
        $comments = array();
        if(isset($source_data['data']) && is_array($source_data['data'])) {
            $comments = $source_data['data'];
        }

        $this->makeSpiderDir(ZHISHIKU_SPIDER_TEMP_PATH);

        foreach($comments as $comment) {
            $weibo_data_id = $comment['id']+0;
            $source_json_save_path = sprintf(ZHISHIKU_SPIDER_COMMENT_PATH, $weibo_info['wid'], $weibo_data_id);

            // Create the grandparent first and the parent second so that both
            // levels get their owner/group set, not only the deepest one.
            $dir = dirname($source_json_save_path);
            $this->makeSpiderDir(dirname($dir));
            $this->makeSpiderDir($dir);

            if(!is_dir($dir)) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '创建目录失败:'.$source_json_save_path);
                return false;
            }

            // A comment that was already saved to disk is never overwritten.
            if(file_exists($source_json_save_path)) continue;

            file_put_contents($source_json_save_path, json_encode($comment), LOCK_EX);
            // Anything shorter than 10 bytes is treated as a failed write.
            if(!file_exists($source_json_save_path) || filesize($source_json_save_path) < 10) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '评论保存到文件失败:'.$source_json_save_path);
                return false;
            }

            chmod($source_json_save_path, 0755);
            chown($source_json_save_path, 'nobody');
            chgrp($source_json_save_path, 'nobody');
        }

        $sqls = array();
        $max_id = 0;
        foreach($comments as $comment) {
            $content = $comment['text'];
            $weibo_data_id = $comment['id']+0;
            $comment_time = date('Y-m-d H:i:s', strtotime($comment['created_at']));

            // The ON DUPLICATE KEY clause rewrites the same id, making a repeated
            // insert of an already stored comment a no-op.
            $sqls[] = array(
                'sql' => 'insert into '.$this->tbl.' (`weibo_id`, `weibo_data_id`, `content`, `comment_time`) values (?, ?, ?, ?) ON DUPLICATE KEY UPDATE `weibo_data_id`=?',
                'vals' => array($weibo_id, $weibo_data_id, $content, $comment_time, $weibo_data_id),
            );
            // Overwritten each pass: ends up as (id of the last comment) - 1.
            $max_id = $weibo_data_id - 1;
        }

        $res = $this->obj->execTrans2($sqls);
        if(!$res) {
            $this->setError('保存评论失败');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '保存评论失败:'.json_encode($sqls));
            return false;
        }

        return array(
            'total_number' => isset($source_data['total_number']) ? $source_data['total_number'] : null,
            'max_id' => $max_id,
            'weibo_id' => $weibo_id,
            // Fewer than 20 comments on a page is taken to mean the last page.
            'is_load_all' => count($comments) < 20,
        );
    }

    /**
     * Create $dir (recursively) if it is missing and hand it to user/group `nobody`.
     *
     * Does nothing when the directory already exists. Failure is not reported
     * here; callers that care re-check with is_dir().
     *
     * @param string $dir directory path to create
     * @return void
     */
    private function makeSpiderDir($dir) {
        if(is_dir($dir)) return;

        // mkdir() also fails when another process created the directory in the
        // meantime; only give up when the directory really is absent.
        if(!mkdir($dir, 0755, true) && !is_dir($dir)) return;

        chown($dir, 'nobody');
        chgrp($dir, 'nobody');
    }
}
|