|
|
@ -6,67 +6,114 @@ include_once(SERVER_ROOT . "/model/mBase.php"); |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Model for crawled Weibo comments.
 *
 * Two responsibilities live in this class:
 *  - persistence of crawled comments (raw JSON on disk plus rows in the
 *    `spider_weibo_comments` table), and
 *  - a small rule-based sentiment classifier (`analyze()`) driven by the
 *    keyword lists and regular expressions in `$config`.
 */
class mWeiboComments extends mBase {

    /**
     * Sentiment rules: substring keywords and PCRE patterns per polarity.
     *
     * Keywords are matched with mb_strpos(), so listing the same keyword twice
     * has no effect; each keyword appears once.
     *
     * @var array
     */
    private $config = [
        'positive' => [
            'keywords' => [
                '有效', '好用', '好了', '治愈', '见效', '不错', '管用', '有用', '感恩',
                '灵验', '缓解', '痊愈', '神效', '推荐', '感谢',
                '有效果', '奏效', '改善', '见效快', '神奇',
            ],
            'patterns' => [
                '/(喝|吃|用)了?\s*\d+\s*次?[就]?(好|痊愈|缓解|不咳|不痛)了/u',
                '/(效果|结果)\s*(非常|很)?\s*(好|明显|不错|显著|惊人)/u',
                '/(尝试|试了|用了)\s*\d+\s*[天个]?\s*(就)?(好|痊愈|缓解|见效)/u',
                '/(咳嗽|咳|痛|疼|症状)\s*(明显)?\s*(减轻|缓解|消失|好了)/u',
                '/(感谢|谢谢)\s*(分享|博主|张医生|宝旬)/u',
            ],
        ],
        'negative' => [
            'keywords' => [
                '无效', '没用', '不好', '没效果', '不行', '不见效', '没好转',
                '加重', '恶化', '白费', '失望', '避雷',
                '无效果', '无改善', '没作用', '不灵', '骗人', '忽悠', '上当',
            ],
            'patterns' => [
                '/(还是|依然|仍旧|仍然)\s*(咳|难受|痛|疼|没效果)/u',
                '/(一点|完全|根本|丝毫)\s*没(效果|用|好转|作用|改善)/u',
                '/(不仅|不但)\s*没(好|改善).*(反而|而且)\s*(加重|恶化)/u',
                '/(浪费|白费|白忙|白折腾)\s*(时间|精力)/u',
                '/(失望|后悔|上当|骗人|忽悠|别信)\s*(了|吧|!)/u',
            ],
        ],
    ];

    /** @var dWeiboComments Data-access object used for all queries. */
    private $obj;

    /** @var string Name of the comments table. */
    private $tbl;

    /**
     * Set up the data-access object and table name.
     *
     * A class may declare only one constructor, so the empty constructor and
     * the initialising one are merged into this single definition.
     */
    public function __construct() {
        $this->obj = new dWeiboComments();
        $this->tbl = 'spider_weibo_comments';
    }

    /**
     * Normalise a comment before rule matching.
     *
     * Collapses whitespace runs to a single space, strips every character that
     * is not a Han character, punctuation, word character or whitespace, and
     * trims the result.
     *
     * @param string $comment Raw comment text.
     * @return string Normalised text; '' when the input is not valid UTF-8
     *                (preg_replace() returns null in that case).
     */
    private function preprocess($comment) {
        $collapsed = preg_replace('/\s+/u', ' ', (string) $comment);
        if ($collapsed === null) {
            return '';
        }

        $cleaned = preg_replace('/[^\p{Han}\p{P}\w\s]/u', '', $collapsed);
        if ($cleaned === null) {
            return '';
        }

        return trim($cleaned);
    }

    /**
     * Fetch comments belonging to one weibo.
     *
     * @param mixed  $weibo_id Value matched against the `weibo_id` column.
     * @param int    $page     1-based page number; paging is applied only when
     *                         both $page and $limit are greater than 0.
     * @param int    $limit    Page size.
     * @param string $order    ORDER BY clause passed through to the DAO.
     * @return mixed Whatever dWeiboComments::selectAll() returns.
     */
    public function getCommentByWeiboId($weibo_id, $page = 0, $limit = 0, $order = 'id asc') {
        $limit_info = array();
        if ($page > 0 && $limit > 0) {
            $limit_info = array(($page - 1) * $limit, $limit);
        }

        $where = array('sql' => '`weibo_id`=?', 'vals' => array($weibo_id));

        return $this->obj->selectAll($this->tbl, $where, $order, $limit_info);
    }

    /**
     * Fetch a comment by its `weibo_data_id`.
     *
     * @param mixed $weibo_data_id Value matched against the `weibo_data_id` column.
     * @return mixed Whatever dWeiboComments::select() returns.
     */
    public function getCommentByWeiboDataId($weibo_data_id) {
        $where = array('sql' => '`weibo_data_id`=?', 'vals' => array($weibo_data_id));

        return $this->obj->select($this->tbl, $where);
    }

    /**
     * Classify a comment as 'positive', 'negative' or 'neutral'.
     *
     * Rules are evaluated in a fixed order and the first hit wins:
     * negative keywords, positive keywords, negative patterns, positive patterns.
     *
     * NOTE(review): keyword matching is plain substring search, so a negated
     * phrase that is not itself in the negative list but contains a positive
     * keyword will be reported as positive — confirm this is acceptable.
     *
     * @param string $comment Raw comment text.
     * @return string One of 'negative', 'positive', 'neutral'.
     */
    public function analyze($comment) {
        $comment = $this->preprocess($comment);

        foreach ($this->config['negative']['keywords'] as $word) {
            if (mb_strpos($comment, $word) !== false) return 'negative';
        }

        foreach ($this->config['positive']['keywords'] as $word) {
            if (mb_strpos($comment, $word) !== false) return 'positive';
        }

        foreach ($this->config['negative']['patterns'] as $pattern) {
            if (preg_match($pattern, $comment)) return 'negative';
        }

        foreach ($this->config['positive']['patterns'] as $pattern) {
            if (preg_match($pattern, $comment)) return 'positive';
        }

        return 'neutral';
    }

    /**
     * Decide whether newly crawled comments need to be inserted.
     *
     * NOTE(review): the first lookup passes $max_weibo_data_id to a query on
     * the `weibo_id` column; this may have been meant to call
     * getCommentByWeiboDataId() — confirm with the caller before changing.
     *
     * @param mixed $weibo_id          Weibo whose stored comments are inspected.
     * @param mixed $max_weibo_data_id Largest comment id in the crawled batch.
     * @return bool False only when a stored comment exists whose
     *              `weibo_data_id` is >= $max_weibo_data_id; true otherwise.
     */
    public function isNeedInsertData($weibo_id, $max_weibo_data_id) {
        $max_weibo_data_id_info = $this->getCommentByWeiboId($max_weibo_data_id, 1, 1);
        if (empty($max_weibo_data_id_info)) return true;

        $weibo_info = $this->getCommentByWeiboId($weibo_id, 1, 1);
        if (empty($weibo_info)) return true;
        if ($max_weibo_data_id <= $weibo_info[0]['weibo_data_id']) return false;

        return true;
    }

    /**
     * Create a directory (recursively) owned by nobody:nobody if it is missing.
     *
     * @param string $dir Directory path.
     * @return bool Whether the directory exists after the call.
     */
    private function ensureDirectory($dir) {
        if (!is_dir($dir)) {
            mkdir($dir, 0755, true);
            chown($dir, 'nobody');
            chgrp($dir, 'nobody');
        }

        return is_dir($dir);
    }

    /**
     * Persist a crawled comment payload for one weibo.
     *
     * Each comment is written to its own JSON file (skipped when the file
     * already exists) and then all remaining comments are upserted into the
     * comments table in one transaction. Comments whose file could not be
     * written are dropped from the database batch.
     *
     * @param mixed  $weibo_id    Local id of the weibo the comments belong to.
     * @param string $source_json Raw JSON response from the crawler; expected
     *                            to decode to an array with `ok` == 1 and a
     *                            `data` list of comments.
     * @return bool True on success (including "nothing to save"), false when
     *              the weibo is unknown, the payload reports failure, or the
     *              database transaction fails.
     */
    public function addComment($weibo_id, $source_json) {
        $obj = new mWeibo();
        $weibo_info = $obj->getWeiboById($weibo_id);
        if (empty($weibo_info)) {
            $this->setError('微博不存在');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博不存在:'.$weibo_id);
            return false;
        }

        $source_data = json_decode($source_json, true);
        // json_decode() yields null (or a scalar) for malformed input; treat
        // that the same way as an explicit failure flag.
        if (!is_array($source_data) || !isset($source_data['ok']) || $source_data['ok'] != 1) {
            $this->setError('抓取失败');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取评论失败:'.$source_json);
            return false;
        }

        $this->ensureDirectory(ZHISHIKU_SPIDER_TEMP_PATH);

        // Nothing to store: same outcome as an empty batch further down.
        if (empty($source_data['data']) || !is_array($source_data['data'])) return true;

        foreach ($source_data['data'] as $key => $comment) {
            $weibo_data_id = $comment['id'] + 0;
            $source_json_save_path = sprintf(ZHISHIKU_SPIDER_COMMENT_PATH, $weibo_info['wid'], $weibo_data_id);

            // Create the grandparent first so that it also gets the
            // nobody:nobody ownership, then the direct parent.
            $this->ensureDirectory(dirname(dirname($source_json_save_path)));
            $dir = dirname($source_json_save_path);
            if (!$this->ensureDirectory($dir)) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '创建目录失败:'.$source_json_save_path);
                continue;
            }

            if (file_exists($source_json_save_path)) continue;

            file_put_contents($source_json_save_path, json_encode($comment), LOCK_EX);
            if (!file_exists($source_json_save_path) || filesize($source_json_save_path) < 10) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '评论保存到文件失败:'.$source_json_save_path);
                // Keep the database in step with what is actually on disk.
                unset($source_data['data'][$key]);
                continue;
            }

            chmod($source_json_save_path, 0755);
            chown($source_json_save_path, 'nobody');
            chgrp($source_json_save_path, 'nobody');
        }
        if (empty($source_data['data'])) return true;

        $sqls = array();
        foreach ($source_data['data'] as $comment) {
            $content = $comment['text'];
            $weibo_data_id = $comment['id'] + 0;
            $comment_time = date('Y-m-d H:i:s', strtotime($comment['created_at']));

            $sqls[] = array(
                'sql' => 'insert into '.$this->tbl.' (`weibo_id`, `weibo_data_id`, `content`, `comment_time`) values (?, ?, ?, ?) ON DUPLICATE KEY UPDATE `weibo_data_id`=?',
                'vals' => array($weibo_id, $weibo_data_id, $content, $comment_time, $weibo_data_id),
            );
        }

        $res = $this->obj->execTrans2($sqls);
        if (!$res) {
            $this->setError('保存评论失败');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '保存评论失败:'.json_encode($sqls));
            return false;
        }

        return true;
    }

}