23 changed files with 1840 additions and 23 deletions
@ -0,0 +1 @@ |
|||||
|
.vscode |
@ -0,0 +1,23 @@ |
|||||
|
<?php |
||||
|
/** |
||||
|
* |
||||
|
*/ |
||||
|
include_once SERVER_ROOT . '/data/dBase.php'; |
||||
|
|
||||
|
/**
 * Data object for the `spider_weibo_comments` table.
 *
 * Only declares metadata: the column list of the table and an empty
 * primary-key list. How dBase consumes these properties is not visible
 * from this file.
 */
class dWeiboComments extends dBase
{
    /** @var array Table name => list of its column names. */
    protected $fieldlist = [
        'spider_weibo_comments' => [
            'id',
            'weibo_id',
            'content',
            'weibo_data_id',
            'is_search',
            'comment_time',
            'create_time',
        ],
    ];

    /** @var array Intentionally left empty for this table. */
    protected $primary_keys = [];
}
||||
|
|
@ -0,0 +1,149 @@ |
|||||
|
<?php |
||||
|
/** |
||||
|
* |
||||
|
*/ |
||||
|
include_once(SERVER_ROOT . "/model/mBase.php"); |
||||
|
|
||||
|
|
||||
|
/**
 * Model for Weibo comments stored in the `spider_weibo_comments` table.
 *
 * Reads go through a dWeiboComments data object. addComment() archives every
 * crawled comment as a JSON file on disk and inserts it into the table.
 */
class mWeiboComments extends mBase {

    /** @var dWeiboComments Data object every query is sent through. */
    private $obj;

    /** @var string Name of the comments table. */
    private $tbl;

    public function __construct() {
        $this->obj = new dWeiboComments();
        $this->tbl = 'spider_weibo_comments';
    }

    /**
     * Builds the WHERE clause shared by the list and the count query.
     *
     * Values are bound through `?` placeholders instead of being spliced into
     * the SQL text, so string values and values containing quotes are safe.
     * Column names (the keys of $condition) are still inserted verbatim and
     * must therefore come from trusted code, never from user input.
     *
     * @param mixed $weibo_id  Value matched against the `weibo_id` column.
     * @param array $condition column => scalar (equality) or column => array (IN list).
     * @return array array('sql' => string, 'vals' => array) in the shape the data object expects.
     */
    private function buildCommentWhere($weibo_id, $condition) {
        $sql = "1=1 ";
        $vals = array();

        if (!empty($condition)) {
            foreach ($condition as $key => $val) {
                if (is_array($val)) {
                    if (empty($val)) {
                        // `col in ()` is a syntax error; an empty set can never match.
                        $sql .= " and 1=0";
                        continue;
                    }
                    $marks = implode(',', array_fill(0, count($val), '?'));
                    $sql .= " and {$key} in ({$marks})";
                    foreach ($val as $item) {
                        $vals[] = $item;
                    }
                } else {
                    $sql .= " and {$key}=?";
                    $vals[] = $val;
                }
            }
        }

        $sql .= " and `weibo_id`=?";
        $vals[] = $weibo_id;

        return array('sql' => $sql, 'vals' => $vals);
    }

    /**
     * Creates a directory (recursively) and hands it to user/group `nobody`
     * when it does not exist yet.
     *
     * @param string $dir Directory path.
     * @return bool Whether the directory exists after the call.
     */
    private function ensureSpiderDir($dir) {
        if (!is_dir($dir)) {
            mkdir($dir, 0755, true);
            chown($dir, 'nobody');
            chgrp($dir, 'nobody');
        }
        return is_dir($dir);
    }

    /**
     * Returns the comments of one weibo.
     *
     * @param mixed  $weibo_id  Value matched against the `weibo_id` column.
     * @param int    $page      1-based page number; paging is applied only when $page and $limit are both > 0.
     * @param int    $limit     Page size.
     * @param string $order     ORDER BY expression handed to the data object unchanged.
     * @param array  $condition Extra filters, see buildCommentWhere().
     * @return mixed Result of dWeiboComments::selectAll().
     */
    public function getCommentByWeiboId($weibo_id, $page = 0, $limit = 0, $order = 'id asc', $condition = array()) {
        $limit_info = array();
        if ($page > 0 && $limit > 0) $limit_info = array(($page - 1) * $limit, $limit);

        return $this->obj->selectAll($this->tbl, $this->buildCommentWhere($weibo_id, $condition), $order, $limit_info);
    }

    /**
     * Counts the comments of one weibo.
     *
     * @param mixed $weibo_id  Value matched against the `weibo_id` column.
     * @param array $condition Extra filters, see buildCommentWhere().
     * @return mixed Result of dWeiboComments::count().
     */
    public function getCommentCountByWeiboId($weibo_id, $condition = array()) {
        return $this->obj->count($this->tbl, $this->buildCommentWhere($weibo_id, $condition));
    }

    /**
     * Looks a comment up by the id it carries in the crawled data.
     *
     * @param mixed $weibo_data_id Value matched against the `weibo_data_id` column.
     * @return mixed Result of dWeiboComments::select().
     */
    public function getCommentByWeiboDataId($weibo_data_id) {
        return $this->obj->select($this->tbl, array('sql' => '`weibo_data_id`=?', 'vals' => array($weibo_data_id)));
    }

    /**
     * Reports whether comment data should be inserted.
     *
     * NOTE(review): every path returns true, and $weibo_id / $comment_count
     * are never read. The lookup passes $max_weibo_data_id where a weibo id is
     * expected and then reads 'spider_comment_status', which is not one of the
     * columns declared for this table. The intended rule cannot be derived
     * from this code, so the behaviour is left as it was; confirm the rule
     * before relying on this method for gating.
     *
     * @return bool Always true at present.
     */
    public function isNeedInsertData($weibo_id, $max_weibo_data_id, $comment_count) {
        $max_weibo_data_id_info = $this->getCommentByWeiboId($max_weibo_data_id, 1, 1, 'id desc');
        if (empty($max_weibo_data_id_info) || $max_weibo_data_id_info['spider_comment_status']) return true;
        return true;
    }

    /**
     * Stores one crawled page of comments for a weibo.
     *
     * Each comment is first written to its own JSON file (existing files are
     * left alone), then all comments of the page are inserted in a single
     * execTrans2() call.
     *
     * @param mixed  $weibo_id    Id handed to mWeibo::getWeiboById().
     * @param string $source_json Raw JSON of the crawl response; expected to
     *                            carry `ok`, `data` and `total_number`.
     * @return array|false On success an array with `total_number`, `max_id`
     *                     (id of the last comment minus one, 0 for an empty
     *                     page), `weibo_id` and `is_load_all`; false on failure.
     */
    public function addComment($weibo_id, $source_json) {
        $obj = new mWeibo();
        $weibo_info = $obj->getWeiboById($weibo_id);
        if (empty($weibo_info)) {
            $this->setError('微博不存在');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博不存在:'.$weibo_id);
            return false;
        }

        $source_data = json_decode($source_json, true);
        // Undecodable JSON decodes to null; treat it like an explicit non-ok response.
        if (!is_array($source_data) || !isset($source_data['ok']) || $source_data['ok'] != 1) {
            $this->setError('抓取失败');
            $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取评论失败:'.$source_json);
            return false;
        }

        // A response without a usable `data` list is handled as an empty page.
        $comments = (isset($source_data['data']) && is_array($source_data['data'])) ? $source_data['data'] : array();

        $this->ensureSpiderDir(ZHISHIKU_SPIDER_TEMP_PATH);

        foreach ($comments as $comment) {
            $weibo_data_id = $comment['id'] + 0;
            $source_json_save_path = sprintf(ZHISHIKU_SPIDER_COMMENT_PATH, $weibo_info['wid'], $weibo_data_id);

            // Grandparent first so that it also receives the ownership change.
            $this->ensureSpiderDir(dirname(dirname($source_json_save_path)));
            if (!$this->ensureSpiderDir(dirname($source_json_save_path))) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '创建目录失败:'.$source_json_save_path);
                return false;
            }

            if (file_exists($source_json_save_path)) continue;

            // The byte count returned by the write replaces a second stat of the
            // file; fewer than 10 bytes is treated as a failed save, as before.
            $written = file_put_contents($source_json_save_path, json_encode($comment), LOCK_EX);
            if ($written === false || $written < 10) {
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '评论保存到文件失败:'.$source_json_save_path);
                return false;
            }

            chmod($source_json_save_path, 0755);
            chown($source_json_save_path, 'nobody');
            chgrp($source_json_save_path, 'nobody');
        }

        $sqls = array();
        $max_id = 0;
        foreach ($comments as $comment) {
            $content = $comment['text'];
            $weibo_data_id = $comment['id'] + 0;
            $comment_time = date('Y-m-d H:i:s', strtotime($comment['created_at']));

            $sqls[] = array(
                'sql' => 'insert into '.$this->tbl.' (`weibo_id`, `weibo_data_id`, `content`, `comment_time`) values (?, ?, ?, ?) ON DUPLICATE KEY UPDATE `weibo_data_id`=?',
                'vals' => array($weibo_id, $weibo_data_id, $content, $comment_time, $weibo_data_id),
            );
            $max_id = $weibo_data_id - 1;
        }

        // Nothing to insert for an empty page, so do not open a transaction for it.
        if (!empty($sqls)) {
            $res = $this->obj->execTrans2($sqls);
            if (!$res) {
                $this->setError('保存评论失败');
                $this->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '保存评论失败:'.json_encode($sqls));
                return false;
            }
        }

        return array(
            'total_number' => isset($source_data['total_number']) ? $source_data['total_number'] : null,
            'max_id' => $max_id,
            'weibo_id' => $weibo_id,
            // Fewer than 20 comments is taken to mean the last page was reached
            // (20 is presumably the crawl page size — confirm against the spider).
            'is_load_all' => count($comments) < 20,
        );
    }

}
@ -0,0 +1,38 @@ |
|||||
|
<?php |
||||
|
/** |
||||
|
* Weibo comment crawl task: queues weibos whose comments still have to be crawled |
||||
|
* @package crontab |
||||
|
*/ |
||||
|
|
||||
|
include_once(dirname(dirname(dirname(__FILE__))) . "/library/publicBase.php"); |
||||
|
|
||||
|
/**
 * Fills the Redis comment-crawl queue.
 *
 * All work happens in the constructor, so instantiating the class runs the
 * task once.
 */
class addSpiderWeiboCommentTask
{
    /**
     * Queues one crawl task per weibo that is not tracked yet.
     *
     * Nothing is queued while the list _RQ_SPIDER_WEIBO_COMMENT still has
     * entries. Otherwise the first page (500 rows) of weibos matching the
     * uid / spider_comment_status filter is read; every weibo whose id is not
     * yet in the set _RS_SPIDER_WEIBO_COMMENT is pushed onto the list and its
     * id is added to the set.
     */
    public function __construct()
    {
        $weibo_model = new mWeibo();
        $redis = $weibo_model->initRedis();

        // Pending tasks left over: leave the queue alone this round.
        if ($redis->llen(_RQ_SPIDER_WEIBO_COMMENT) + 0 > 0) {
            return true;
        }

        $filters = [
            'uid' => WEIBO_USER_ZHANG,
            'spider_comment_status' => SPIDER_COMMENT_STATUS_NO,
        ];
        $rows_per_page = 500;
        $last_page = 1; // only the first page is processed per run

        $page = 1;
        while ($page <= $last_page) {
            $weibos = $weibo_model->getWeiboList($filters, $page, $rows_per_page);
            if (empty($weibos)) {
                break;
            }

            foreach ($weibos as $weibo) {
                $already_tracked = $redis->sIsMember(_RS_SPIDER_WEIBO_COMMENT, $weibo['id']);
                if ($already_tracked) {
                    continue;
                }

                $task = ['weibo_id' => $weibo['id'], 'max_id' => 0];
                $redis->lpush(_RQ_SPIDER_WEIBO_COMMENT, json_encode($task));
                $redis->sAdd(_RS_SPIDER_WEIBO_COMMENT, $weibo['id']);
            }

            $page++;
        }

        return true;
    }
}

new addSpiderWeiboCommentTask();
@ -0,0 +1,119 @@ |
|||||
|
<?php |
||||
|
include_once dirname(dirname(__FILE__)).'/base/dealBase.php'; |
||||
|
|
||||
|
// Process code taken from the command line. It selects this process's cookie
// from the shared cookie map, and process 0 is the one that manages the proxy IP.
define('PROC_CODE', $argv[2]);

/**
 * Queue worker that crawls the comments of one weibo page per deal() call.
 *
 * Tasks are JSON objects with `weibo_id` and `max_id`, taken from the Redis
 * list named by _RQ_SPIDER_WEIBO_COMMENT. How deal() is scheduled and how its
 * return value is interpreted is decided by dealBase, which is not visible here.
 */
class spiderWeiboComment extends dealBase {

    /** @var string Redis list holding the pending crawl tasks. */
    private $task_key = _RQ_SPIDER_WEIBO_COMMENT;

    /**
     * Sets the process limit from the daemon configuration and turns on
     * the `is_while` flag.
     */
    public function setPara() {
        $this->processnum = $GLOBALS['DAEMON_NUMLIMIT'][SPIDER_WEIBO_COMMENT];
        $this->is_while = true;
    }

    /**
     * Handles one crawl task.
     *
     * Steps: load this process's cookie, obtain a proxy IP, pop a task, crawl
     * one page of comments, store it, then either requeue the follow-up page
     * or mark the weibo as done.
     *
     * @return bool true when a page was stored (or the weibo finished),
     *              false when the step has to be retried or failed. Several
     *              unrecoverable situations end the process with exit instead.
     */
    public function deal() {
        $obj = new mSpider();
        $rdobj = $obj->initRedis();

        $cookie_json = $rdobj->get(_RC_WEIBO_LOGIN_COOKIE2);
        $baselog = PROC_CODE . "|";
        if (empty($cookie_json)) {
            // Only worth a log line when there is actually work waiting.
            if ($rdobj->llen($this->task_key) + 0 > 0) $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, 'cookie为空:'.$baselog);
            exit;
        }

        $cookie_data = json_decode($cookie_json, true);
        // A missing entry (or an undecodable cookie map) is "no cookie", not a notice.
        $cookie = (is_array($cookie_data) && isset($cookie_data[PROC_CODE])) ? $cookie_data[PROC_CODE] : null;
        if (empty($cookie)) {
            if ($rdobj->llen($this->task_key) + 0 > 0) $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '未设置该进程cookie:'.$baselog.'|'.$cookie_json);
            exit;
        }

        $ipinfo = $rdobj->get(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        $ipdata = json_decode($ipinfo, true);
        // Only process 0 fetches a fresh proxy IP, and only when there is work (or the worker is not looping).
        if (!$ipinfo && PROC_CODE == 0 && ($rdobj->llen($this->task_key) + 0 > 0 || !$this->is_while)) $ipdata = $obj->getZmhttpIp(_RC_SPIDER_WEIBO_COMMENT_USE_IP);
        if (empty($ipdata)) {
            sleep(2);
            return false;
        }

        $obj->proxy_ip = $ipdata['ip'];
        $obj->proxy_port = $ipdata['port'];

        $rq_data = $rdobj->rpop($this->task_key);
        if (empty($rq_data)) exit();

        $rq_data = json_decode($rq_data, true);

        // Missing fields count as 0, which the validation below rejects for weibo_id.
        $weibo_id = isset($rq_data['weibo_id']) ? $rq_data['weibo_id'] + 0 : 0;
        $max_id = isset($rq_data['max_id']) ? $rq_data['max_id'] + 0 : 0;
        if ($weibo_id <= 0 || $max_id < 0) {
            // Fixed: the decoded task has to be encoded for the log line; it was
            // previously passed to json_decode(), which expects a string.
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, 'queue:数据格式不正确:'.json_encode($rq_data));
            exit;
        }

        $baselog = $weibo_id . "|" .$max_id . "|" . $ipdata['ip'] . "|" . $ipdata['port'] . "|" . PROC_CODE . "|";

        $wobj = new mWeibo();
        $weibo_info = $wobj->getWeiboById($weibo_id);
        if (empty($weibo_info)) {
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '微博信息不存在:'.$baselog);
            exit;
        }

        $res = $obj->spiderComments($cookie, $weibo_info['uid'], $weibo_info['wid'], $weibo_info['mblogid'], $max_id);
        if (!$res) {
            $error = $obj->getError();
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '队列评论抓取失败,重试中:'.$error.'|'.$baselog);

            $cookie_expired = ($error == 'cookie失效');
            if ($cookie_expired) {
                $obj->sendMail(array('1432334894@qq.com'), date('Y年m月d日', time()) . '-微博评论数据抓取', $error);

                // Drop the dead cookie so this process stops until a new one is stored.
                unset($cookie_data[PROC_CODE]);
                $rdobj->set(_RC_WEIBO_LOGIN_COOKIE2, json_encode($cookie_data));
            }

            // Put the task back in both failure cases and let process 0 release the proxy IP if asked to.
            $rdobj->lpush($this->task_key, json_encode($rq_data));
            if ($obj->is_change_ip && PROC_CODE == 0) $rdobj->del(_RC_SPIDER_WEIBO_COMMENT_USE_IP);

            if ($cookie_expired) exit;

            sleep(30);
            return false;
        }

        $cobj = new mWeiboComments();
        $res = $cobj->addComment($weibo_id, $res);
        if (!$res) {
            $error = $cobj->getError().'|评论json数据保存到数据库失败~';
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, $baselog.'|'.$error);
            $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_ERROR));
            return false;
        }

        if (!$res['is_load_all']) {
            // More pages remain: the addComment() result doubles as the next task
            // because it carries weibo_id and the next max_id.
            $rdobj->lpush($this->task_key, json_encode($res));
            return true;
        }

        $res = $wobj->updateWeibo($weibo_id, array('spider_comment_status' => SPIDER_COMMENT_STATUS_SUCCESS));
        if (!$res) {
            $error = $wobj->getError();
            $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '更新微博状态失败:'.$error.'|'.$baselog);
            sleep(2);
            return false;
        }

        $obj->writeLog(ZHISHIKU_SPIDER_LOG, ZHISHIKU_SPIDER_COMMENT, '抓取成功:'.$baselog);
        $rdobj->sRem(_RS_SPIDER_WEIBO_COMMENT, $weibo_id);
        sleep(3);

        return true;
    }
}

new spiderWeiboComment();
File diff suppressed because one or more lines are too long
@ -0,0 +1,460 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
from snownlp import seg |
||||
|
from snownlp import SnowNLP |
||||
|
import pymysql |
||||
|
import configparser |
||||
|
import os |
||||
|
import re |
||||
|
|
||||
|
def get_db_config():
    """Build the pymysql connection settings from ``../config/database.ini``.

    The file is looked up relative to this module. The ``master``, ``user``,
    ``passwd`` and ``db`` options of its ``[simplyphp]`` section are read and
    any double quotes inside the values are removed.

    Returns:
        dict with the keys ``host``, ``user``, ``password``, ``database`` and
        ``charset`` (always ``'utf8mb4'``).
    """
    ini_path = os.path.join(os.path.dirname(__file__), '../config/database.ini')
    parser = configparser.ConfigParser()
    parser.read(ini_path)

    # connection keyword -> option name inside the [simplyphp] section
    option_by_key = (
        ('host', 'master'),
        ('user', 'user'),
        ('password', 'passwd'),
        ('database', 'db'),
    )
    settings = {
        key: parser.get('simplyphp', option).replace('"', '')
        for key, option in option_by_key
    }
    settings['charset'] = 'utf8mb4'
    return settings
||||
|
|
||||
|
# Database connection settings, loaded once at import time |
||||
|
DB_CONFIG = get_db_config() |
||||
|
|
||||
|
def get_comments_from_db():
    """Fetch up to 1000 Weibo comments whose ``is_search`` column is -1.

    A new connection is opened for the call and always closed afterwards.

    Returns:
        The fetched rows as dictionaries with the keys ``id`` and ``content``.
    """
    query = "SELECT id,content FROM spider_weibo_comments where is_search=-1 limit 0,1000"
    conn = pymysql.connect(**DB_CONFIG)
    try:
        with conn.cursor(pymysql.cursors.DictCursor) as cur:
            cur.execute(query)
            rows = cur.fetchall()
        return rows
    finally:
        conn.close()
||||
|
|
||||
|
#pip install snownlp |
||||
|
def filter_medical_comments(comments_list): |
||||
|
filtered_comments = [] |
||||
|
update_records = [] |
||||
|
positive_keywords = [ |
||||
|
'有效', '好用', '管用', '有用', '效果好', '见效', '有效果', '显著', '明显', |
||||
|
'改善', '缓解', '康复', '痊愈', '立竿见影', '有奇效', '灵验', '奏效', |
||||
|
'特效', '疗效显著', '效果持久', '见效快', '恢复快', '作用很大','作用很好', |
||||
|
|
||||
|
'赞', '好评', '感谢', '谢谢', '感恩', '推荐', '点赞', '太好了', '太神了', |
||||
|
'太厉害了', '太棒了', '太赞了', '太强了', '超赞', '惊喜', '感动', '欣慰', |
||||
|
'开心', '喜欢', '喜爱', '信赖', '满意', '安心', '放心', '惊喜', '感动', |
||||
|
'惊艳', '惊艳到', '惊叹', '佩服', '信赖', '信任', '安心', '舒心', '贴心', |
||||
|
'温暖', '幸福', '惊喜', '惊喜万分', '喜出望外', '心满意足', '赞不绝口', |
||||
|
'竖起大拇指', '五星好评', '无可挑剔', '令人难忘', '爱不释手', |
||||
|
|
||||
|
'巨好用', '贼管用', '超有效', '特有用', '极佳', '绝了', '无敌', '完美', |
||||
|
'顶尖', '非凡', '卓越', '出色', '意外的好', '好到爆', '好用爆了', |
||||
|
'效果拔群', '惊艳', '令人惊叹', '极其有效', '格外好用', '分外管用', |
||||
|
'异常出色', '相当不错', '真心不错', '真心好用', '实在管用', '确实有效', |
||||
|
'非常明显', '特别显著', '极度舒适', '超级满意', '无比惊喜', '彻底解决', |
||||
|
'完全康复', '根本改善', '质的飞跃', '翻天覆地', '脱胎换骨', '焕然一新', |
||||
|
|
||||
|
'解决了', '治好了', '好多了', '舒服多了', '舒缓了', '减轻了', '消失', '好了', |
||||
|
'根除', '治愈', '解救', '化解', '战胜', '摆脱', '修复', '解救', '消除', |
||||
|
'消退', '痊愈', '康复', '愈合', '好转', '恢复', '根治', '清除', '驱散', |
||||
|
'击退', '控制', '抑制', '止住', '缓解', '舒缓', '镇定', '安抚', '平复', |
||||
|
|
||||
|
'临床验证', '科学有效', '安全可靠', '无副作用', '标本兼治', '对症下药', |
||||
|
'循证有效', '机理明确', '数据支持', '实验证明', '权威认证', '专家推荐', |
||||
|
'医学验证', '科研证实', '专利技术', '独家配方', '国际标准', '行业领先', |
||||
|
'技术先进', '工艺精湛', '成分安全', '天然无害', '环保健康', '质量上乘', |
||||
|
|
||||
|
'yyds', '神仙产品', '宝藏', '天花板', '绝绝子', '吹爆', '按头安利', |
||||
|
'回购', '囤货', '无限回购', '一生推', '锁死', '入坑不亏', '种草', |
||||
|
'拔草成功', '真香', '神仙操作', '宝藏发现', '绝了', '封神', '炸裂', |
||||
|
'逆天', '开挂', '神仙效果', '王炸产品', '必备神器', '不踩雷', '闭眼入', |
||||
|
'直接封神', '原地封神', 'yyds永不过时', '绝绝子本尊', |
||||
|
|
||||
|
'物超所值', '性价比高', '值得买', '超值', '划算', '省心', '省事', |
||||
|
'方便', '简单', '易用', '必备', '神器', '救星', '必备品', '物有所值', |
||||
|
'一分钱一分货', '值得投资', '值得拥有', '买得值', '不亏', '不后悔', |
||||
|
'物美价廉', '价廉物美', '经济实惠', '实惠好用', '性价比之王', '良心价', |
||||
|
'超划算', '超实惠', '物超所值', '买对了', '选对了', '明智之选', '正确决定', |
||||
|
|
||||
|
'比...好', '完胜', '碾压', '吊打', '远超预期', '出乎意料', '远超同类', |
||||
|
'秒杀其他', '与众不同', '独一无二', '甩几条街', '高下立判', '不可比拟', |
||||
|
'无可替代', '无与伦比', '独树一帜', '领先一步', '更胜一筹', '技高一筹', |
||||
|
'优势明显', '竞争力强', '行业标杆', '标杆产品', '标准制定者', '引领者', |
||||
|
'颠覆传统', '突破创新', '革新体验', '改变游戏规则', |
||||
|
|
||||
|
'稳定', '持久', '根治', '不易复发', '巩固', '维持', '长效', '持续改善', |
||||
|
'稳步提升', '渐进好转', '日渐康复', '日益改善', '逐步恢复', '全面提升', |
||||
|
'全方位改善', '整体提升', '综合改善', '系统解决', '深度修复', '源头治理', |
||||
|
'标本兼治', '根本解决', '彻底改变', '全面革新', '高效能', '高效应', |
||||
|
'高速见效', '快速起效', '迅捷恢复', '急速改善', '瞬间舒缓', '即刻缓解', |
||||
|
|
||||
|
'再次购买', '多次回购', '长期使用', '持续使用', '坚持使用', '推荐给朋友', |
||||
|
'分享给家人', '安利同事', '转介绍', '口碑传播', '主动推荐', '自发宣传', |
||||
|
'写好评', '晒单', '上传照片', '视频分享', '发朋友圈', '小红书推荐', |
||||
|
'微博分享', '抖音推荐', '知乎推荐', 'B站分享', '成为粉丝', '忠实用户', |
||||
|
'长期支持', '品牌拥护', '信赖品牌', '支持国货', '选择信任', '持续关注', |
||||
|
|
||||
|
'温和', '舒适', '安全', '无刺激', '无负担', '无压力', '轻松', '自在', |
||||
|
'惬意', '享受', '愉悦', '舒爽', '清新', '自然', '柔和', '亲肤', '零负担', |
||||
|
'零刺激', '零风险', '零不适', '无过敏', '无红肿', '无副作用', '无依赖', |
||||
|
'无抗药性', '无耐药性', '可长期使用', '老少皆宜', '全家适用', '孕妇可用', |
||||
|
'婴幼儿适用', '敏感肌友好', '温和配方', '天然成分', '有机原料', '绿色环保', |
||||
|
|
||||
|
'服务周到', '售后完善', '专业指导', '耐心解答', '快速响应', '及时回复', |
||||
|
'贴心服务', '细致入微', '全程跟进', '个性化方案', '定制服务', '专属顾问', |
||||
|
'无忧售后', '包退包换', '质保可靠', '物流快捷', '包装精美', '使用方便', |
||||
|
'操作简单', '容易上手', '说明书清晰', '客服专业', '态度友好', '体验愉悦', |
||||
|
'购物愉快', '流程顺畅', '支付便捷', '配送及时', '开箱惊喜' |
||||
|
] |
||||
|
|
||||
|
negative_keywords = [ |
||||
|
'无效', '没用', '不管用', '没效果', '没有效果', '不行', '骗人', '无用', '啥用', '没啥用', |
||||
|
'没什么用', '没卵用', '浪费时间', '浪费钱', '不起作用', '不见好', '没好转', '毫无效果', |
||||
|
'徒劳', '白费力气', '不顶用', '不好使', '不灵', '不灵验', '不见效', '没效', '无济于事', |
||||
|
'不起效', '没作用', '无作用', '无效果', '白花钱', '打水漂', '石沉大海', '竹篮打水', |
||||
|
'杯水车薪', '隔靴搔痒', '对牛弹琴', '徒劳无功', '于事无补', '效果甚微', '微乎其微', |
||||
|
'聊胜于无', '形同虚设', '有名无实', '华而不实', '花架子', '空架子', '纸上谈兵', |
||||
|
'作用不大', '效果不佳', '效果不彰', '效果有限', '效果甚微', '效果差劲', '效果全无', |
||||
|
|
||||
|
'劣质', '低劣', '粗糙', '差劲', '垃圾', '烂货', '次品', '山寨', '假冒', '假货', |
||||
|
'伪劣', '瑕疵', '残次', '破旧', '易坏', '易碎', '易损', '易褪色', '易变形', |
||||
|
'不耐用', '不结实', '不牢固', '不持久', '掉漆', '脱色', '开胶', '断裂', '破损', |
||||
|
'漏液', '渗漏', '发霉', '生锈', '氧化', '变质', '过期', '有异味', '有瑕疵', |
||||
|
'有缺陷', '有划痕', '有污渍', '有杂质', '有异物', '不合格', '不达标', '不匹配', |
||||
|
|
||||
|
'欺诈', '诈骗', '虚假宣传', '夸大其词', '言过其实', '名不副实', '挂羊头卖狗肉', |
||||
|
'偷工减料', '以次充好', '缺斤少两', '货不对板', '图文不符', '描述不符', '夸大效果', |
||||
|
'虚假广告', '误导消费', '消费陷阱', '文字游戏', '玩套路', '设圈套', '割韭菜', |
||||
|
'智商税', '交学费', '被套路', '上当受骗', '受骗上当', '蒙骗', '蒙蔽', '欺瞒', |
||||
|
|
||||
|
'危险', '有害', '有毒', '致癌', '致畸', '致敏', '刺激', '腐蚀', '灼伤', '损伤', |
||||
|
'感染', '发炎', '红肿', '瘙痒', '疼痛', '溃烂', '留疤', '毁容', '后遗症', |
||||
|
'并发症', '中毒', '不适', '难受', '头晕', '恶心', '呕吐', '腹泻', '过敏反应', |
||||
|
'严重过敏', '全身过敏', '过敏性休克', '医疗事故', '安全隐患', '质量事故', |
||||
|
|
||||
|
'难用', '难闻', '难吃', '难喝', '难以下咽', '刺鼻', '异味', '怪味', '刺眼', '刺耳', |
||||
|
'扎人', '磨脚', '硌手', '卡顿', '死机', '闪退', '崩溃', '延迟', '卡死', '不流畅', |
||||
|
'不顺手', '不方便', '不人性', '反人类', '设计缺陷', '操作复杂', '界面丑陋', '体验糟糕', |
||||
|
'体验极差', '体验感差', '毫无体验', '令人烦躁', '令人抓狂', '令人崩溃', '令人作呕', |
||||
|
|
||||
|
'态度差', '态度恶劣', '爱答不理', '推诿', '推脱', '推卸', '敷衍', '搪塞', '拖延', |
||||
|
'不作为', '不负责', '不专业', '业务不熟', '解答不清', '误导', '欺骗', '强买强卖', |
||||
|
'霸王条款', '捆绑销售', '价格欺诈', '虚假促销', '发货慢', '物流慢', '配送延误', |
||||
|
'丢件', '损毁', '包装破损', '漏发', '错发', '不发货', '不退款', '不退换', '售后差', |
||||
|
'售后无门', '客服难找', '电话不通', '无人回复', '维权困难', '投诉无果', |
||||
|
|
||||
|
'不值', '不值当', '不值钱', '不值这个价', '价不配位', '性价比低', '价格虚高', |
||||
|
'漫天要价', '暴利', '宰客', '抢钱', '智商税', '冤枉钱', '花得冤', '买贵了', |
||||
|
'买后悔', '买错了', '选错了', '决策失误', '浪费', '奢侈', '奢侈浪费', '不划算', |
||||
|
'血亏', '亏大了', '买亏了', '被宰', '被坑', '被割韭菜', '交智商税', '价格水分大', |
||||
|
|
||||
|
'失望', '绝望', '心寒', '心碎', '愤怒', '恼火', '生气', '气愤', '郁闷', '烦躁', |
||||
|
'无奈', '无语', '懊悔', '后悔', '遗憾', '痛苦', '难受', '焦虑', '担忧', '害怕', |
||||
|
'恐慌', '恐惧', '厌恶', '讨厌', '憎恶', '嫌弃', '鄙视', '看不起', '唾弃', '痛恨', |
||||
|
'痛哭', '泪流满面', '彻夜难眠', '心如刀割', '万念俱灰', '生无可恋', '后悔莫及', |
||||
|
'追悔莫及', '悔不当初', '欲哭无泪', '气急败坏', '火冒三丈', '七窍生烟', '怒不可遏', |
||||
|
|
||||
|
'极其差劲', '极度糟糕', '超级垃圾', '特别坑爹', '非常失望', '巨难用', '贼难吃', |
||||
|
'忒难闻', '顶难喝', '死难用', '烂到极致', '差到极点', '糟糕透顶', '无可救药', |
||||
|
'一文不值', '一塌糊涂', '一败涂地', '一无是处', '不可救药', '病入膏肓', '积重难返', |
||||
|
'千疮百孔', '满目疮痍', '惨不忍睹', '触目惊心', '令人发指', '人神共愤', '天怒人怨', |
||||
|
|
||||
|
'踩雷', '大雷', '巨雷', '天雷', '避坑', '快逃', '拔草', '劝退', '翻车', '翻船', |
||||
|
'塌房', '暴雷', '雷品', '黑榜', '差评如潮', '吐槽大会', '一生黑', '拉黑', '取关', |
||||
|
'卸载', '再见', '永别', '再也不见', '绕道走', '快跑', '别买', '别入', '慎入', |
||||
|
'血泪教训', '前车之鉴', '反面教材', '智商检测', '韭菜专属', '冤种必备', |
||||
|
|
||||
|
'病情加重', '症状加剧', '并发症', '器官损伤', '功能受损', '免疫力下降', '耐药性', |
||||
|
'药物依赖', '成瘾性', '戒断反应', '慢性中毒', '肝肾损伤', '神经损伤', '不可逆损伤', |
||||
|
'终身残疾', '生命危险', '危及生命', '抢救', '急救', '住院', '手术', '后遗症', |
||||
|
'终身服药', '治疗失败', '病情反复', '复发', '转移', '恶化', '不治', '无救', |
||||
|
|
||||
|
'不如...', '比不上', '被吊打', '被碾压', '被秒杀', '差距大', '天壤之别', '云泥之别', |
||||
|
'相形见绌', '黯然失色', '望尘莫及', '判若云泥', '不可同日而语', '一个天上一个地下', |
||||
|
'退步', '落后', '过时', '淘汰', '落伍', '老掉牙', '跟不上时代', '技术落后', |
||||
|
'设计老旧', '理念陈旧', '不思进取', '固步自封', '原地踏步', '不进反退', |
||||
|
|
||||
|
'反复发作', '久治不愈', '迁延不愈', '慢性病', '终身病', '不治之症', '顽疾', |
||||
|
'痼疾', '老毛病', '历史问题', '积重难返', '沉疴难起', '病根难除', '尾大不掉', |
||||
|
'积弊已久', '系统性问题', '结构缺陷', '先天不足', '设计硬伤', '工艺缺陷', |
||||
|
|
||||
|
'三无产品', '无证经营', '非法添加', '违禁成分', '超标', '违规', '违法', '侵权', |
||||
|
'盗版', '仿冒', '抄袭', '剽窃', '假专利', '虚假认证', '伪造检测', '无批文', |
||||
|
'无许可', '被查处', '被下架', '被召回', '被曝光', '被通报', '被约谈', '被罚款', |
||||
|
'被起诉', '被告', '赔偿', '官司', '纠纷', '维权', '举报', '投诉', '315曝光', |
||||
|
|
||||
|
'污染', '破坏环境', '高耗能', '高污染', '不可降解', '有害垃圾', '资源浪费', |
||||
|
'过度包装', '血汗工厂', '压榨劳工', '童工', '不人道', '不道德', '无良企业', |
||||
|
'黑心商家', '资本作恶', '行业毒瘤', '社会危害', '道德沦丧', '价值观扭曲', |
||||
|
|
||||
|
'垃圾', '坑爹', '失望', '不满意', '没用', '无效', '不推荐', '没效果', |
||||
|
'不灵', '没用处', '不管用', '不见效', '不靠谱', '忽悠', '骗人', '副作用' |
||||
|
] |
||||
|
|
||||
|
medical_keywords = [ |
||||
|
'方法', '偏方', '妙招', '小妙招', '治疗', '疗法', '中医', '药方', |
||||
|
'配方', '秘方', '验方', '处方', '食疗', '调理', '养生', '保健', |
||||
|
'药材', '草药', '中药', '汤剂', '膏方', '丸剂', '针灸', '推拿', |
||||
|
'按摩', '刮痧', '拔罐', '艾灸', '穴位', '经络', '气血', '阴阳', |
||||
|
'五行', '寒热', '虚实', '内服', '外敷', '熏蒸', '泡脚', '泡澡', |
||||
|
'敷贴', '贴敷', '熬煮', '煎煮', '泡制', '炮制', '调养', '调理', |
||||
|
'药酒', '药茶', '药膳', '药浴', '药枕', '药包', '药粉', '药丸', |
||||
|
'药散', '药油', '药膏', '药贴', '药水', '药液', '药汁', '药汤', |
||||
|
'煎剂', '浸剂', '酊剂', '流浸膏', '浸膏', '散剂', '颗粒剂', |
||||
|
|
||||
|
'藏象学说', '脏腑理论', '三焦辨证', '卫气营血', '六经辨证', '八纲辨证', |
||||
|
'五行生克', '阴阳平衡', '天人相应', '整体观念', '辨证论治', '治未病', |
||||
|
'标本兼治', '同病异治', '异病同治', '正治反治', '扶正祛邪', '调整阴阳', |
||||
|
'调和气血', '因时制宜', '因地制宜', '因人制宜', '四气五味', '升降浮沉', |
||||
|
'归经理论', '君臣佐使', '方剂配伍', '药性理论', '七情和合', '十八反', |
||||
|
'十九畏', '中药炮制', '道地药材', '四诊合参', '脉诊', '舌诊', '望诊', |
||||
|
'闻诊', '问诊', '体质辨识', '九种体质', '经络循行', '奇经八脉', '十二经脉', |
||||
|
'经别', '经筋', '皮部', '腧穴', '原穴', '络穴', '郄穴', '背俞穴', '募穴', |
||||
|
'八会穴', '八脉交会穴', '下合穴', '交会穴', '五输穴', '子午流注', '灵龟八法', |
||||
|
|
||||
|
'刺络放血', '耳针', '头针', '眼针', '腹针', '腕踝针', '皮内针', '电针', |
||||
|
'水针', '穴位注射', '穴位埋线', '穴位贴敷', '穴位磁疗', '穴位激光', |
||||
|
'拔罐疗法', '走罐', '闪罐', '药罐', '刺血拔罐', '刮痧疗法', '砭石疗法', |
||||
|
'推拿手法', '一指禅', '㨰法', '揉法', '摩法', '擦法', '推法', '搓法', |
||||
|
'抖法', '振法', '按法', '点法', '捏法', '拿法', '捻法', '拍法', '击法', |
||||
|
'扳法', '摇法', '拔伸法', '牵引疗法', '小针刀', '刃针', '浮针', '腹诊推拿', |
||||
|
'脏腑推拿', '小儿推拿', '正骨', '整脊', '导引术', '气功疗法', '太极拳疗法', |
||||
|
'八段锦疗法', '五禽戏疗法', '六字诀', '易筋经', '中药熏洗', '中药雾化', |
||||
|
'中药离子导入', '中药灌肠', '中药鼻腔给药', '中药口腔喷雾', '中药外洗', |
||||
|
'中药湿敷', '中药热熨', '中药冷敷', '中药沐浴', '中药足浴', '中药香薰', |
||||
|
'中药蜡疗', '中药泥疗', '药线疗法', '药捻疗法', '箍围疗法', '切开疗法', |
||||
|
|
||||
|
'人参', '黄芪', '当归', '熟地', '白芍', '川芎', '丹参', '三七', '红花', |
||||
|
'桃仁', '赤芍', '丹皮', '紫草', '金银花', '连翘', '板蓝根', '大青叶', |
||||
|
'蒲公英', '鱼腥草', '黄芩', '黄连', '黄柏', '栀子', '夏枯草', '决明子', |
||||
|
'龙胆草', '苦参', '白鲜皮', '青蒿', '地骨皮', '银柴胡', '胡黄连', '大黄', |
||||
|
'芒硝', '番泻叶', '芦荟', '火麻仁', '郁李仁', '甘遂', '大戟', '芫花', |
||||
|
'牵牛子', '商陆', '独活', '威灵仙', '川乌', '草乌', '蕲蛇', '乌梢蛇', |
||||
|
'木瓜', '蚕沙', '伸筋草', '寻骨风', '松节', '海风藤', '青风藤', '丁公藤', |
||||
|
'昆明山海棠', '雪上一枝蒿', '秦艽', '防己', '桑枝', '豨莶草', '臭梧桐', |
||||
|
'海桐皮', '络石藤', '雷公藤', '老鹳草', '穿山龙', '丝瓜络', '五加皮', |
||||
|
'桑寄生', '狗脊', '千年健', '雪莲花', '鹿衔草', '石楠叶', '藿香', '佩兰', |
||||
|
'苍术', '厚朴', '砂仁', '白豆蔻', '草豆蔻', '草果', '茯苓', '薏苡仁', |
||||
|
'猪苓', '泽泻', '冬瓜皮', '玉米须', '葫芦', '香加皮', '枳椇子', '车前子', |
||||
|
'滑石', '木通', '通草', '瞿麦', '萹蓄', '地肤子', '海金沙', '石韦', |
||||
|
'冬葵子', '灯心草', '萆薢', '茵陈', '金钱草', '虎杖', '地耳草', '垂盆草', |
||||
|
'鸡骨草', '珍珠草', '附子', '干姜', '肉桂', '吴茱萸', '小茴香', '丁香', |
||||
|
'高良姜', '胡椒', '花椒', '荜茇', '荜澄茄', '陈皮', '青皮', '枳实', |
||||
|
'木香', '沉香', '檀香', '川楝子', '乌药', '荔枝核', '香附', '佛手', |
||||
|
'香橼', '玫瑰花', '绿萼梅', '娑罗子', '薤白', '大腹皮', '甘松', '九香虫', |
||||
|
'刀豆', '柿蒂', '山楂', '神曲', '麦芽', '谷芽', '莱菔子', '鸡内金', |
||||
|
'阿胶', '何首乌', '龙眼肉', '楮实子', '使君子', '苦楝皮', '槟榔', '南瓜子', |
||||
|
'鹤草芽', '雷丸', '鹤虱', '榧子', '芜荑', '大蓟', '小蓟', '地榆', |
||||
|
'槐花', '侧柏叶', '白茅根', '苎麻根', '羊蹄', '三七', '茜草', '蒲黄', |
||||
|
'花蕊石', '降香', '白及', '仙鹤草', '紫珠叶', '棕榈炭', '血余炭', '藕节', |
||||
|
'炮姜', '灶心土', '川芎', '延胡索', '郁金', '姜黄', '乳香', '没药', |
||||
|
'五灵脂', '夏天无', '枫香脂', '丹参', '红花', '桃仁', '益母草', '泽兰', |
||||
|
'牛膝', '鸡血藤', '王不留行', '月季花', '凌霄花', '土鳖虫', '自然铜', |
||||
|
'苏木', '骨碎补', '血竭', '儿茶', '刘寄奴', '莪术', '三棱', '水蛭', |
||||
|
'虻虫', '斑蝥', '穿山甲', '半夏', '天南星', '白附子', '白芥子', '皂荚', |
||||
|
'旋覆花', '白前', '前胡', '桔梗', '川贝母', '浙贝母', '瓜蒌', '竹茹', |
||||
|
'竹沥', '天竺黄', '昆布', '海藻', '黄药子', '海蛤壳', '海浮石', '瓦楞子', |
||||
|
'礞石', '苦杏仁', '紫苏子', '百部', '紫菀', '款冬花', '马兜铃', '枇杷叶', |
||||
|
'桑白皮', '葶苈子', '白果', '矮地茶', '洋金花', '华山参', '罗汉果', |
||||
|
'朱砂', '磁石', '龙骨', '琥珀', '珍珠', '酸枣仁', '柏子仁', '远志', |
||||
|
'合欢皮', '首乌藤', '石决明', '珍珠母', '牡蛎', '紫贝齿', '代赭石', |
||||
|
'刺蒺藜', '罗布麻叶', '羚羊角', '牛黄', '钩藤', '天麻', '地龙', '全蝎', |
||||
|
'蜈蚣', '僵蚕', '麝香', '冰片', '苏合香', '石菖蒲', '人参', '西洋参', |
||||
|
'党参', '太子参', '黄芪', '白术', '山药', '白扁豆', '甘草', '大枣', |
||||
|
'刺五加', '绞股蓝', '红景天', '沙棘', '饴糖', '蜂蜜', '鹿茸', '巴戟天', |
||||
|
'淫羊藿', '仙茅', '杜仲', '续断', '肉苁蓉', '锁阳', '补骨脂', '益智仁', |
||||
|
'菟丝子', '沙苑子', '蛤蚧', '核桃仁', '冬虫夏草', '紫河车', '当归', |
||||
|
'熟地黄', '白芍', '阿胶', '何首乌', '龙眼肉', '北沙参', '南沙参', '百合', |
||||
|
'麦冬', '天冬', '石斛', '玉竹', '黄精', '枸杞子', '墨旱莲', '女贞子', |
||||
|
'桑椹', '黑芝麻', '龟甲', '鳖甲', '麻黄根', '浮小麦', '糯稻根须', '五味子', |
||||
|
'乌梅', '五倍子', '罂粟壳', '诃子', '石榴皮', '肉豆蔻', '赤石脂', '禹余粮', |
||||
|
'山茱萸', '桑螵蛸', '海螵蛸', '金樱子', '莲子', '芡实', '椿皮', '鸡冠花', |
||||
|
|
||||
|
'四君子汤', '六君子汤', '香砂六君子汤', '参苓白术散', '补中益气汤', |
||||
|
'玉屏风散', '生脉散', '四物汤', '当归补血汤', '归脾汤', '八珍汤', |
||||
|
'十全大补汤', '炙甘草汤', '六味地黄丸', '知柏地黄丸', '杞菊地黄丸', |
||||
|
'麦味地黄丸', '都气丸', '左归丸', '大补阴丸', '一贯煎', '肾气丸', |
||||
|
'右归丸', '地黄饮子', '龟鹿二仙胶', '七宝美髯丹', '桂枝汤', '麻黄汤', |
||||
|
'九味羌活汤', '小青龙汤', '止嗽散', '银翘散', '桑菊饮', '麻黄杏仁甘草石膏汤', |
||||
|
'柴葛解肌汤', '升麻葛根汤', '败毒散', '参苏饮', '再造散', '加减葳蕤汤', |
||||
|
'大承气汤', '大陷胸汤', '大黄牡丹汤', '温脾汤', '麻子仁丸', '济川煎', |
||||
|
'十枣汤', '舟车丸', '疏凿饮子', '小柴胡汤', '蒿芩清胆汤', '四逆散', |
||||
|
'逍遥散', '痛泻要方', '半夏泻心汤', '白虎汤', '竹叶石膏汤', '清营汤', |
||||
|
'犀角地黄汤', '黄连解毒汤', '凉膈散', '普济消毒饮', '仙方活命饮', |
||||
|
'导赤散', '龙胆泻肝汤', '左金丸', '泻白散', '清胃散', '玉女煎', '芍药汤', |
||||
|
'白头翁汤', '青蒿鳖甲汤', '清骨散', '当归六黄汤', '理中丸', '小建中汤', |
||||
|
'吴茱萸汤', '四逆汤', '回阳救急汤', '当归四逆汤', '阳和汤', '四神丸', |
||||
|
'真人养脏汤', '金锁固精丸', '桑螵蛸散', '固冲汤', '固经丸', '易黄汤', |
||||
|
'朱砂安神丸', '天王补心丹', '酸枣仁汤', '甘麦大枣汤', '安宫牛黄丸', |
||||
|
'紫雪丹', '至宝丹', '苏合香丸', '紫金锭', '越鞠丸', '柴胡疏肝散', |
||||
|
'半夏厚朴汤', '瓜蒌薤白白酒汤', '枳实薤白桂枝汤', '天台乌药散', |
||||
|
'暖肝煎', '厚朴温中汤', '良附丸', '金铃子散', '丹参饮', '失笑散', |
||||
|
'桂枝茯苓丸', '鳖甲煎丸', '血府逐瘀汤', '补阳还五汤', '复元活血汤', |
||||
|
'温经汤', '生化汤', '活络效灵丹', '大黄䗪虫丸', '小活络丹', '川芎茶调散', |
||||
|
'大秦艽汤', '牵正散', '玉真散', '消风散', '羚角钩藤汤', '镇肝熄风汤', |
||||
|
'天麻钩藤饮', '大定风珠', '杏苏散', '桑杏汤', '清燥救肺汤', '麦门冬汤', |
||||
|
'养阴清肺汤', '百合固金汤', '平胃散', '藿香正气散', '茵陈蒿汤', |
||||
|
'八正散', '三仁汤', '甘露消毒丹', '连朴饮', '当归拈痛汤', '二妙散', |
||||
|
'五苓散', '猪苓汤', '防己黄芪汤', '苓桂术甘汤', '真武汤', '实脾散', |
||||
|
'萆薢分清饮', '羌活胜湿汤', '独活寄生汤', '二陈汤', '温胆汤', '茯苓丸', |
||||
|
'清气化痰丸', '小陷胸汤', '滚痰丸', '贝母瓜蒌散', '三子养亲汤', |
||||
|
'半夏白术天麻汤', '定痫丸', '保和丸', '枳实导滞丸', '木香槟榔丸', |
||||
|
'健脾丸', '枳实消痞丸', '葛花解酲汤', '乌梅丸', '肥儿丸', '布袋丸', |
||||
|
'化虫丸', '伐木丸', '犀黄丸', '透脓散', '小金丹', '内补黄芪汤', |
||||
|
'苇茎汤', '大黄牡丹汤', '薏苡附子败酱散', '阳和汤', |
||||
|
|
||||
|
'感冒', '咳嗽', '哮喘', '肺痈', '肺痨', '肺胀', '肺痿', '心悸', '胸痹', |
||||
|
'不寐', '健忘', '痴呆', '癫狂', '痫病', '胃痛', '痞满', '呕吐', '呃逆', |
||||
|
'噎膈', '腹痛', '泄泻', '痢疾', '便秘', '胁痛', '黄疸', '积聚', '鼓胀', |
||||
|
'头痛', '眩晕', '中风', '瘿病', '疟疾', '水肿', '淋证', '癃闭', '关格', |
||||
|
'遗精', '阳痿', '早泄', '不育', '腰痛', '消渴', '痹证', '痿证', '颤证', |
||||
|
'痉证', '内伤发热', '虚劳', '肥胖', '癌症', '肿瘤', '郁证', '血证', |
||||
|
'痰饮', '自汗', '盗汗', '厥证', '脱证', '虫病', '痹病', '湿阻', '中暑', |
||||
|
'冻伤', '烧伤', '毒蛇咬伤', '破伤风', '肠痈', '乳痈', '乳癖', '乳岩', |
||||
|
'瘰疬', '瘿瘤', '疝气', '脱肛', '痔疮', '肛裂', '肛瘘', '脱疽', '臁疮', |
||||
|
'丹毒', '流注', '走黄', '内陷', '疔疮', '疖', '痈', '有头疽', '无头疽', |
||||
|
'发颐', '流痰', '附骨疽', '环跳疽', '足发背', '手发背', '褥疮', '窦道', |
||||
|
'漏管', '蛇串疮', '湿疮', '瘾疹', '牛皮癣', '白疕', '粉刺', '酒齄鼻', |
||||
|
'瓜藤缠', '猫眼疮', '风瘙痒', '风热疮', '紫癜风', '白驳风', '油风', |
||||
|
'黧黑斑', '雀斑', '疣目', '鼠乳', '鸡眼', '胼胝', '皲裂', '冻疮', '烧伤', |
||||
|
'毒蛇咬伤', '破伤风', '狂犬病', '食物中毒', '药物中毒', '一氧化碳中毒', |
||||
|
'有机磷中毒', '铅中毒', '汞中毒', '月经不调', '痛经', '闭经', '崩漏', |
||||
|
'经行乳房胀痛', '经行头痛', '经行发热', '经行身痛', '经行泄泻', '经行吐衄', |
||||
|
'经行口糜', '经行风疹块', '经行眩晕', '经行浮肿', '经行情志异常', |
||||
|
'绝经前后诸证', '带下病', '妊娠恶阻', '妊娠腹痛', '异位妊娠', '胎漏', |
||||
|
'胎动不安', '滑胎', '胎萎不长', '胎死不下', '子满', '子肿', '子晕', |
||||
|
'子痫', '子嗽', '子淋', '妊娠小便不通', '难产', '产后血晕', '产后痉证', |
||||
|
'产后发热', '产后腹痛', '产后恶露不绝', '产后恶露不下', '产后大便难', |
||||
|
'产后排尿异常', '产后自汗', '盗汗', '产后身痛', '缺乳', '乳汁自出', |
||||
|
'不孕症', '阴痒', '阴疮', '阴挺', '妇人腹痛', '癥瘕', '盆腔炎', '脏躁', |
||||
|
'小儿感冒', '小儿咳嗽', '肺炎喘嗽', '哮喘', '鹅口疮', '口疮', '呕吐', |
||||
|
'泄泻', '厌食', '积滞', '疳证', '营养性缺铁性贫血', '惊风', '癫痫', |
||||
|
'多动症', '抽动症', '遗尿', '五迟', '五软', '解颅', '夜啼', '汗证', |
||||
|
'病毒性心肌炎', '注意力缺陷多动障碍', '过敏性紫癜', '皮肤黏膜淋巴结综合征', |
||||
|
|
||||
|
'中药药理', '中药化学', '中药制剂', '中药分析', '中药鉴定', '中药资源', |
||||
|
'GAP种植', '中药指纹图谱', '中药血清药化学', '中药代谢组学', '中药基因组学', |
||||
|
'中药蛋白组学', '中药网络药理学', '中药循证医学', '中医标准化', '中医信息化', |
||||
|
'中医人工智能', '中医大数据', '中医预防医学', '中医康复医学', '中医护理学', |
||||
|
'中医营养学', '中医心理学', '中医时间医学', '中医气象医学', '中医地理医学', |
||||
|
'中医体质学', '中医证候学', '中医治则学', '中医各家学说', '中医医史文献', |
||||
|
'中医古籍整理', '中医海外传播', '中医现代化', '中西医结合', '整合医学', |
||||
|
'精准中医', '系统生物学', '组学技术', '分子生物学', '细胞生物学', |
||||
|
'免疫药理学', '神经药理学', '心血管药理学', '抗肿瘤研究', '抗病毒研究', |
||||
|
'抗炎研究', '抗氧化研究', '调节免疫', '调节代谢', '调节肠道菌群', |
||||
|
'药代动力学', '药效动力学', '毒理学', '临床评价', '真实世界研究', |
||||
|
'随机对照试验', '队列研究', '病例对照研究', '系统评价', 'Meta分析', |
||||
|
'临床路径', '诊疗指南', '专家共识', '病证结合', '方证对应', '证候要素', |
||||
|
'证候靶点', '生物标志物', '疗效评价', '生存质量', '患者报告结局', |
||||
|
'中医适宜技术', '基层推广', '家庭医生', '医养结合', '健康中国', |
||||
|
|
||||
|
'藏医', '蒙医', '维医', '傣医', '壮医', '瑶医', '苗医', '彝医', '侗医', |
||||
|
'回医', '朝医', '哈萨克医', '畲医', '土家医', '羌医', '布依医', '仡佬医', |
||||
|
'鄂伦春医', '赫哲医', '达斡尔医', '景颇医', '阿昌医', '德昂医', '保安医', |
||||
|
'裕固医', '京医', '塔塔尔医', '独龙医', '门巴医', '珞巴医', '基诺医', |
||||
|
|
||||
|
'黄帝内经', '伤寒论', '金匮要略', '温病条辨', '神农本草经', '难经', |
||||
|
'脉经', '针灸甲乙经', '肘后备急方', '千金方', '外台秘要', '太平圣惠方', |
||||
|
'圣济总录', '太平惠民和剂局方', '本草纲目', '景岳全书', '医宗金鉴', |
||||
|
'张仲景', '华佗', '孙思邈', '李时珍', '扁鹊', '皇甫谧', '葛洪', |
||||
|
'陶弘景', '王叔和', '巢元方', '钱乙', '刘完素', '张从正', '李杲', |
||||
|
'朱震亨', '张景岳', '叶天士', '吴鞠通', '王清任', '傅青主' |
||||
|
] |
||||
|
|
||||
|
def is_question(comment):
    """Tell whether a comment should be discarded as a question or as too short.

    Args:
        comment: Comment text.

    Returns:
        True when the text has at most 5 characters or contains any of the
        question markers below, otherwise False.
    """
    # The list used to contain the ASCII '?' twice. The second entry is now the
    # full-width '?', the question mark normally typed in Chinese text, which
    # was presumably what the duplicate was meant to be.
    question_words = ['吗', '呢', '怎么', '什么', '?', '?', '是否', '有没有', '怎样', '几时', '为何', '为什么']
    if len(comment) <= 5:
        return True
    return any(word in comment for word in question_words)
||||
|
|
||||
|
for commentItem in comments_list: |
||||
|
comment_id = commentItem['id'] |
||||
|
comment = commentItem['content'].replace('#张宝旬妙招#', '') |
||||
|
|
||||
|
if is_question(comment): |
||||
|
update_records.append((0, comment_id)) |
||||
|
continue |
||||
|
|
||||
|
img_pattern = re.compile(r'<img.*?\/>') |
||||
|
commentRemove = re.sub(img_pattern, '', comment) |
||||
|
commentRemove = re.sub(r'\s', '', commentRemove) |
||||
|
if len(commentRemove) <= 4: |
||||
|
update_records.append((0, comment_id)) |
||||
|
continue |
||||
|
|
||||
|
s = SnowNLP(comment) |
||||
|
sentiment_score = s.sentiments |
||||
|
|
||||
|
# print(comment) |
||||
|
# print(sentiment_score) |
||||
|
# print('*'*100) |
||||
|
|
||||
|
contains_positive = any(keyword in comment for keyword in positive_keywords) |
||||
|
contains_negative = any(keyword in comment for keyword in negative_keywords) |
||||
|
contains_medical = any(keyword in comment for keyword in medical_keywords) |
||||
|
|
||||
|
if (contains_medical and (contains_positive or contains_negative or sentiment_score > 0.7 or sentiment_score < 0.3)) or ((contains_positive or contains_negative) and (sentiment_score > 0.7 or sentiment_score < 0.3)): |
||||
|
# if contains_positive or contains_negative or sentiment_score > 0.7 or sentiment_score < 0.3: |
||||
|
# filtered_comments.append({ |
||||
|
# 'comment': comment, |
||||
|
# 'sentiment': sentiment_score, |
||||
|
# 'is_positive': contains_positive or sentiment_score > 0.7, |
||||
|
# 'is_negative': contains_negative or sentiment_score < 0.3, |
||||
|
# # 'is_positive': sentiment_score > 0.5, |
||||
|
# # 'is_negative': sentiment_score < 0.3, |
||||
|
# # 'has_medical_reference': contains_medical |
||||
|
# }) |
||||
|
update_records.append((1, comment_id)) |
||||
|
else: |
||||
|
update_records.append((0, comment_id)) |
||||
|
|
||||
|
update_database(update_records) |
||||
|
|
||||
|
return filtered_comments |
||||
|
|
||||
|
def update_database(records):
    """Persist classification results to the ``spider_weibo_comments`` table.

    Args:
        records: sequence of ``(is_search, id)`` tuples, in the same order as
            the placeholders of the UPDATE statement below.

    Returns:
        None. All rows are written through one connection and committed
        together; the connection is closed even when the update fails.
    """
    # Nothing to write: do not open (and immediately close) a DB connection.
    if not records:
        return

    connection = pymysql.connect(**DB_CONFIG)
    try:
        with connection.cursor() as cursor:
            sql = "UPDATE spider_weibo_comments SET is_search = %s WHERE id = %s"
            cursor.executemany(sql, records)
        connection.commit()
    finally:
        connection.close()
||||
|
|
||||
|
def domain():
    """Classify stored comments batch by batch, then terminate the process.

    Each pass fetches one batch with ``get_comments_from_db()`` and hands it
    to ``filter_medical_comments()``. A batch of fewer than 1000 rows is
    treated as the last page: it is processed (when non-empty), a final
    message is printed and the interpreter exits.

    The batches are processed in a loop rather than by self-recursion, so the
    call stack stays flat no matter how many batches are pending.
    """
    while True:
        contentdata = get_comments_from_db()
        length = len(contentdata)

        if length >= 1000:
            # Full batch: more rows may remain, so process it and fetch again.
            print(f'共{length}条数据')
            filter_medical_comments(contentdata)
            continue

        # Short (possibly empty) batch: this is the last page.
        if length > 0:
            filter_medical_comments(contentdata)
        print(f'已经是最后一页,没有数据了:{length}')
        exit()
||||
|
|
||||
|
# comments = [item['content'] for item in contentdata] |
||||
|
# filtered = filter_medical_comments(contentdata) |
||||
|
# comments = [] |
||||
|
# import json |
||||
|
# print(json.dumps(filtered, ensure_ascii=False, indent=None)) |
||||
|
# print(f"筛选出 {len(filtered)} 条相关评论:") |
||||
|
# for item in filtered: |
||||
|
# print(f"评论: {item['comment']}") |
||||
|
# print(f"情感得分: {item['sentiment']:.2f}") |
||||
|
# print(f"是否正面: {item['is_positive']}") |
||||
|
# print(f"是否负面: {item['is_negative']}") |
||||
|
# print("-" * 50) |
||||
|
|
||||
|
# Start the batch classification only when this file is executed as a script.
if __name__ == "__main__":
    domain()
||||
|
|
@ -0,0 +1,17 @@ |
|||||
|
<?php |
||||
|
// One-off maintenance script: stores the pool of Weibo login cookies in Redis
// as a JSON array under the _RC_WEIBO_LOGIN_COOKIE2 key, then stops.
include_once(dirname(dirname(__FILE__))."/library/publicBase.php");

date_default_timezone_set("Asia/Shanghai");

$base = new mBase();
$redis = $base->initRedis();

// NOTE(review): these look like live session cookies committed to the
// repository - consider loading them from untracked configuration; confirm.
$cookies = array(
    '_2A25FcbH_DeRhGeFG71sY9CbOyDuIHXVmD0s3rDV8PUNbmtB-LROgkW9NeUknrDWZVELgZ5_bHazkmfeqimpsJvrC', // account: Wu Longchao
    '_2A25Fcc-2DeRhGeRJ6VAW9SvIzjiIHXVmD01-rDV8PUNbmtAYLRjzkW9NUqgeP0nu8ldMw0ltx31xfvHmVmqDM2xN', // account: Wang Pengda
    '_2A25FakNADeRhGedJ71UQ9S7PzjyIHXVmBtqIrDV8PUJbkNB-LRX_kW1NVhe7zEjMdJLzXvbT9ck1Q-L9YYtLiCpV', // account: Zhang Qi
    '_2AkMfKU18f8NxqwFRmvoVzmPrZI11wwvEieKpdbynJRMxHRl-yT9xqmoitRB6NKljk7EQIbC2ibY-BOeJTItlu8LSPOXb', // account: Liu Mengqi
    '_2A25FcbInDeRhGeFK4lEU9ijJwzSIHXVmD0vvrDV8PUNbmtAYLVLTkW9NQtaHPZxefLemxY2kP3YN6l7tyfSJeWvl', // account: Zhang Yupeng
);

$redis->set(_RC_WEIBO_LOGIN_COOKIE2, json_encode($cookies));
exit;
@ -0,0 +1,338 @@ |
|||||
|
<?php |
||||
|
/**
 * Rule-based sentiment classifier for comments on home-remedy posts.
 *
 * A comment is labelled 'positive', 'negative' or 'neutral' using keyword
 * lists and regular expressions only; no statistical model is involved.
 */
class CommentAnalyzer {
    /**
     * Keywords and sentence patterns, grouped by sentiment.
     *
     * 'keywords' are matched as plain substrings, 'patterns' as UTF-8 regular
     * expressions. Duplicate keyword entries were removed; matching a keyword
     * once is enough.
     */
    private $config = [
        'positive' => [
            'keywords' => [
                '有效', '好用', '好了', '治愈', '见效', '不错', '管用', '有用',
                '灵验', '缓解', '痊愈', '神效', '推荐', '感谢', '有效果', '奏效',
                '改善', '见效快', '神奇'
            ],
            'patterns' => [
                '/(喝|吃|用)了?\s*\d+\s*次?[就]?(好|痊愈|缓解|不咳|不痛)了/u',
                '/(效果|结果)\s*(非常|很)?\s*(好|明显|不错|显著|惊人)/u',
                '/(尝试|试了|用了)\s*\d+\s*[天个]?\s*(就)?(好|痊愈|缓解|见效)/u',
                '/(咳嗽|咳|痛|疼|症状)\s*(明显)?\s*(减轻|缓解|消失|好了)/u',
                '/(感谢|谢谢)\s*(分享|博主|张医生|宝旬)/u'
            ]
        ],
        'negative' => [
            'keywords' => [
                '无效', '没用', '不好', '没效果', '不行', '不见效', '没好转',
                '加重', '恶化', '白费', '失望', '避雷', '无效果', '无改善',
                '没作用', '不灵', '骗人', '忽悠', '上当'
            ],
            'patterns' => [
                '/(还是|依然|仍旧|仍然)\s*(咳|难受|痛|疼|没效果)/u',
                '/(一点|完全|根本|丝毫)\s*没(效果|用|好转|作用|改善)/u',
                '/(不仅|不但)\s*没(好|改善).*(反而|而且)\s*(加重|恶化)/u',
                '/(浪费|白费|白忙|白折腾)\s*(时间|精力)/u',
                // Accept the full-width exclamation mark as well as the ASCII one.
                '/(失望|后悔|上当|骗人|忽悠|别信)\s*(了|吧|!|!)/u'
            ]
        ]
    ];

    /**
     * Classify the sentiment of one comment.
     *
     * Checks run in priority order: negative keywords, positive keywords
     * (a negated positive keyword such as "没缓解" counts as negative),
     * negative patterns, positive patterns.
     *
     * @param string $comment Comment text
     * @return string 'positive', 'negative' or 'neutral'
     */
    public function analyze($comment) {
        $comment = $this->preprocess($comment);

        // 1. Negative keywords take priority over everything else.
        foreach ($this->config['negative']['keywords'] as $word) {
            if (mb_strpos($comment, $word, 0, 'UTF-8') !== false) {
                return 'negative';
            }
        }

        // 2. Positive keywords, taking a directly preceding negation into account.
        $keywordVerdict = $this->classifyByPositiveKeywords($comment);
        if ($keywordVerdict !== null) {
            return $keywordVerdict;
        }

        // 3. Negative sentence patterns.
        foreach ($this->config['negative']['patterns'] as $pattern) {
            if (preg_match($pattern, $comment)) {
                return 'negative';
            }
        }

        // 4. Positive sentence patterns.
        foreach ($this->config['positive']['patterns'] as $pattern) {
            if (preg_match($pattern, $comment)) {
                return 'positive';
            }
        }

        return 'neutral';
    }

    /**
     * Look for positive keywords and decide what they imply.
     *
     * @param string $comment Preprocessed comment text
     * @return string|null 'negative' if any occurrence is negated, 'positive'
     *                     if only plain occurrences exist, null if none match
     */
    private function classifyByPositiveKeywords($comment) {
        $foundPlain = false;
        foreach ($this->config['positive']['keywords'] as $word) {
            $offset = 0;
            while (($pos = mb_strpos($comment, $word, $offset, 'UTF-8')) !== false) {
                if ($this->isNegated($comment, $pos)) {
                    // e.g. "没有效果" contains "有效" but means the opposite.
                    return 'negative';
                }
                $foundPlain = true;
                $offset = $pos + 1;
            }
        }
        return $foundPlain ? 'positive' : null;
    }

    /**
     * Tell whether the text directly before a keyword negates it.
     *
     * Only the (up to) three characters in front of the keyword are inspected.
     * The double negative "不得不" is not treated as a negation.
     *
     * @param string $comment Preprocessed comment text
     * @param int    $pos     Character offset at which the keyword starts
     * @return bool
     */
    private function isNegated($comment, $pos) {
        $start = max(0, $pos - 3);
        $context = mb_substr($comment, $start, $pos - $start, 'UTF-8');
        return preg_match('/(?<!不得)(?:不|没|没有|无|未|不太|不怎么|不是很)\z/u', $context) === 1;
    }

    /**
     * Normalise a comment before matching.
     *
     * Collapses whitespace runs to a single space, drops every character that
     * is not a Han character, punctuation, a word character or whitespace
     * (punctuation is kept), and trims the result. When a replacement fails
     * (preg_replace() returns null, e.g. on invalid UTF-8) the text from the
     * previous step is used instead of passing null on.
     *
     * @param string $comment Raw comment text
     * @return string
     */
    private function preprocess($comment) {
        $comment = (string) $comment;

        $collapsed = preg_replace('/\s+/u', ' ', $comment);
        if ($collapsed === null) {
            return trim($comment);
        }

        $cleaned = preg_replace('/[^\p{Han}\p{P}\w\s]/u', '', $collapsed);
        return trim($cleaned === null ? $collapsed : $cleaned);
    }
}
||||
|
|
||||
|
// ===================== 使用示例 ===================== |
||||
|
$analyzer = new CommentAnalyzer(); |
||||
|
|
||||
|
// 微博内容 |
||||
|
$weibo = "针灸匠张宝旬\n原创\n25-7-7 14:41\n发布于 北京\n来自 华为 Mate X5 典藏版\n#张宝旬妙招# 刀片桑用花椒蒸梨。按图示做。汤水清甜微麻,一点不辣,还带股淡淡的花椒香味,吃梨肉、喝梨汤,效果好。 "; |
||||
|
|
||||
|
// 示例评论 |
||||
|
$comments = [ |
||||
|
"去年冬天寒咳,直接切梨加花椒加少许水蒸半个小时,喝几次就好了", |
||||
|
"试了完全没效果,咳嗽更严重了", |
||||
|
"花椒蒸梨一点用都没有,避雷!", |
||||
|
"喝了三天,依然咳得睡不着", |
||||
|
"效果非常明显,第二天就不咳了", |
||||
|
"这个方子对我很管用,咳嗽明显减轻了", |
||||
|
"按照方法做了,但感觉没什么变化", |
||||
|
"张医生的方法总是这么神奇,感谢分享!", |
||||
|
"孩子咳嗽试了这个方法,结果反而加重了", |
||||
|
"蒸梨的时候花椒放多了,味道有点怪", |
||||
|
"这个方法简单易行,推荐给大家", |
||||
|
"试了两次,效果不明显,可能不适合我", |
||||
|
"中医小妙招真是博大精深", |
||||
|
"花椒蒸梨?这是什么奇怪的组合", |
||||
|
"喝了当晚就不怎么咳了,太有效了!", |
||||
|
"完全没好转,白忙活一场", |
||||
|
"这个方法在抖音上看到过,亲测有效", |
||||
|
"咳嗽没缓解,反而胃不舒服了", |
||||
|
"张宝旬医生的方法值得信赖", |
||||
|
"没坚持喝,不知道效果如何" |
||||
|
]; |
||||
|
|
||||
|
// Sort every comment into its sentiment bucket; neutral comments are dropped.
$results = array_fill_keys(['positive', 'negative'], []);

foreach ($comments as $comment) {
    $label = $analyzer->analyze($comment);
    if ($label === 'neutral') {
        continue;
    }
    $results[$label][] = $comment;
}
||||
|
|
||||
|
// ===================== 显示结果 ===================== |
||||
|
?> |
||||
|
<!DOCTYPE html> |
||||
|
<html lang="zh-CN"> |
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>中医小妙招评论分析工具</title> |
||||
|
<style> |
||||
|
* { |
||||
|
margin: 0; |
||||
|
padding: 0; |
||||
|
box-sizing: border-box; |
||||
|
font-family: 'Microsoft YaHei', sans-serif; |
||||
|
} |
||||
|
body { |
||||
|
background-color: #f5f7fa; |
||||
|
color: #333; |
||||
|
line-height: 1.6; |
||||
|
padding: 20px; |
||||
|
} |
||||
|
.container { |
||||
|
max-width: 1200px; |
||||
|
margin: 0 auto; |
||||
|
background: white; |
||||
|
border-radius: 10px; |
||||
|
box-shadow: 0 0 20px rgba(0, 0, 0, 0.1); |
||||
|
overflow: hidden; |
||||
|
} |
||||
|
header { |
||||
|
background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%); |
||||
|
color: white; |
||||
|
padding: 25px 30px; |
||||
|
position: relative; |
||||
|
} |
||||
|
h1 { |
||||
|
font-size: 28px; |
||||
|
margin-bottom: 10px; |
||||
|
} |
||||
|
.subtitle { |
||||
|
font-size: 16px; |
||||
|
opacity: 0.9; |
||||
|
} |
||||
|
.weibo-card { |
||||
|
background: #eef5ff; |
||||
|
border-left: 4px solid #2575fc; |
||||
|
padding: 20px; |
||||
|
margin: 20px; |
||||
|
border-radius: 5px; |
||||
|
} |
||||
|
.weibo-content { |
||||
|
font-size: 18px; |
||||
|
line-height: 1.7; |
||||
|
color: #1a1a1a; |
||||
|
} |
||||
|
.weibo-meta { |
||||
|
color: #666; |
||||
|
font-size: 14px; |
||||
|
margin-top: 10px; |
||||
|
} |
||||
|
.results-container { |
||||
|
display: flex; |
||||
|
flex-wrap: wrap; |
||||
|
padding: 20px; |
||||
|
} |
||||
|
.result-column { |
||||
|
flex: 1; |
||||
|
min-width: 300px; |
||||
|
padding: 15px; |
||||
|
} |
||||
|
.result-header { |
||||
|
font-size: 20px; |
||||
|
padding: 15px 0; |
||||
|
margin-bottom: 15px; |
||||
|
border-bottom: 2px solid; |
||||
|
display: flex; |
||||
|
align-items: center; |
||||
|
} |
||||
|
.positive .result-header { |
||||
|
color: #28a745; |
||||
|
border-bottom-color: #28a745; |
||||
|
} |
||||
|
.negative .result-header { |
||||
|
color: #dc3545; |
||||
|
border-bottom-color: #dc3545; |
||||
|
} |
||||
|
.result-header i { |
||||
|
margin-right: 10px; |
||||
|
font-size: 24px; |
||||
|
} |
||||
|
.comment-list { |
||||
|
list-style: none; |
||||
|
} |
||||
|
.comment-item { |
||||
|
background: #f8f9fa; |
||||
|
border-radius: 8px; |
||||
|
padding: 15px; |
||||
|
margin-bottom: 15px; |
||||
|
box-shadow: 0 2px 5px rgba(0,0,0,0.05); |
||||
|
border-left: 4px solid; |
||||
|
transition: transform 0.2s; |
||||
|
} |
||||
|
.comment-item:hover { |
||||
|
transform: translateY(-3px); |
||||
|
box-shadow: 0 5px 15px rgba(0,0,0,0.1); |
||||
|
} |
||||
|
.positive .comment-item { |
||||
|
border-left-color: #28a745; |
||||
|
background: #f0fff4; |
||||
|
} |
||||
|
.negative .comment-item { |
||||
|
border-left-color: #dc3545; |
||||
|
background: #fff0f0; |
||||
|
} |
||||
|
.comment-text { |
||||
|
margin-bottom: 8px; |
||||
|
font-size: 16px; |
||||
|
} |
||||
|
.comment-label { |
||||
|
display: inline-block; |
||||
|
padding: 3px 8px; |
||||
|
border-radius: 4px; |
||||
|
font-size: 12px; |
||||
|
font-weight: bold; |
||||
|
} |
||||
|
.positive .comment-label { |
||||
|
background: #28a745; |
||||
|
color: white; |
||||
|
} |
||||
|
.negative .comment-label { |
||||
|
background: #dc3545; |
||||
|
color: white; |
||||
|
} |
||||
|
.stats { |
||||
|
background: #f8f9fa; |
||||
|
padding: 20px; |
||||
|
text-align: center; |
||||
|
border-top: 1px solid #eee; |
||||
|
font-size: 18px; |
||||
|
color: #555; |
||||
|
} |
||||
|
.highlight { |
||||
|
font-weight: bold; |
||||
|
font-size: 24px; |
||||
|
color: #2575fc; |
||||
|
margin: 0 5px; |
||||
|
} |
||||
|
@media (max-width: 768px) { |
||||
|
.results-container { |
||||
|
flex-direction: column; |
||||
|
} |
||||
|
.result-column { |
||||
|
min-width: 100%; |
||||
|
} |
||||
|
} |
||||
|
</style> |
||||
|
</head> |
||||
|
<body> |
||||
|
<div class="container"> |
||||
|
<header> |
||||
|
<h1>中医小妙招评论分析工具</h1> |
||||
|
<p class="subtitle">自动筛选用户对中医方法有效性的反馈</p> |
||||
|
</header> |
||||
|
|
||||
|
<div class="weibo-card"> |
||||
|
<div class="weibo-content"><?= nl2br(htmlspecialchars($weibo)) ?></div> |
||||
|
<div class="weibo-meta">博主:针灸匠张宝旬 | 发布时间:2025-07-07</div> |
||||
|
</div> |
||||
|
|
||||
|
<div class="stats"> |
||||
|
分析结果:共 <span class="highlight"><?= count($comments) ?></span> 条评论, |
||||
|
其中有效反馈 <span class="highlight"><?= count($results['positive']) ?></span> 条, |
||||
|
无效反馈 <span class="highlight"><?= count($results['negative']) ?></span> 条 |
||||
|
</div> |
||||
|
|
||||
|
<div class="results-container"> |
||||
|
<div class="result-column positive"> |
||||
|
<div class="result-header"> |
||||
|
<span>✅ 有效反馈 (<?= count($results['positive']) ?>)</span> |
||||
|
</div> |
||||
|
<ul class="comment-list"> |
||||
|
<?php foreach ($results['positive'] as $comment): ?> |
||||
|
<li class="comment-item"> |
||||
|
<div class="comment-text"><?= htmlspecialchars($comment) ?></div> |
||||
|
<span class="comment-label">有效</span> |
||||
|
</li> |
||||
|
<?php endforeach; ?> |
||||
|
</ul> |
||||
|
</div> |
||||
|
|
||||
|
<div class="result-column negative"> |
||||
|
<div class="result-header"> |
||||
|
<span>❌ 无效反馈 (<?= count($results['negative']) ?>)</span> |
||||
|
</div> |
||||
|
<ul class="comment-list"> |
||||
|
<?php foreach ($results['negative'] as $comment): ?> |
||||
|
<li class="comment-item"> |
||||
|
<div class="comment-text"><?= htmlspecialchars($comment) ?></div> |
||||
|
<span class="comment-label">无效</span> |
||||
|
</li> |
||||
|
<?php endforeach; ?> |
||||
|
</ul> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
</body> |
||||
|
</html> |
@ -0,0 +1,17 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
from snownlp import seg |
||||
|
from snownlp import SnowNLP |
||||
|
import pymysql |
||||
|
import configparser |
||||
|
import os |
||||
|
import re |
||||
|
|
||||
|
comment = ' <img alt="[赞]" title="[赞]" src="https://face.t.sinajs.cn/t4/appstyle/expression/ext/normal/e6/2018new_zan_org.png" /> <img alt="[赞]" title="[赞]" src="https://face.t.sinajs.cn/t4/appstyle/expression/ext/normal/e6/2018new_zan_org.png" /> ' |
||||
|
|
||||
|
# Remove every <img .../> tag first, then all whitespace, and report what is
# left: its length on one line and the text itself on the next.
non_img_pattern = re.compile(r'<img.*?\/>')
comment = re.sub(r'\s', '', non_img_pattern.sub('', comment))

print(len(comment), comment, sep='\n')
||||
|
|
@ -0,0 +1,210 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html lang="en"> |
||||
|
|
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>知识库</title> |
||||
|
<link rel="stylesheet" href="{$smarty.const.CSS_URL}/css/common.css?v={$smarty.const.CSS_JS_VERSION}"> |
||||
|
<link rel="stylesheet" href="{$smarty.const.CSS_URL}/css/index.css?v={$smarty.const.CSS_JS_VERSION}8"> |
||||
|
<script src="{$smarty.const.CSS_URL}/js/jquery-3.6.0.min.js"></script> |
||||
|
{literal} |
||||
|
<style> |
||||
|
.comment-con,.comment-detail,.small-image-wrapper,.small-video-cover{ |
||||
|
margin-top: 20px; |
||||
|
} |
||||
|
.comment-title{ |
||||
|
margin-bottom: 20px; |
||||
|
color: green; |
||||
|
} |
||||
|
.small-image-wrapper{ |
||||
|
margin-bottom: 20px; |
||||
|
} |
||||
|
.small-image-wrapper{ |
||||
|
display: flex; |
||||
|
flex-wrap: wrap; |
||||
|
column-gap: 10px; |
||||
|
row-gap: 10px; |
||||
|
} |
||||
|
.comment-txt img{ |
||||
|
width: 20px; |
||||
|
height: 20px; |
||||
|
} |
||||
|
.small-image-img{ |
||||
|
width: 100%; |
||||
|
height: 100%; |
||||
|
margin-bottom: 20px; |
||||
|
} |
||||
|
#next-page,#prev-page{ |
||||
|
cursor: pointer; |
||||
|
} |
||||
|
</style> |
||||
|
{/literal} |
||||
|
</head> |
||||
|
|
||||
|
<body> |
||||
|
<div class="home-page"> |
||||
|
{include file="include/header.html"} |
||||
|
<div class="home-main-content"> |
||||
|
<div class="tab-list index-nav-wrap flex"> |
||||
|
<ul class="tab-wrap"> |
||||
|
<li class="index-nav-wrap-li" type="1"><span {if $smarty.get.search_type == "1"}class="active"{/if}>选中评论</span></li> |
||||
|
<li class="index-nav-wrap-li" type="0"><span {if $smarty.get.search_type == "0"}class="active"{/if}>剔除评论</span></li> |
||||
|
<li class="index-nav-wrap-li" type="-1"><span {if $smarty.get.search_type == "-1"}class="active"{/if}>全部评论</span></li> |
||||
|
</ul> |
||||
|
</div> |
||||
|
|
||||
|
<div class="comment-con"> |
||||
|
<div class="comment-title">微博内容:</div> |
||||
|
|
||||
|
<div class="list_item_top flex"> |
||||
|
<div class="list_item_top_l"> |
||||
|
<span class="green">微博</span> |
||||
|
<b class="refer_text"></b> |
||||
|
<span class="name">张宝旬</span> |
||||
|
<span>录入:{$data.created_at}</span> |
||||
|
<a href="https://m.weibo.cn/detail/{$data.wid}" target="_blank">微博地址</a> |
||||
|
</div> |
||||
|
</div> |
||||
|
|
||||
|
<div class="comment-detail"> |
||||
|
{$data.text} |
||||
|
</div> |
||||
|
|
||||
|
{if $data.pic_arr} |
||||
|
<div class="small-image-wrapper"> |
||||
|
{foreach from=$data.pic_arr item=item} |
||||
|
<div class="small-image"> |
||||
|
<span class="small-image-desc">图片</span> |
||||
|
<img src="{$item}" alt="Small Image" class="small-image-img"> |
||||
|
<img class="delete-btn hide" src="../images/delete-icon.svg" alt="Small Image"> |
||||
|
</div> |
||||
|
{/foreach} |
||||
|
</div> |
||||
|
{/if} |
||||
|
|
||||
|
{if $data.video_url} |
||||
|
<div class="small-video-cover" href="{$data.video_url}"> |
||||
|
<span class="small-image-desc">视频</span> |
||||
|
{* Fall back to the bundled placeholder when the post has no video cover. The
   fallback path must not be quoted: it is already inside the src="..." value. *}
<img class="small-video-show" src="{if $data.video_cover}{$data.video_cover}{else}../images/vedio_img.png{/if}" alt="Video Cover">
||||
|
<img src="../images/play.svg" class="play-video" alt="...丢了"> |
||||
|
</div> |
||||
|
{/if} |
||||
|
|
||||
|
{if $comment_list} |
||||
|
<div class="comment-list"> |
||||
|
<div class="comment-title">评论内容:</div> |
||||
|
{foreach from=$comment_list key=key item=item} |
||||
|
<div class="list_item_top flex" style="margin-top: 20px;" idattr="{$item.id}"> |
||||
|
<div class="list_item_top_l"> |
||||
|
<!-- <span class="green">评论</span> --> |
||||
|
<b class="refer_text"></b> |
||||
|
<span class="name">{$key+1}.{$item.screen_name}</span> |
||||
|
<span>{$item.comment_time} {$item.source}</span> |
||||
|
</div> |
||||
|
</div> |
||||
|
|
||||
|
<div class="list_item_top flex" style="margin-top: 10px;"> |
||||
|
<div class="list_item_top_l"> |
||||
|
<span class="green">评论</span> |
||||
|
<b class="refer_text"></b> |
||||
|
<span class="name comment-txt">{$item.content}</span> |
||||
|
{* Status colour: 1 => green, 0 => red, any other value => grey. *}
<span class="{if $item.is_search==1}green{elseif $item.is_search==0}red{else}grey{/if}">{$search_status_list[$item.is_search]}</span>
||||
|
</div> |
||||
|
</div> |
||||
|
{/foreach} |
||||
|
</div> |
||||
|
{/if} |
||||
|
</div> |
||||
|
|
||||
|
<div class="pagination" v-if="total > 0"> |
||||
|
<img id="prev-page" src="/images/prev.svg" alt=""> |
||||
|
|
||||
|
<ul id="page-numbers"> |
||||
|
{if $last_page <= 6} |
||||
|
{section name=num loop=$last_page} |
||||
|
<span class="{if $cur_page == ($smarty.section.num.index+1)}pagActive{/if}" page="{$smarty.section.num.index+1}">{$smarty.section.num.index+1}</span> |
||||
|
{/section} |
||||
|
{/if} |
||||
|
|
||||
|
{if $last_page > 6} |
||||
|
{if $cur_page <= 5} |
||||
|
{section name=num loop=5} |
||||
|
<span class="{if $cur_page == ($smarty.section.num.index+1)}pagActive{/if}" page="{$smarty.section.num.index+1}">{$smarty.section.num.index+1}</span> |
||||
|
{/section} |
||||
|
<span>...</span> |
||||
|
<span page="{$last_page}">{$last_page}</span> |
||||
|
{elseif $cur_page > $last_page-5} |
||||
|
<span page="1">1</span> |
||||
|
<span>...</span> |
||||
|
|
||||
|
{section name=num loop=5} |
||||
|
<span class="{if $cur_page == ($last_page-5+$smarty.section.num.index+1)}pagActive{/if}" page="{$last_page-5+$smarty.section.num.index+1}">{$last_page-5+$smarty.section.num.index+1}</span> |
||||
|
{/section} |
||||
|
|
||||
|
{else} |
||||
|
<span page="1">1</span> |
||||
|
<span>...</span> |
||||
|
{section name=num loop=5} |
||||
|
<span class="{if $cur_page == ($cur_page-2+$smarty.section.num.index)}pagActive{/if}" page="{$cur_page-2+$smarty.section.num.index}">{$cur_page-2+$smarty.section.num.index}</span> |
||||
|
{/section} |
||||
|
<span>...</span> |
||||
|
<span page="{$last_page}">{$last_page}</span> |
||||
|
{/if} |
||||
|
{/if} |
||||
|
</ul> |
||||
|
|
||||
|
<img id="next-page" src="/images/next.svg" alt=""> |
||||
|
<div class="input-page"> |
||||
|
<span>前往</span> |
||||
|
<input type="number" id="jump-to-page" min="1" placeholder="页码"> |
||||
|
<span>页</span> |
||||
|
</div> |
||||
|
<button id="go-to-page">跳转</button> |
||||
|
</div> |
||||
|
|
||||
|
</div> |
||||
|
{include file="include/footer.html"} |
||||
|
</div> |
||||
|
|
||||
|
<div id="hidecomments" style="display: none;"></div> |
||||
|
<div id="last_page" style="display: none;">{$last_page}</div> |
||||
|
|
||||
|
</body> |
||||
|
|
||||
|
{literal} |
||||
|
<script> |
||||
|
// Tab switching and pagination for the comment list. Everything lives in an
// IIFE so the helpers below do not leak into the global scope.
(function () {
    /**
     * Reload the current page with query-string changes applied.
     *
     * @param {Object} changes Map of parameter name to new value; a null value
     *                         removes the parameter, anything else replaces it.
     *
     * The URL API is used instead of regex replacement plus "&name=value"
     * appending, so this also works when the address has no query string yet,
     * when the parameter comes first, or when a #fragment is present.
     */
    function navigateWith(changes) {
        var target = new URL(window.location.href);
        Object.keys(changes).forEach(function (name) {
            var value = changes[name];
            if (value === null) {
                target.searchParams.delete(name);
            } else {
                target.searchParams.set(name, value);
            }
        });
        window.location.href = target.toString();
    }

    // Number of the last page, as rendered into the hidden #last_page element.
    function lastPage() {
        return parseInt($('#last_page').text(), 10);
    }

    // Currently highlighted page; falls back to 1 when no page is marked
    // active, so the arrows never navigate to "page=NaN".
    function currentPage() {
        var active = parseInt($('#page-numbers span.pagActive').attr('page'), 10);
        return isNaN(active) ? 1 : active;
    }

    // Switching the tab changes the filter and restarts at the first page.
    $('.index-nav-wrap-li').click(function () {
        navigateWith({ search_type: $(this).attr('type'), page: null });
    });

    // Page-number click; the "..." placeholders carry no page attribute.
    $('#page-numbers span').click(function () {
        var page = $(this).attr('page');
        if (!page) return;
        navigateWith({ page: page });
    });

    $('#next-page').click(function () {
        var page = currentPage() + 1;
        if (page > lastPage()) return;
        navigateWith({ page: page });
    });

    $('#prev-page').click(function () {
        var page = currentPage() - 1;
        if (page < 1) return;
        navigateWith({ page: page });
    });

    // "Jump to page": an empty or non-numeric input goes to page 1, and the
    // value is clamped to the range 1..last page.
    $('#go-to-page').click(function () {
        var page = parseInt($('#jump-to-page').val(), 10);
        var last = lastPage();
        if (isNaN(page)) page = 1;
        if (page > last) page = last;
        if (page < 1) page = 1;
        navigateWith({ page: page });
    });
})();
||||
|
</script> |
||||
|
{/literal} |
||||
|
|
||||
|
</html> |
@ -0,0 +1,170 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html lang="en"> |
||||
|
|
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>知识库</title> |
||||
|
<link rel="stylesheet" href="{$smarty.const.CSS_URL}/css/common.css?v={$smarty.const.CSS_JS_VERSION}"> |
||||
|
<link rel="stylesheet" href="{$smarty.const.CSS_URL}/css/index.css?v={$smarty.const.CSS_JS_VERSION}"> |
||||
|
<script src="{$smarty.const.CSS_URL}/js/jquery-3.6.0.min.js"></script> |
||||
|
</head> |
||||
|
|
||||
|
<body> |
||||
|
<div class="home-page"> |
||||
|
{include file="include/header.html"} |
||||
|
|
||||
|
<div class="home-main-content"> |
||||
|
<div class="home-main"> |
||||
|
<div class="tab-list index-nav-wrap flex"> |
||||
|
<ul class="tab-wrap" style="display: none;"> |
||||
|
<li class="index-nav-wrap-li" type="1"><span class="active">待审批</span></li> |
||||
|
<li class="index-nav-wrap-li" type="2"><span>已审批</span></li> |
||||
|
<li class="index-nav-wrap-li" type="3"><span>已删除</span></li> |
||||
|
</ul> |
||||
|
<button class="addNewBtn" style="display: none;"> |
||||
|
<img src="{$smarty.const.CSS_URL}/images/add.svg" alt="">新增自录入 |
||||
|
</button> |
||||
|
</div> |
||||
|
|
||||
|
|
||||
|
<div class="list_all" id="data-list"> |
||||
|
</div> |
||||
|
|
||||
|
|
||||
|
<!-- 数据列表 --> |
||||
|
<!-- <ul id="data-list"></ul> --> |
||||
|
|
||||
|
<!-- 分页控件 --> |
||||
|
<div class="pagination hide"> |
||||
|
<img id="prev-page" src="{$smarty.const.CSS_URL}/images/prev.svg" alt=""> |
||||
|
<ul id="page-numbers"></ul> |
||||
|
<img id="next-page" src="{$smarty.const.CSS_URL}/images/next.svg" alt=""> |
||||
|
<div class="input-page"> |
||||
|
<span>前往</span> |
||||
|
<input type="number" id="jump-to-page" min="1" placeholder="页码"> |
||||
|
<span>页</span> |
||||
|
</div> |
||||
|
<button id="go-to-page">跳转</button> |
||||
|
</div> |
||||
|
|
||||
|
<!-- 放大后的图片容器 --> |
||||
|
<div id="large-image-container"> |
||||
|
<h2>预览</h2> |
||||
|
<img id="large-image" src="{$smarty.const.CSS_URL}/images/viewimg1.png" alt="Large Image"> |
||||
|
<span id="close-btn2">×</span> |
||||
|
</div> |
||||
|
|
||||
|
<!-- 放大后的视频容器 --> |
||||
|
<div id="large-video-container"> |
||||
|
<video id="large-video" controls> |
||||
|
<source id="large-viedo-url" src="" type="video/mp4"> |
||||
|
Your browser does not support the video tag. |
||||
|
</video> |
||||
|
<button id="close-btn">×</button> |
||||
|
</div> |
||||
|
|
||||
|
<!-- 弹框 --> |
||||
|
<div class="modal-overlay"></div> |
||||
|
<div class="modal"> |
||||
|
<div class="modal_top"> |
||||
|
<b id="header_title">编辑</b> |
||||
|
<img src="{$smarty.const.CSS_URL}/images/close_modal.svg" id="close_modal" alt=""> |
||||
|
</div> |
||||
|
|
||||
|
<div class="add-form"> |
||||
|
<div class="form-item radio-form"> |
||||
|
<input type="hidden" id="id" value=""> |
||||
|
<div class="form-left"> |
||||
|
录入形式 |
||||
|
</div> |
||||
|
<div class="form-right radio-wrap flex"> |
||||
|
<div class="radio_box radio_box_active"> |
||||
|
<input value="1" type="radio" id="edu1">
||||
|
<div></div> |
||||
|
<label >信息段录入</label> |
||||
|
</div> |
||||
|
<div class="radio_box"> |
||||
|
<input value="2" type="radio" id="edu2">
||||
|
<div></div> |
||||
|
<label >问答式录入</label> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
<div class="form-item normal-form" style="display: none;"> |
||||
|
<div class="form-left"> |
||||
|
提问信息 |
||||
|
</div> |
||||
|
<div class="form-right"> |
||||
|
<div class="text-area-container"> |
||||
|
<textarea class="edit-input normal-input" ></textarea> |
||||
|
<div class="char-count wordNum">0/200</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
|
||||
|
<div class="form-item"> |
||||
|
<div class="form-left common-input"> |
||||
|
回答信息 |
||||
|
</div> |
||||
|
<div class="form-right"> |
||||
|
<div class="text-area-container"> |
||||
|
<textarea class="edit-input answer-input" ></textarea> |
||||
|
<div class="char-count1 wordNum">0/200</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
{literal} |
||||
|
<script> |
||||
|
const maxLength = 100; // maximum number of characters accepted per textarea

// Each textarea is paired with the counter element that shows "typed/limit".
// NOTE(review): the counters start as "0/200" in the markup while the limit
// enforced here is 100 - confirm which value is intended.
[
    ['.normal-input', '.char-count'],
    ['.answer-input', '.char-count1']
].forEach(function (pair) {
    var inputSelector = pair[0];
    var counterSelector = pair[1];

    $(inputSelector).on('input', function () {
        var typed = $(this).val().length;
        $(counterSelector).text(typed + '/' + maxLength);
        if (typed <= maxLength) {
            return;
        }
        // Over the limit: cut the text back and pin the counter at the maximum.
        $(inputSelector).val($(inputSelector).val().slice(0, maxLength));
        $(counterSelector).text(maxLength + '/' + maxLength);
    });
});
||||
|
</script> |
||||
|
{/literal} |
||||
|
<div class="img_list2" style="display: none;"></div> |
||||
|
<div class="preview"></div> |
||||
|
|
||||
|
<div class="modal_upload_btn flex hide"> |
||||
|
<div class="flex modal_btns"> |
||||
|
<button id="upload-image-btn" class="button upload_btn"> |
||||
|
<img src="{$smarty.const.CSS_URL}/images/img_upload.svg" alt="">上传图片 |
||||
|
</button> |
||||
|
<button id="upload-video-btn" class="button upload_btn"> |
||||
|
<img src="{$smarty.const.CSS_URL}/images/vedio_upload.svg" alt="">上传视频 |
||||
|
</button> |
||||
|
</div> |
||||
|
<p>支持 jpg、png、mp4 格式,单个文件不超过 10MB</p> |
||||
|
</div> |
||||
|
<input type="file" id="upload-image" accept="image/*" style="display: none;" multiple> |
||||
|
<input type="file" id="upload-video" accept="video/*" style="display: none;" multiple> |
||||
|
<div class="buttons flex"> |
||||
|
<button id="submit" class="button ">仅保存</button> |
||||
|
<button id="savePass" class="button button-primary">保存并通过审批</button> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
</div> |
||||
|
{include file="include/footer.html"} |
||||
|
</div> |
||||
|
|
||||
|
<div id="hidecomments" style="display: none;"></div> |
||||
|
|
||||
|
</body> |
||||
|
<script src="{$smarty.const.CSS_URL}/js/index.js?v={$smarty.const.CSS_JS_VERSION}89"></script> |
||||
|
|
||||
|
</html> |
Loading…
Reference in new issue