Browse Source

Merge branch 'zbx_weibo' of gitea.intra.yunpaper.com:zhangqi/knowledge into zbx_weibo

zbx_weibo
longchao 4 weeks ago
parent
commit
3cb337d6c4
  1. 326
      tools/es.php
  2. 92
      tools/es_add_comments.php
  3. 74
      tools/es_search.php
  4. 95
      tools/es_setting.php

326
tools/es.php

@ -0,0 +1,326 @@
<?php
use Elasticsearch\ClientBuilder;
/**
 * Thin convenience wrapper around the Elasticsearch PHP client.
 *
 * Most methods only assemble the request array and forward it to the client.
 * Exceptions raised by the client propagate to the caller, except in
 * exists_index(), create_index() and create_weibo_index(), which catch
 * \Exception and return false instead.
 */
class ES
{
    /** @var object Client instance returned by ClientBuilder::build(). */
    private $client;

    /** @var string Index name given to the constructor; used by create_weibo_index(). */
    private $index_name;

    /**
     * Build the Elasticsearch client.
     *
     * @param string $index Index name remembered for create_weibo_index().
     * @param array  $hosts Host list handed to ClientBuilder::setHosts();
     *                      defaults to the local node the class always used.
     */
    public function __construct($index, $hosts = ['127.0.0.1:9200'])
    {
        $this->client = ClientBuilder::create()->setHosts($hosts)->build();
        $this->index_name = $index;
        // No return value: whatever a constructor returns is discarded by `new`.
    }

    /**
     * Check whether an index exists.
     *
     * @param string $index_name
     * @return mixed Result of the client's indices()->exists() call, or false
     *               when the client throws.
     */
    public function exists_index($index_name = 'test_ik')
    {
        $params = [
            'index' => $index_name,
        ];

        try {
            return $this->client->indices()->exists($params);
        } catch (\Exception $e) {
            return false;
        }
    }

    /**
     * Create an index with 3 shards and no replicas.
     *
     * @param string $index_name
     * @return mixed Result of the client's indices()->create() call, or false
     *               when the client throws.
     */
    public function create_index($index_name = 'test_ik')
    {
        $params = [
            'index' => $index_name,
            'body'  => [
                'settings' => [
                    'number_of_shards'   => 3,
                    'number_of_replicas' => 0,
                ],
            ],
        ];

        try {
            return $this->client->indices()->create($params);
        } catch (\Exception $e) {
            return false;
        }
    }

    /**
     * Delete an index. Client exceptions are not caught here.
     *
     * @param string $index_name
     * @return mixed Result of the client's indices()->delete() call.
     */
    public function delete_index($index_name = 'test_ik')
    {
        return $this->client->indices()->delete(['index' => $index_name]);
    }

    /**
     * Index (add) a document.
     *
     * Example:
     *   $params = [
     *       'index' => 'es',
     *       'type'  => 'article',
     *       'body'  => ['title' => '...'],
     *   ];
     *
     * @param array $params Request array passed unchanged to the client's index() call.
     * @return mixed Result of the client's index() call.
     */
    public function add_doc($params)
    {
        return $this->client->index($params);
    }

    /**
     * Check whether a document exists.
     *
     * @param int|string $id
     * @param string     $index_name
     * @param string     $type_name
     * @return mixed Result of the client's exists() call.
     */
    public function exists_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
            'id'    => $id,
        ];

        return $this->client->exists($params);
    }

    /**
     * Fetch a document by id.
     *
     * @param int|string $id
     * @param string     $index_name
     * @param string     $type_name
     * @return mixed Result of the client's get() call.
     */
    public function get_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
            'id'    => $id,
        ];

        return $this->client->get($params);
    }

    /**
     * Update a document.
     *
     * Example:
     *   $params = [
     *       'index' => 'es',
     *       'type'  => 'article',
     *       'id'    => 'OIwzxXgBzF70K-DobSSC',
     *       'body'  => [
     *           'doc' => [
     *               'title' => 'new title',
     *               'desn'  => 'new description',
     *           ],
     *       ],
     *   ];
     *
     * @param array $params Request array passed unchanged to the client's update() call.
     * @return mixed Result of the client's update() call.
     */
    public function update_doc($params = [])
    {
        return $this->client->update($params);
    }

    /**
     * Delete a document by id.
     *
     * @param int|string $id
     * @param string     $index_name
     * @param string     $type_name
     * @return mixed Result of the client's delete() call.
     */
    public function delete_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
            'id'    => $id,
        ];

        return $this->client->delete($params);
    }

    /**
     * Search documents with a caller-supplied query body (paging, sorting,
     * boosting and filtering are all expressed in $body).
     *
     * Example:
     *   $body = [
     *       'query' => [
     *           'bool' => [
     *               'should' => [
     *                   ['match' => ['cate_name'       => ['query' => $keywords, 'boost' => 4]]], // highest weight
     *                   ['match' => ['goods_name'      => ['query' => $keywords, 'boost' => 3]]],
     *                   ['match' => ['goods_introduce' => ['query' => $keywords, 'boost' => 2]]],
     *               ],
     *           ],
     *       ],
     *       'sort' => ['id' => ['order' => 'desc']],
     *       'from' => $from,
     *       'size' => $size,
     *   ];
     *
     * @param string $index_name
     * @param string $type_name
     * @param array  $body
     * @return mixed Result of the client's search() call.
     */
    public function search_doc($index_name = "test_ik", $type_name = "goods", $body = [])
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
            'body'  => $body,
        ];

        return $this->client->search($params);
    }

    /**
     * List documents: a search with no query body.
     *
     * @param string $index_name
     * @param string $type_name
     * @return mixed Result of the client's search() call.
     */
    public function select_doc($index_name = 'test_ik', $type_name = 'goods')
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
        ];

        return $this->client->search($params);
    }

    /**
     * Search with highlighting. The highlight configuration is part of $body;
     * this method sends the same request shape as search_doc().
     *
     * Example:
     *   $body = [
     *       'query' => ['match' => ['desn' => 'chairman']],
     *       'highlight' => [
     *           'pre_tags'  => ["<b class='key' style='color:red'>"],
     *           'post_tags' => ["</b>"],
     *           'fields'    => ['desn' => new \stdClass()],
     *       ],
     *   ];
     *
     * @param string $index_name
     * @param string $type_name
     * @param array  $body
     * @return mixed Result of the client's search() call.
     */
    public function highlight_doc($index_name = 'test_ik', $type_name = 'goods', $body = [])
    {
        $params = [
            'index' => $index_name,
            'type'  => $type_name,
            'body'  => $body,
        ];

        return $this->client->search($params);
    }

    /**
     * Create the weibo index (named by the constructor argument) unless it
     * already exists.
     *
     * @return mixed true when exists_index() reports the index as present,
     *               otherwise the result of indices()->create(), or false
     *               when the client throws.
     */
    public function create_weibo_index()
    {
        if ($this->exists_index($this->index_name)) {
            return true;
        }

        $params = [
            'index' => $this->index_name,
            'body'  => [
                'settings' => [
                    'number_of_shards'   => 2,
                    'number_of_replicas' => 1,
                    'analysis' => [
                        'analyzer' => [
                            'ik_analyzer' => [
                                'type'      => 'custom',
                                'tokenizer' => 'ik_max_word',
                            ],
                        ],
                    ],
                ],
                'mappings' => [
                    'properties' => [
                        'id'      => ['type' => 'long'],
                        'uid'     => ['type' => 'long'],
                        'wid'     => ['type' => 'long'],
                        // A dense_vector "semantic" sub-field (768 dims) was
                        // sketched for this field but is not enabled.
                        'content' => [
                            'type'     => 'text',
                            'analyzer' => 'ik_max_word',
                        ],
                        'created_at'      => ['type' => 'date'],
                        'comments_count'  => ['type' => 'long'],
                        'attitudes_count' => ['type' => 'long'],
                        // The weibo import script also sends reposts_count;
                        // map it explicitly like the other counters.
                        'reposts_count'   => ['type' => 'long'],
                        'status'          => ['type' => 'long'],
                    ],
                ],
            ],
        ];

        try {
            return $this->client->indices()->create($params);
        } catch (\Exception $e) {
            return false;
        }
    }
}

92
tools/es_add_comments.php

@ -0,0 +1,92 @@
<?php
// nohup php /data1/www/zhishiku.kuailelunwen.com/tools/es_add_comments.php &
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php");
include_once(dirname(dirname(__FILE__))."/tools/es.php");
/**
 * Load MySQL connection settings for one section of the database INI file.
 *
 * The section's "master" entry is a comma-separated list of host[:port]
 * values; one of them is picked at random.
 *
 * @param string      $dbflag   Section name; lower-cased before lookup.
 * @param string|null $ini_path INI file to read. Defaults to
 *                              ../config/database.ini relative to this script.
 * @return array Keys: host, port, user, pwd, db (all trimmed strings; port is
 *               '' when the chosen entry has no ":port" part).
 * @throws \RuntimeException When the file cannot be read, the section is
 *                           missing, or it has no usable "master" entry.
 */
function parseDbCnf($dbflag, $ini_path = null) {
    if ($ini_path === null) {
        $ini_path = dirname(dirname(__FILE__)).'/config/database.ini';
    }

    $configs = is_readable($ini_path) ? parse_ini_file($ini_path, true) : false;
    if (!is_array($configs)) {
        throw new \RuntimeException('Unable to read database config: '.$ini_path);
    }

    $section = strtolower($dbflag);
    if (!isset($configs[$section]) || !is_array($configs[$section])) {
        throw new \RuntimeException('Missing database config section: '.$section);
    }
    $config = $configs[$section];

    // Drop blank entries so a stray comma can never yield an empty host.
    $master = isset($config['master']) ? (string) $config['master'] : '';
    $hps = array_values(array_filter(array_map('trim', explode(',', $master)), 'strlen'));
    if (count($hps) === 0) {
        throw new \RuntimeException('No master host configured in section: '.$section);
    }

    $hp = $hps[rand(0, count($hps) - 1)];
    // Pad so that an entry without ":port" gives an empty port instead of a notice.
    $parts = array_pad(explode(':', $hp), 2, '');

    $cnf = array();
    $cnf['host'] = trim($parts[0]);
    $cnf['port'] = trim($parts[1]);
    $cnf['user'] = trim(isset($config['user']) ? (string) $config['user'] : '');
    $cnf['pwd'] = trim(isset($config['passwd']) ? (string) $config['passwd'] : '');
    $cnf['db'] = trim(isset($config['db']) ? (string) $config['db'] : '');
    return $cnf;
}
// Bulk-import rows from spider_weibo_comments into the "comments" index,
// paging through the table by ascending id.
$mysqlconfig = parseDbCnf('simplyphp');
$servername = $mysqlconfig['host'];
$username = $mysqlconfig['user'];
$password = $mysqlconfig['pwd'];
$dbname = $mysqlconfig['db'];
// Honour the configured port; fall back to mysqli's default when none is set.
$dbport = (isset($mysqlconfig['port']) && $mysqlconfig['port'] !== '')
    ? (int) $mysqlconfig['port']
    : (int) ini_get('mysqli.default_port');

$obj = new \ES('weibo');
$conn = new mysqli($servername, $username, $password, $dbname, $dbport);
if ($conn->connect_error) {
    die("连接失败: " . $conn->connect_error);
}
$conn->set_charset("utf8mb4");

$limit = 5000;   // rows fetched per page
$min_id = 0;     // highest id seen so far; the next page starts after it
$log_path = '/datacenter/zhishiku/es_comment.log';
$log_path_success = '/datacenter/zhishiku/es_success_comment.log';
$log_path_err = '/datacenter/zhishiku/es_error_comment.log';

for ($page = 0; ; $page++) {
    $sql = "SELECT * FROM spider_weibo_comments where id>".$min_id." order by id asc limit ".$limit;
    $result = $conn->query($sql);
    if ($result === false) {
        // A failed query returns false; log it rather than dying on ->num_rows.
        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
        break;
    }

    $num_rows = $result->num_rows;
    while ($row = $result->fetch_assoc()) {
        $min_id = $row['id'];
        // Skip rows whose content is empty or consists only of spaces.
        if (str_replace(" ", "", $row["content"]) == "") {
            continue;
        }

        // strtotime() returns false for unparsable values; store null then
        // instead of letting date() format it as 1970-01-01.
        $ts = empty($row['comment_time']) ? false : strtotime($row['comment_time']);

        $data = [
            'id'           => $row['id'] + 0,
            'uid'          => $row['uid'] + 0,
            'weibo_id'     => $row['weibo_id'] + 0,
            'content'      => $row['content'],
            'comment_time' => ($ts === false) ? null : date('c', $ts),
        ];
        $params = [
            'index' => 'comments',
            'type'  => 'doc',
            'id'    => $data['id'],
            'body'  => $data,
        ];

        try {
            $resc = $obj->add_doc($params);
            $outcome = isset($resc['result']) ? $resc['result'] : null;
            // Re-running the import overwrites existing ids, which reports
            // "updated"; that is a success, not an error.
            if ($outcome === 'created' || $outcome === 'updated') {
                error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
            } else {
                error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
            }
        } catch (\Exception $th) {
            error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
        }
    }
    $result->free();

    // Progress line: rows covered so far | page number.
    error_log((($page * $limit) + $num_rows)."|".$page."\n", 3, $log_path);
    if ($num_rows < $limit) {
        break; // short (or empty) page: the table is exhausted
    }
}
$conn->close();
?>

74
tools/es_search.php

@ -0,0 +1,74 @@
<?php
error_reporting(0);
ini_set('display_errors', 0);
// HTTP endpoint: expects POST fields "key" (search keywords) and "token"; it reads $_POST, so it is not meant to be run from the CLI.
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php");
include_once(dirname(dirname(__FILE__))."/tools/es.php");
// Search endpoint: validates the POSTed token and keywords, runs a match
// query on the "content" field of the weibo index and prints a JSON envelope
// of the form {status, info, data}.
$data = [
    'status' => false,
    'info'   => '',
    'data'   => [],
];
$from = 0;
$size = 25;

// Read the inputs defensively: missing or non-string values become ''.
$keywords = (isset($_POST['key']) && is_string($_POST['key'])) ? $_POST['key'] : '';
$token = (isset($_POST['token']) && is_string($_POST['token'])) ? $_POST['token'] : '';

// NOTE(review): the shared secret is hard-coded; consider moving it to config.
$expected_token = 'ahckexb@!@#!@#$%cdasd%$';
// hash_equals() gives a strict, timing-safe comparison of the secret.
if (!hash_equals($expected_token, $token)) {
    echo 'succ';
    exit;
}

// Compare against '' explicitly: empty() would also reject the keyword "0".
if (trim($keywords) === '') {
    $data['info'] = '错误';
    // JSON_HEX_TAG equals the integer 1 that the former `true` argument was
    // coerced to, so the emitted bytes are unchanged.
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

$body = [
    'query' => [
        'bool' => [
            'should' => [
                [
                    'match' => [
                        'content' => [
                            'query' => $keywords,
                            'boost' => 4,
                        ],
                    ],
                ],
            ],
        ],
    ],
    'from' => $from,
    'size' => $size,
];

try {
    $obj = new \ES('weibo');
    $res = $obj->search_doc('weibo', 'doc', $body);
} catch (\Exception $e) {
    // Errors are not displayed in this script, so an uncaught exception would
    // produce an empty response; report the failure in the envelope instead.
    $data['info'] = '搜索失败';
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

$hits = isset($res['hits']['hits']) ? $res['hits']['hits'] : [];
if (empty($hits)) {
    $data['info'] = '数据为空';
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

// Return only the stored documents, not the hit metadata.
$rdata = [];
foreach ($hits as $val) {
    $rdata[] = $val['_source'];
}

$data['status'] = true;
$data['data'] = $rdata;
echo json_encode($data, JSON_HEX_TAG);
exit;

95
tools/es_setting.php

@ -0,0 +1,95 @@
<?php
// nohup php /data1/www/zhishiku.kuailelunwen.com/tools/es_setting.php &
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php");
include_once(dirname(dirname(__FILE__))."/tools/es.php");
/**
 * Load MySQL connection settings for one section of the database INI file.
 *
 * The section's "master" entry is a comma-separated list of host[:port]
 * values; one of them is picked at random.
 *
 * @param string      $dbflag   Section name; lower-cased before lookup.
 * @param string|null $ini_path INI file to read. Defaults to
 *                              ../config/database.ini relative to this script.
 * @return array Keys: host, port, user, pwd, db (all trimmed strings; port is
 *               '' when the chosen entry has no ":port" part).
 * @throws \RuntimeException When the file cannot be read, the section is
 *                           missing, or it has no usable "master" entry.
 */
function parseDbCnf($dbflag, $ini_path = null) {
    if ($ini_path === null) {
        $ini_path = dirname(dirname(__FILE__)).'/config/database.ini';
    }

    $configs = is_readable($ini_path) ? parse_ini_file($ini_path, true) : false;
    if (!is_array($configs)) {
        throw new \RuntimeException('Unable to read database config: '.$ini_path);
    }

    $section = strtolower($dbflag);
    if (!isset($configs[$section]) || !is_array($configs[$section])) {
        throw new \RuntimeException('Missing database config section: '.$section);
    }
    $config = $configs[$section];

    // Drop blank entries so a stray comma can never yield an empty host.
    $master = isset($config['master']) ? (string) $config['master'] : '';
    $hps = array_values(array_filter(array_map('trim', explode(',', $master)), 'strlen'));
    if (count($hps) === 0) {
        throw new \RuntimeException('No master host configured in section: '.$section);
    }

    $hp = $hps[rand(0, count($hps) - 1)];
    // Pad so that an entry without ":port" gives an empty port instead of a notice.
    $parts = array_pad(explode(':', $hp), 2, '');

    $cnf = array();
    $cnf['host'] = trim($parts[0]);
    $cnf['port'] = trim($parts[1]);
    $cnf['user'] = trim(isset($config['user']) ? (string) $config['user'] : '');
    $cnf['pwd'] = trim(isset($config['passwd']) ? (string) $config['passwd'] : '');
    $cnf['db'] = trim(isset($config['db']) ? (string) $config['db'] : '');
    return $cnf;
}
// Bulk-import the posts of one account from spider_weibo into the "weibo"
// index, paging through the table by ascending id.
$mysqlconfig = parseDbCnf('simplyphp');
$servername = $mysqlconfig['host'];
$username = $mysqlconfig['user'];
$password = $mysqlconfig['pwd'];
$dbname = $mysqlconfig['db'];
// Honour the configured port; fall back to mysqli's default when none is set.
$dbport = (isset($mysqlconfig['port']) && $mysqlconfig['port'] !== '')
    ? (int) $mysqlconfig['port']
    : (int) ini_get('mysqli.default_port');

$obj = new \ES('weibo');
$conn = new mysqli($servername, $username, $password, $dbname, $dbport);
if ($conn->connect_error) {
    die("连接失败: " . $conn->connect_error);
}
$conn->set_charset("utf8mb4");

$limit = 5000;   // rows fetched per page
$min_id = 0;     // highest id seen so far; the next page starts after it
$log_path = '/datacenter/zhishiku/es.log';
$log_path_success = '/datacenter/zhishiku/es_success.log';
$log_path_err = '/datacenter/zhishiku/es_error.log';

for ($page = 0; ; $page++) {
    $sql = "SELECT * FROM spider_weibo where uid=2282201403 and id>".$min_id." order by id asc limit ".$limit;
    $result = $conn->query($sql);
    if ($result === false) {
        // A failed query returns false; log it rather than dying on ->num_rows.
        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
        break;
    }

    $num_rows = $result->num_rows;
    while ($row = $result->fetch_assoc()) {
        $min_id = $row['id'];
        // Skip rows whose text is empty or consists only of spaces.
        if (str_replace(" ", "", $row["text"]) == "") {
            continue;
        }

        // strtotime() returns false for unparsable values; store null then
        // instead of letting date() format it as 1970-01-01.
        $ts = empty($row['created_at']) ? false : strtotime($row['created_at']);

        $data = [
            'id'              => $row['id'] + 0,
            'uid'             => $row['uid'] + 0,
            'wid'             => $row['wid'] + 0,
            'content'         => $row['text'],
            'created_at'      => ($ts === false) ? null : date('c', $ts),
            'comments_count'  => $row['comments_count'] + 0,
            'attitudes_count' => $row['attitudes_count'] + 0,
            'reposts_count'   => $row['reposts_count'] + 0,
            'status'          => $row['status'] + 0,
        ];
        $params = [
            'index' => 'weibo',
            'type'  => 'doc',
            'id'    => $data['id'],
            'body'  => $data,
        ];

        try {
            $resc = $obj->add_doc($params);
            $outcome = isset($resc['result']) ? $resc['result'] : null;
            // Re-running the import overwrites existing ids, which reports
            // "updated"; that is a success, not an error.
            if ($outcome === 'created' || $outcome === 'updated') {
                error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
            } else {
                error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
            }
        } catch (\Exception $th) {
            error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
        }
    }
    $result->free();

    // Progress line: rows covered so far | page number.
    error_log((($page * $limit) + $num_rows)."|".$page."\n", 3, $log_path);
    if ($num_rows < $limit) {
        break; // short (or empty) page: the table is exhausted
    }
}
$conn->close();
?>
Loading…
Cancel
Save