4 changed files with 587 additions and 0 deletions
@ -0,0 +1,326 @@ |
|||
<?php |
|||
use Elasticsearch\ClientBuilder; |
|||
|
|||
/**
 * Small convenience wrapper around the elasticsearch-php client.
 *
 * Groups index-level calls (exists/create/delete), single-document calls
 * (index/exists/get/update/delete) and a few search helpers behind snake_case
 * methods. Unless a method says otherwise it forwards its parameters to the
 * client and returns the client's response unchanged; exceptions thrown by
 * the client propagate to the caller.
 *
 * NOTE(review): several methods send a 'type' parameter. Mapping types were
 * deprecated and later removed by Elasticsearch - confirm the cluster version
 * this class is used against.
 */
class ES
{
    /** @var object Client produced by ClientBuilder::create()->setHosts()->build(). */
    private $client;

    /** @var string Index name given to the constructor; used by create_weibo_index(). */
    private $index_name;

    /**
     * Builds the Elasticsearch client.
     *
     * @param string          $index Index name remembered for create_weibo_index().
     * @param string[]|string $hosts Node address(es) handed to ClientBuilder::setHosts().
     *                               Defaults to the local node this class was
     *                               previously hard-wired to.
     */
    public function __construct($index, $hosts = array('127.0.0.1:9200'))
    {
        $this->client = ClientBuilder::create()->setHosts((array) $hosts)->build();
        $this->index_name = $index;
        // A constructor's return value is discarded by `new`, so nothing is returned.
    }

    /**
     * Checks whether an index exists.
     *
     * @param string $index_name Index to look up.
     * @return bool|mixed The client's answer, or false when the client threw.
     */
    public function exists_index($index_name = 'test_ik')
    {
        try {
            return $this->client->indices()->exists(array('index' => $index_name));
        } catch (\Exception $e) {
            $this->log_failure(__FUNCTION__, $e);
            return false;
        }
    }

    /**
     * Creates an index with 3 primary shards and no replicas.
     *
     * @param string $index_name Index to create.
     * @return array|mixed|bool The client's response, or false when the client threw.
     */
    public function create_index($index_name = 'test_ik')
    {
        $params = array(
            'index' => $index_name,
            'body'  => array(
                'settings' => array(
                    'number_of_shards'   => 3,
                    'number_of_replicas' => 0,
                ),
            ),
        );

        try {
            return $this->client->indices()->create($params);
        } catch (\Exception $e) {
            $this->log_failure(__FUNCTION__, $e);
            return false;
        }
    }

    /**
     * Deletes an index.
     *
     * Unlike exists_index()/create_index(), client exceptions are NOT caught
     * here; callers relying on that keep the same behaviour.
     *
     * @param string $index_name Index to delete.
     * @return array|mixed The client's response.
     */
    public function delete_index($index_name = 'test_ik')
    {
        return $this->client->indices()->delete(array('index' => $index_name));
    }

    /**
     * Indexes (adds) a document.
     *
     * Example:
     *   $params = [
     *       'index' => 'es',
     *       'type'  => 'article',
     *       'body'  => ['title' => '...'],
     *   ];
     *
     * @param array $params Parameters passed verbatim to the client's index() call.
     * @return array|mixed The client's response.
     */
    public function add_doc($params)
    {
        return $this->client->index($params);
    }

    /**
     * Checks whether a document exists.
     *
     * @param int|string $id         Document id.
     * @param string     $index_name Index holding the document.
     * @param string     $type_name  Value sent as the 'type' parameter.
     * @return array|bool|mixed The client's response.
     */
    public function exists_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        return $this->client->exists($this->doc_params($id, $index_name, $type_name));
    }

    /**
     * Fetches a document.
     *
     * @param int|string $id         Document id.
     * @param string     $index_name Index holding the document.
     * @param string     $type_name  Value sent as the 'type' parameter.
     * @return array|mixed The client's response.
     */
    public function get_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        return $this->client->get($this->doc_params($id, $index_name, $type_name));
    }

    /**
     * Updates a document.
     *
     * Example (partial update through the "doc" key):
     *   $params = [
     *       'index' => 'es',
     *       'type'  => 'article',
     *       'id'    => 'OIwzxXgBzF70K-DobSSC',
     *       'body'  => [
     *           'doc' => [
     *               'title' => 'new title',
     *               'desn'  => 'new description',
     *           ],
     *       ],
     *   ];
     *
     * @param array $params Parameters passed verbatim to the client's update() call.
     * @return array|mixed The client's response.
     */
    public function update_doc($params = [])
    {
        return $this->client->update($params);
    }

    /**
     * Deletes a document.
     *
     * @param int|string $id         Document id.
     * @param string     $index_name Index holding the document.
     * @param string     $type_name  Value sent as the 'type' parameter.
     * @return array|mixed The client's response.
     */
    public function delete_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
    {
        return $this->client->delete($this->doc_params($id, $index_name, $type_name));
    }

    /**
     * Runs a search with a caller-supplied request body (paging, sorting,
     * boosting and filtering are all expressed inside $body).
     *
     * Example body - three boosted "should" matches, newest id first:
     *   $body = [
     *       'query' => [
     *           'bool' => [
     *               'should' => [
     *                   ['match' => ['cate_name'       => ['query' => $keywords, 'boost' => 4]]],
     *                   ['match' => ['goods_name'      => ['query' => $keywords, 'boost' => 3]]],
     *                   ['match' => ['goods_introduce' => ['query' => $keywords, 'boost' => 2]]],
     *               ],
     *           ],
     *       ],
     *       'sort' => ['id' => ['order' => 'desc']],
     *       'from' => $from,
     *       'size' => $size,
     *   ];
     *
     * @param string $index_name Index to search.
     * @param string $type_name  Value sent as the 'type' parameter.
     * @param array  $body       Search request body.
     * @return array|mixed The client's response.
     */
    public function search_doc($index_name = "test_ik", $type_name = "goods", $body = [])
    {
        $params = array(
            'index' => $index_name,
            'type'  => $type_name,
            'body'  => $body,
        );

        return $this->client->search($params);
    }

    /**
     * Lists documents: a search sent without any request body.
     *
     * @param string $index_name Index to search.
     * @param string $type_name  Value sent as the 'type' parameter.
     * @return array|callable|mixed The client's response.
     */
    public function select_doc($index_name = 'test_ik', $type_name = 'goods')
    {
        $params = array(
            'index' => $index_name,
            'type'  => $type_name,
        );

        return $this->client->search($params);
    }

    /**
     * Runs a search whose body is expected to carry a "highlight" section.
     *
     * The request sent is exactly the one search_doc() sends, so this method
     * delegates to it; it exists as a separately named entry point.
     *
     * Example body:
     *   $body = [
     *       'query'     => ['match' => ['desn' => 'keyword']],
     *       'highlight' => [
     *           'pre_tags'  => ["<b class='key' style='color:red'>"],
     *           'post_tags' => ["</b>"],
     *           'fields'    => ['desn' => new \stdClass()],
     *       ],
     *   ];
     *
     * @param string $index_name Index to search.
     * @param string $type_name  Value sent as the 'type' parameter.
     * @param array  $body       Search request body.
     * @return array|callable|mixed The client's response.
     */
    public function highlight_doc($index_name = 'test_ik', $type_name = 'goods', $body = [])
    {
        return $this->search_doc($index_name, $type_name, $body);
    }

    /**
     * Creates the index named in the constructor with the weibo mapping,
     * unless exists_index() reports that it is already there.
     *
     * @return array|bool|mixed true when the index already exists, the client's
     *                          response after creating it, or false when the
     *                          client threw.
     */
    public function create_weibo_index()
    {
        if ($this->exists_index($this->index_name)) {
            return true;
        }

        $params = array(
            'index' => $this->index_name,
            'body'  => array(
                'settings' => array(
                    'number_of_shards'   => 2,
                    'number_of_replicas' => 1,
                    'analysis'           => array(
                        'analyzer' => array(
                            // Declared for callers that want it; the mapping
                            // below names 'ik_max_word' directly.
                            'ik_analyzer' => array(
                                'type'      => 'custom',
                                'tokenizer' => 'ik_max_word',
                            ),
                        ),
                    ),
                ),
                'mappings' => array(
                    'properties' => array(
                        'id'              => array('type' => 'long'),
                        'uid'             => array('type' => 'long'),
                        'wid'             => array('type' => 'long'),
                        'content'         => array(
                            'type'     => 'text',
                            'analyzer' => 'ik_max_word',
                        ),
                        'created_at'      => array('type' => 'date'),
                        'comments_count'  => array('type' => 'long'),
                        'attitudes_count' => array('type' => 'long'),
                        // Declared explicitly so the counter does not depend
                        // on dynamic mapping when documents carry it.
                        'reposts_count'   => array('type' => 'long'),
                        'status'          => array('type' => 'long'),
                    ),
                ),
            ),
        );

        try {
            return $this->client->indices()->create($params);
        } catch (\Exception $e) {
            $this->log_failure(__FUNCTION__, $e);
            return false;
        }
    }

    /**
     * Builds the index/type/id parameter set shared by the single-document calls.
     *
     * @param int|string $id
     * @param string     $index_name
     * @param string     $type_name
     * @return array
     */
    private function doc_params($id, $index_name, $type_name)
    {
        return array(
            'index' => $index_name,
            'type'  => $type_name,
            'id'    => $id,
        );
    }

    /**
     * Records why a best-effort call is about to return false, so the failure
     * is not completely silent.
     *
     * @param string     $operation Name of the failing method.
     * @param \Exception $e         Exception raised by the client.
     * @return void
     */
    private function log_failure($operation, \Exception $e)
    {
        error_log(sprintf('[ES] %s failed: %s', $operation, $e->getMessage()));
    }
}
@ -0,0 +1,92 @@ |
|||
<?php |
|||
// nohup php /data1/www/zhishiku.kuailelunwen.com/tools/es_add_comments.php & |
|||
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php"); |
|||
include_once(dirname(dirname(__FILE__))."/tools/es.php"); |
|||
|
|||
/**
 * Reads the MySQL connection settings of one section of the database ini file.
 *
 * The section's "master" value is a comma-separated list of host[:port]
 * entries; one entry is chosen at random.
 *
 * @param string      $dbflag   Section name (matched lower-cased).
 * @param string|null $ini_path Ini file to read; defaults to
 *                              <parent of this file's directory>/config/database.ini.
 * @return array Keys 'host', 'port', 'user', 'pwd', 'db', all trimmed strings.
 *               'port' is '' when the chosen entry has no ":port" part.
 * @throws \RuntimeException When the file cannot be read or parsed, the section
 *                           is missing, or "master" lists no host.
 */
function parseDbCnf($dbflag, $ini_path = null) {
    if ($ini_path === null) {
        $ini_path = dirname(dirname(__FILE__)).'/config/database.ini';
    }
    if (!is_readable($ini_path)) {
        throw new \RuntimeException('Database config is not readable: '.$ini_path);
    }

    $configs = parse_ini_file($ini_path, true);
    if ($configs === false) {
        throw new \RuntimeException('Database config could not be parsed: '.$ini_path);
    }

    $section = strtolower($dbflag);
    if (!isset($configs[$section]) || !is_array($configs[$section])) {
        throw new \RuntimeException('Database config section not found: '.$section);
    }
    $config = $configs[$section];

    // Drop blank entries (e.g. from a trailing comma) so an empty host can
    // never be picked.
    $hps = array();
    foreach (explode(',', isset($config['master']) ? (string) $config['master'] : '') as $entry) {
        $entry = trim($entry);
        if ($entry !== '') {
            $hps[] = $entry;
        }
    }
    if (count($hps) === 0) {
        throw new \RuntimeException('Database config section has no master host: '.$section);
    }

    $hp = $hps[mt_rand(0, count($hps) - 1)];

    // The port part is optional; without it the port is reported as ''.
    $parts = explode(':', $hp);
    $host = $parts[0];
    $port = isset($parts[1]) ? $parts[1] : '';

    $cnf = array();
    $cnf['host'] = trim($host);
    $cnf['port'] = trim($port);
    $cnf['user'] = isset($config['user']) ? trim((string) $config['user']) : '';
    $cnf['pwd'] = isset($config['passwd']) ? trim((string) $config['passwd']) : '';
    $cnf['db'] = isset($config['db']) ? trim((string) $config['db']) : '';
    return $cnf;
}
|||
|
|||
// ---------------------------------------------------------------------------
// Loader: copies rows of MySQL table spider_weibo_comments into the
// Elasticsearch index "comments", one document per row, walking the table in
// ascending id order in pages of $limit rows.
// ---------------------------------------------------------------------------
$mysqlconfig = parseDbCnf('simplyphp');
$servername = $mysqlconfig['host'];
$username = $mysqlconfig['user'];
$password = $mysqlconfig['pwd'];
$dbname = $mysqlconfig['db'];
// Use the configured port; fall back to mysqli's default when none is configured.
$dbport = $mysqlconfig['port'] !== '' ? (int) $mysqlconfig['port'] : (int) ini_get('mysqli.default_port');

$obj = new \ES('weibo');
$conn = new mysqli($servername, $username, $password, $dbname, $dbport);
if ($conn->connect_error) {
    die("连接失败: " . $conn->connect_error);
}
$conn->set_charset("utf8mb4");

$limit = 5000;
$min_id = 0; // highest id seen so far; the next page starts after it

$log_path = '/datacenter/zhishiku/es_comment.log';
$log_path_success = '/datacenter/zhishiku/es_success_comment.log';
$log_path_err = '/datacenter/zhishiku/es_error_comment.log';

for ($page = 0; ; $page++) {
    $sql = "SELECT * FROM spider_weibo_comments where id>".(int) $min_id." order by id asc limit ".(int) $limit;
    $result = $conn->query($sql);
    if ($result === false) {
        // A failed query has no num_rows to inspect; record it and stop.
        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
        break;
    }

    $row_count = $result->num_rows;
    while ($row = $result->fetch_assoc()) {
        // Advance the cursor before any skip so skipped rows are not re-read.
        $min_id = $row['id'];

        // Skip rows whose content is empty or consists of spaces only.
        if (str_replace(" ", "", (string) $row["content"]) === "") {
            continue;
        }

        $data = array();
        $data['id'] = (int) $row['id'];
        $data['uid'] = (int) $row['uid'];
        $data['weibo_id'] = (int) $row['weibo_id'];
        $data['content'] = $row['content'];

        // An empty or unparseable timestamp is indexed as null rather than
        // as the 1970 epoch that date() yields for a false timestamp.
        $ts = empty($row['comment_time']) ? false : strtotime($row['comment_time']);
        $data['comment_time'] = ($ts === false) ? null : date('c', $ts);

        $params = array();
        $params['index'] = 'comments';
        $params['type'] = 'doc';
        $params['id'] = $data['id'];
        $params['body'] = $data;

        try {
            $resc = $obj->add_doc($params);
            $outcome = isset($resc['result']) ? $resc['result'] : '';
            // Indexing an id that already exists reports "updated"; that is a
            // successful write too, so re-runs do not flood the error log.
            if ($outcome === 'created' || $outcome === 'updated') {
                error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
            } else {
                error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
            }
        } catch (\Exception $th) {
            error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
        }
    }
    $result->free();

    // Progress line: rows seen so far | page number.
    error_log((($page * $limit) + $row_count)."|".$page."\n", 3, $log_path);

    // A short (or empty) page means the table is exhausted.
    if ($row_count < $limit) {
        break;
    }
}
$conn->close();
|||
?> |
@ -0,0 +1,74 @@ |
|||
<?php |
|||
// ---------------------------------------------------------------------------
// Search endpoint: reads POST fields "key" (keywords) and "token", runs a
// boosted match query on the "content" field of index "weibo" and echoes a
// JSON envelope {status, info, data}.
// ---------------------------------------------------------------------------
error_reporting(0);
ini_set('display_errors', '0');

// nohup php /data1/www/zhishiku.kuailelunwen.com/tools/es_search.php &
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php");
include_once(dirname(dirname(__FILE__))."/tools/es.php");

$data = array();
$data['status'] = false;
$data['info'] = '';
$data['data'] = [];

$from = 0;
$size = 25;
// Missing POST fields are treated as empty strings instead of raising notices.
$keywords = isset($_POST['key']) ? $_POST['key'] : '';
$token = isset($_POST['token']) ? $_POST['token'] : '';

// NOTE(review): the shared secret is hard-coded in source; consider moving it
// to configuration.
$expected_token = 'ahckexb@!@#!@#$%cdasd%$';

// hash_equals() compares in constant time and, unlike a loose "!=", never
// applies type juggling. A wrong token still gets the original 'succ' reply.
if (!is_string($token) || !hash_equals($expected_token, $token)) {
    echo 'succ';
    exit;
}

// Reject missing, non-string or blank keywords; "0" is a legitimate keyword.
if (!is_string($keywords) || trim($keywords) === '') {
    $data['info'] = '错误';
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

$body = [
    'query' => [
        'bool' => [
            'should' => [
                [
                    'match' => [
                        'content' => [
                            'query' => $keywords,
                            'boost' => 4,
                        ],
                    ],
                ],
            ],
        ],
    ],
    'from' => $from,
    'size' => $size,
];

try {
    $obj = new \ES('weibo');
    $res = $obj->search_doc('weibo', 'doc', $body);
} catch (\Exception $e) {
    // Without this the client would receive an empty response body.
    error_log('es_search failed: '.$e->getMessage());
    $data['info'] = '错误';
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

$hits = (isset($res['hits']['hits']) && is_array($res['hits']['hits'])) ? $res['hits']['hits'] : array();
if (empty($hits)) {
    $data['info'] = '数据为空';
    echo json_encode($data, JSON_HEX_TAG);
    exit;
}

// Return only the stored documents, dropping Elasticsearch's hit metadata.
$rdata = [];
foreach ($hits as $val) {
    $rdata[] = $val['_source'];
}

$data['status'] = true;
$data['data'] = $rdata;
// JSON_HEX_TAG is the flag the former `json_encode($data, true)` call
// actually enabled (true coerces to 1), so the output bytes are unchanged.
echo json_encode($data, JSON_HEX_TAG);
exit;
@ -0,0 +1,95 @@ |
|||
<?php |
|||
// nohup php /data1/www/zhishiku.kuailelunwen.com/tools/es_setting.php & |
|||
include_once(dirname(dirname(__FILE__))."/vendor/autoload.php"); |
|||
include_once(dirname(dirname(__FILE__))."/tools/es.php"); |
|||
|
|||
/**
 * Reads the MySQL connection settings of one section of the database ini file.
 *
 * The section's "master" value is a comma-separated list of host[:port]
 * entries; one entry is chosen at random.
 *
 * @param string      $dbflag   Section name (matched lower-cased).
 * @param string|null $ini_path Ini file to read; defaults to
 *                              <parent of this file's directory>/config/database.ini.
 * @return array Keys 'host', 'port', 'user', 'pwd', 'db', all trimmed strings.
 *               'port' is '' when the chosen entry has no ":port" part.
 * @throws \RuntimeException When the file cannot be read or parsed, the section
 *                           is missing, or "master" lists no host.
 */
function parseDbCnf($dbflag, $ini_path = null) {
    if ($ini_path === null) {
        $ini_path = dirname(dirname(__FILE__)).'/config/database.ini';
    }
    if (!is_readable($ini_path)) {
        throw new \RuntimeException('Database config is not readable: '.$ini_path);
    }

    $configs = parse_ini_file($ini_path, true);
    if ($configs === false) {
        throw new \RuntimeException('Database config could not be parsed: '.$ini_path);
    }

    $section = strtolower($dbflag);
    if (!isset($configs[$section]) || !is_array($configs[$section])) {
        throw new \RuntimeException('Database config section not found: '.$section);
    }
    $config = $configs[$section];

    // Drop blank entries (e.g. from a trailing comma) so an empty host can
    // never be picked.
    $hps = array();
    foreach (explode(',', isset($config['master']) ? (string) $config['master'] : '') as $entry) {
        $entry = trim($entry);
        if ($entry !== '') {
            $hps[] = $entry;
        }
    }
    if (count($hps) === 0) {
        throw new \RuntimeException('Database config section has no master host: '.$section);
    }

    $hp = $hps[mt_rand(0, count($hps) - 1)];

    // The port part is optional; without it the port is reported as ''.
    $parts = explode(':', $hp);
    $host = $parts[0];
    $port = isset($parts[1]) ? $parts[1] : '';

    $cnf = array();
    $cnf['host'] = trim($host);
    $cnf['port'] = trim($port);
    $cnf['user'] = isset($config['user']) ? trim((string) $config['user']) : '';
    $cnf['pwd'] = isset($config['passwd']) ? trim((string) $config['passwd']) : '';
    $cnf['db'] = isset($config['db']) ? trim((string) $config['db']) : '';
    return $cnf;
}
|||
|
|||
// ---------------------------------------------------------------------------
// Loader: copies the rows of MySQL table spider_weibo that belong to uid
// 2282201403 into the Elasticsearch index "weibo", one document per row,
// walking the table in ascending id order in pages of $limit rows.
// ---------------------------------------------------------------------------
$mysqlconfig = parseDbCnf('simplyphp');
$servername = $mysqlconfig['host'];
$username = $mysqlconfig['user'];
$password = $mysqlconfig['pwd'];
$dbname = $mysqlconfig['db'];
// Use the configured port; fall back to mysqli's default when none is configured.
$dbport = $mysqlconfig['port'] !== '' ? (int) $mysqlconfig['port'] : (int) ini_get('mysqli.default_port');

$obj = new \ES('weibo');
$conn = new mysqli($servername, $username, $password, $dbname, $dbport);
if ($conn->connect_error) {
    die("连接失败: " . $conn->connect_error);
}

$conn->set_charset("utf8mb4");

$limit = 5000;
$min_id = 0; // highest id seen so far; the next page starts after it

$log_path = '/datacenter/zhishiku/es.log';
$log_path_success = '/datacenter/zhishiku/es_success.log';
$log_path_err = '/datacenter/zhishiku/es_error.log';

for ($page = 0; ; $page++) {
    $sql = "SELECT * FROM spider_weibo where uid=2282201403 and id>".(int) $min_id." order by id asc limit ".(int) $limit;
    $result = $conn->query($sql);
    if ($result === false) {
        // A failed query has no num_rows to inspect; record it and stop.
        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
        break;
    }

    $row_count = $result->num_rows;
    while ($row = $result->fetch_assoc()) {
        // Advance the cursor before any skip so skipped rows are not re-read.
        $min_id = $row['id'];

        // Skip rows whose text is empty or consists of spaces only.
        if (str_replace(" ", "", (string) $row["text"]) === "") {
            continue;
        }

        $data = array();
        $data['id'] = (int) $row['id'];
        $data['uid'] = (int) $row['uid'];
        $data['wid'] = (int) $row['wid'];
        $data['content'] = $row['text'];

        // An empty or unparseable timestamp is indexed as null rather than
        // as the 1970 epoch that date() yields for a false timestamp.
        $ts = empty($row['created_at']) ? false : strtotime($row['created_at']);
        $data['created_at'] = ($ts === false) ? null : date('c', $ts);

        $data['comments_count'] = (int) $row['comments_count'];
        $data['attitudes_count'] = (int) $row['attitudes_count'];
        $data['reposts_count'] = (int) $row['reposts_count'];
        $data['status'] = (int) $row['status'];

        $params = array();
        $params['index'] = 'weibo';
        $params['type'] = 'doc';
        $params['id'] = $data['id'];
        $params['body'] = $data;

        try {
            $resc = $obj->add_doc($params);
            $outcome = isset($resc['result']) ? $resc['result'] : '';
            // Indexing an id that already exists reports "updated"; that is a
            // successful write too, so re-runs do not flood the error log.
            if ($outcome === 'created' || $outcome === 'updated') {
                error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
            } else {
                error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
            }
        } catch (\Exception $th) {
            error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
        }
    }
    $result->free();

    // Progress line: rows seen so far | page number.
    error_log((($page * $limit) + $row_count)."|".$page."\n", 3, $log_path);

    // A short (or empty) page means the table is exhausted.
    if ($row_count < $limit) {
        break;
    }
}
$conn->close();
|||
?> |
Loading…
Reference in new issue