diff --git a/tools/es.php b/tools/es.php
new file mode 100644
index 0000000..3d114f0
--- /dev/null
+++ b/tools/es.php
@@ -0,0 +1,326 @@
+client = ClientBuilder::create()->setHosts($params)->build();
+        $this->index_name = $index;
+
+        return $this->client;
+    }
+
+    /**
+     * Check whether an index exists.
+     *
+     * @param string $index_name Index to look up.
+     * @return bool|mixed|string Result of the client's indices()->exists() call,
+     *                           or false if that call throws.
+     */
+    public function exists_index($index_name = 'test_ik')
+    {
+        $params = [
+            'index' => $index_name
+        ];
+        try {
+            return $this->client->indices()->exists($params);
+        } catch (\Exception $e) {
+            return false;
+        }
+    }
+
+    /**
+     * Create an index with 3 shards and 0 replicas.
+     *
+     * @param string $index_name Index to create.
+     * @return array|mixed|string Result of indices()->create(), or false if the call throws.
+     */
+    public function create_index($index_name = 'test_ik')
+    {
+        $params = [
+            'index' => $index_name,
+            'body' => [
+                'settings' => [
+                    'number_of_shards' => 3,
+                    'number_of_replicas' => 0
+                ]
+            ]
+        ];
+        try {
+            return $this->client->indices()->create($params);
+        } catch (\Exception $e) {
+            return false;
+        }
+    }
+
+    /**
+     * Delete an index.
+     *
+     * Unlike exists_index()/create_index(), exceptions thrown by the client are
+     * not caught here and propagate to the caller.
+     *
+     * @param string $index_name Index to delete.
+     * @return array
+     */
+    public function delete_index($index_name = 'test_ik')
+    {
+        $params = ['index' => $index_name];
+        $response = $this->client->indices()->delete($params);
+        return $response;
+    }
+
+    /**
+     * Add (index) a document.
+     *
+     * Example:
+     *   $params = [
+     *       'index' => "es",
+     *       'type' => "article",
+     *       "body" => [
+     *           "title" => "",
+     *       ]
+     *   ];
+     *
+     * @param array $params Passed unchanged to the client's index() call.
+     * @return array
+     */
+    public function add_doc($params)
+    {
+        return $this->client->index($params);
+    }
+
+    /**
+     * Check whether a document exists.
+     *
+     * @param int $id
+     * @param string $index_name
+     * @param string $type_name
+     * @return array|bool
+     */
+    public function exists_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
+    {
+        $params = [
+            'index' => $index_name,
+            'type' => $type_name,
+            'id' => $id
+        ];
+
+        $response = $this->client->exists($params);
+        return $response;
+    }
+
+    /**
+     * Fetch a document by id.
+     *
+     * @param int $id
+     * @param string $index_name
+     * @param string $type_name
+     * @return array
+     */
+    public function get_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
+    {
+        $params = [
+            'index' => $index_name,
+            'type' => $type_name,
+            'id' => $id
+        ];
+
+        $response = $this->client->get($params);
+        return $response;
+    }
+
+    /**
+     * Update a document.
+     *
+     * Example:
+     *   $params = [
+     *       'index' => "es",
+     *       'type' => "article",
+     *       'id' => "OIwzxXgBzF70K-DobSSC",
+     *       "body" => [
+     *           "doc" => [
+     *               "title" => "new title",
+     *               "desn" => "new description"
+     *           ]
+     *       ]
+     *   ];
+     *
+     * @param array $params Passed unchanged to the client's update() call.
+     * @return array
+     */
+    public function update_doc($params = [])
+    {
+        $response = $this->client->update($params);
+        return $response;
+    }
+
+    /**
+     * Delete a document by id.
+     *
+     * @param int $id
+     * @param string $index_name
+     * @param string $type_name
+     * @return array
+     */
+    public function delete_doc($id = 1, $index_name = 'test_ik', $type_name = 'goods')
+    {
+        $params = [
+            'index' => $index_name,
+            'type' => $type_name,
+            'id' => $id
+        ];
+
+        $response = $this->client->delete($params);
+        return $response;
+    }
+
+    /**
+     * Search documents (paging, sorting, boosting, filtering are all expressed in $body).
+     *
+     * Example:
+     *   $body = [
+     *       'query' => [
+     *           'bool' => [
+     *               'should' => [
+     *                   [
+     *                       'match' => [
+     *                           'cate_name' => [
+     *                               'query' => $keywords,
+     *                               'boost' => 4, // highest weight
+     *                           ]
+     *                       ]
+     *                   ],
+     *                   [
+     *                       'match' => [
+     *                           'goods_name' => [
+     *                               'query' => $keywords,
+     *                               'boost' => 3,
+     *                           ]
+     *                       ]
+     *                   ],
+     *                   [
+     *                       'match' => [
+     *                           'goods_introduce' => [
+     *                               'query' => $keywords,
+     *                               'boost' => 2,
+     *                           ]
+     *                       ]
+     *                   ]
+     *               ],
+     *           ],
+     *       ],
+     *       'sort' => ['id'=>['order'=>'desc']],
+     *       'from' => $from,
+     *       'size' => $size
+     *   ];
+     *
+     * @param string $index_name
+     * @param string $type_name
+     * @param array $body Search request body, sent as the 'body' parameter.
+     * @return array
+     */
+    public function search_doc($index_name = "test_ik", $type_name = "goods", $body = [])
+    {
+        $params = [
+            'index' => $index_name,
+            'type' => $type_name,
+            'body' => $body
+        ];
+
+        $results = $this->client->search($params);
+        return $results;
+    }
+
+    /**
+     * List documents: a search with no request body.
+     *
+     * @param string $index_name
+     * @param string $type_name
+     * @return array|callable
+     */
+    public function select_doc($index_name = 'test_ik', $type_name = 'goods')
+    {
+        $params = [
+            'index' => $index_name,
+            'type' => $type_name,
+        ];
+        $response = $this->client->search($params);
+        return $response;
+    }
+
+    /**
+     * Search with highlighting.
+     *
+     * The request sent is the same as search_doc(); the highlighting itself is
+     * requested through the 'highlight' section of $body.
+     *
+     * Example (the tag pair is only an illustration, any markers can be used):
+     *   $body = [
+     *       'query' => [
+     *           'match' => [
+     *               'desn' => 'chairman'
+     *           ]
+     *       ],
+     *       'highlight' => [
+     *           'pre_tags' => ["<em>"],
+     *           'post_tags' => ["</em>"],
+     *           'fields' => [
+     *               "desn" => new \stdClass()
+     *           ]
+     *       ],
+     *   ];
+     *
+     * @param string $index_name
+     * @param string $type_name
+     * @param array $body
+     * @return array|callable
+     */
+    public function highlight_doc($index_name = 'test_ik', $type_name = 'goods', $body = [])
+    {
+        // Identical parameters to search_doc(), so reuse it instead of duplicating the call.
+        return $this->search_doc($index_name, $type_name, $body);
+    }
+
+    /**
+     * Create the index named by $this->index_name with the weibo mapping.
+     *
+     * @return array|bool true when the index already exists, the create() response
+     *                    on success, or false when create() throws.
+     */
+    public function create_weibo_index()
+    {
+        $res = $this->exists_index($this->index_name);
+        if ($res) return true;
+
+        $params = [
+            'index' => $this->index_name,
+            'body' => [
+                'settings' => [
+                    'number_of_shards' => 2,
+                    'number_of_replicas' => 1,
+                    'analysis' => [
+                        'analyzer' => [
+                            // NOTE(review): defined but not referenced by the mapping below,
+                            // which names 'ik_max_word' directly.
+                            'ik_analyzer' => [
+                                'type' => 'custom',
+                                'tokenizer' => 'ik_max_word'
+                            ]
+                        ]
+                    ]
+                ],
+                'mappings' => [
+                    'properties' => [
+                        'id' => ['type' => 'long'],
+                        'uid' => ['type' => 'long'],
+                        'wid' => ['type' => 'long'],
+                        'content' => [
+                            'type' => 'text',
+                            'analyzer' => 'ik_max_word',
+                            // 'fields' => [
+                            //     'semantic' => [
+                            //         'type' => 'dense_vector',
+                            //         'dims' => 768
+                            //     ]
+                            // ]
+                        ],
+                        'created_at' => ['type' => 'date'],
+                        'comments_count' => ['type' => 'long'],
+                        'attitudes_count' => ['type' => 'long'],
+                        // tools/es_setting.php indexes this counter too; map it explicitly
+                        // like the other counters.
+                        'reposts_count' => ['type' => 'long'],
+                        'status' => ['type' => 'long']
+                    ]
+                ]
+            ]
+        ];
+
+        try {
+            return $this->client->indices()->create($params);
+        } catch (\Exception $e) {
+            return false;
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/tools/es_add_comments.php b/tools/es_add_comments.php
new file mode 100644
index 0000000..33102cd
--- /dev/null
+++ b/tools/es_add_comments.php
@@ -0,0 +1,92 @@
+connect_error) {
+    die("连接失败: " . $conn->connect_error);
+}
+$conn->set_charset("utf8mb4");
+
+$limit = 5000;
+$min_id = 0;
+
+$log_path = '/datacenter/zhishiku/es_comment.log';
+$log_path_success = '/datacenter/zhishiku/es_success_comment.log';
+$log_path_err = '/datacenter/zhishiku/es_error_comment.log';
+
+// Walk the table in pages of $limit rows, always resuming after the highest id seen so far.
+for ($page = 0; ; $page++) {
+    $sql = "SELECT * FROM spider_weibo_comments where id>".$min_id." order by id asc limit ".$limit;
+    $result = $conn->query($sql);
+    if ($result === false) {
+        // query() returns false on failure; log and stop instead of reading num_rows from a bool.
+        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
+        break;
+    }
+
+    $row_count = $result->num_rows;
+    if ($row_count > 0) {
+        while ($row = $result->fetch_assoc()) {
+            $min_id = $row['id'];
+
+            // Skip rows whose content is empty or consists only of spaces.
+            if (str_replace(" ", "", $row["content"]) === "") continue;
+
+            // "+0" turns the numeric strings returned by mysqli into numbers.
+            $data['id'] = $row['id']+0;
+
+            $data['uid'] = $row['uid']+0;
+            $data['weibo_id'] = $row['weibo_id']+0;
+            $data['content'] = $row['content'];
+
+            // strtotime() returns false for unparseable input; index null in that
+            // case rather than letting date() render the 1970 epoch.
+            $ts = empty($row['comment_time']) ? false : strtotime($row['comment_time']);
+            $data['comment_time'] = ($ts === false) ? null : date('c', $ts);
+
+            $params['index'] = 'comments';
+            $params['type'] = 'doc';
+            $params['id'] = $data['id'];
+            $params['body'] = $data;
+
+            try {
+                $resc = $obj->add_doc($params);
+                // Documents are indexed with an explicit id, so a re-run reports
+                // "updated" instead of "created"; both mean the write succeeded.
+                $outcome = isset($resc['result']) ? $resc['result'] : '';
+                if ($outcome !== 'created' && $outcome !== 'updated') {
+                    error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
+                } else {
+                    error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
+                }
+            } catch (\Exception $th) {
+                error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
+            }
+        }
+        $result->free();
+        error_log((($page*$limit)+$row_count)."|".$page."\n", 3, $log_path);
+        // A short page means the table is exhausted.
+        if ($row_count < $limit) break;
+    } else {
+        $result->free();
+        error_log((($page*$limit)+$row_count)."|".$page."\n", 3, $log_path);
+        break;
+    }
+}
+$conn->close();
+?>
diff --git a/tools/es_search.php b/tools/es_search.php
new file mode 100644
index 0000000..a887788
--- /dev/null
+++ b/tools/es_search.php
@@ -0,0 +1,74 @@
+ [
+        'bool' => [
+            'should' => [
+                [
+                    'match' => [
+                        'content' => [
+                            'query' => $keywords,
+                            'boost' => 4,
+                        ]
+                    ]
+                ],
+                // [
+                //     'match' => [
+                //         'abs' => [
+                //             'query' => $keywords,
+                //             'boost' => 3,
+                //         ]
+                //     ]
+                // ]
+            ],
+        ],
+    ],
+    'from' => $from,
+    'size' => $size
+];
+$res = $obj->search_doc('weibo', 'doc', $body);
+// Treat a response without hits.hits the same as an empty result set.
+$hits = isset($res['hits']['hits']) ? $res['hits']['hits'] : [];
+if (empty($hits)) {
+    $data['info'] = '数据为空';
+    // json_encode()'s second argument is a flags bitmask, not an "assoc" switch;
+    // the former `true` was coerced to 1 == JSON_HEX_TAG, now spelled out.
+    echo json_encode($data, JSON_HEX_TAG);
+    exit;
+}
+
+// Return only the stored documents, dropping the per-hit metadata.
+$rdata = [];
+foreach ($hits as $val) {
+    $rdata[] = $val['_source'];
+}
+$data['status'] = true;
+$data['data'] = $rdata;
+echo json_encode($data, JSON_HEX_TAG);
+exit;
diff --git a/tools/es_setting.php b/tools/es_setting.php
new file mode 100644
index 0000000..836f8eb
--- /dev/null
+++ b/tools/es_setting.php
@@ -0,0 +1,95 @@
+connect_error) {
+    die("连接失败: " . $conn->connect_error);
+}
+
+$conn->set_charset("utf8mb4");
+
+$limit = 5000;
+$min_id = 0;
+
+$log_path = '/datacenter/zhishiku/es.log';
+$log_path_success = '/datacenter/zhishiku/es_success.log';
+$log_path_err = '/datacenter/zhishiku/es_error.log';
+
+// Walk the table in pages of $limit rows, always resuming after the highest id seen so far.
+for ($page = 0; ; $page++) {
+    $sql = "SELECT * FROM spider_weibo where uid=2282201403 and id>".$min_id." order by id asc limit ".$limit;
+    $result = $conn->query($sql);
+    if ($result === false) {
+        // query() returns false on failure; log and stop instead of reading num_rows from a bool.
+        error_log('query failed:'.$conn->error.'|'.$min_id."\n", 3, $log_path_err);
+        break;
+    }
+
+    $row_count = $result->num_rows;
+    if ($row_count > 0) {
+        while ($row = $result->fetch_assoc()) {
+            $min_id = $row['id'];
+
+            // Skip rows whose text is empty or consists only of spaces.
+            if (str_replace(" ", "", $row["text"]) === "") continue;
+
+            // "+0" turns the numeric strings returned by mysqli into numbers.
+            $data['id'] = $row['id']+0;
+            $data['uid'] = $row['uid']+0;
+            $data['wid'] = $row['wid']+0;
+            $data['content'] = $row['text'];
+
+            // strtotime() returns false for unparseable input; index null in that
+            // case rather than letting date() render the 1970 epoch.
+            $ts = empty($row['created_at']) ? false : strtotime($row['created_at']);
+            $data['created_at'] = ($ts === false) ? null : date('c', $ts);
+
+            $data['comments_count'] = $row['comments_count']+0;
+            $data['attitudes_count'] = $row['attitudes_count']+0;
+            $data['reposts_count'] = $row['reposts_count']+0;
+            $data['status'] = $row['status']+0;
+
+            $params['index'] = 'weibo';
+            $params['type'] = 'doc';
+            $params['id'] = $data['id'];
+            $params['body'] = $data;
+            try {
+                $resc = $obj->add_doc($params);
+                // Documents are indexed with an explicit id, so a re-run reports
+                // "updated" instead of "created"; both mean the write succeeded.
+                $outcome = isset($resc['result']) ? $resc['result'] : '';
+                if ($outcome !== 'created' && $outcome !== 'updated') {
+                    error_log('error:'.json_encode($data)."\n", 3, $log_path_err);
+                } else {
+                    error_log($resc['_id']."|{$min_id}\n", 3, $log_path_success);
+                }
+            } catch (\Exception $th) {
+                error_log('excption:'.$th->getMessage().'|'.json_encode($data).'|'.$min_id."\n", 3, $log_path_err);
+            }
+        }
+        $result->free();
+        error_log((($page*$limit)+$row_count)."|".$page."\n", 3, $log_path);
+        // A short page means the table is exhausted.
+        if ($row_count < $limit) break;
+    } else {
+        $result->free();
+        error_log((($page*$limit)+$row_count)."|".$page."\n", 3, $log_path);
+        break;
+    }
+}
+$conn->close();
+?>