diff --git a/README.md b/README.md index 34617dc..b5fa706 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,35 @@ -### 欢迎使用Markdown - -##### 新增图片上传功能 - -现在可以在文章中插入图片! - -- 您可以插入外链图片,或上传本地图片到文档中。 -- 可上传的单张图片最大20M,支持PNG、JPG格式。 -- 若有其他疑问,欢迎咨询官网在线客服。 - - - -### Welcome to the Markdown - -##### New feature! Insert pictures in your articles now! - -You can insert pictures from external links, or upload ones. - -The maximum size of the picture to upload is 20M. PNG and JPG are better. - -Have any other questions, please contact our official customer service. - - - -$$ -s_{\Delta B} = \sqrt{\frac{1}{n-1} \sum_{i=1}^{n} (\Delta B_i - \overline{\Delta B})^2} -$$ - - -1 \ No newline at end of file +## 安装环境 + +```bash +conda create -n your_env_name python=3.8 +pip install -r requirements.txt +``` + +## 启动项目 +启动此项目前必须启动 vllm-main 项目 + +```bash +bash run_app_nohub_flask_drop_rewrite_request.sh +bash run_app_nohub_search_redis.sh +``` + +## 测试 + + +{ + "texts": { + "1": "内 容 摘 要", + "2": "本文基于开放经济条件下外部均衡的调节理论,利用乘数论、吸收论、弹性论对日本近年来始终保持的超宽松货币政策实施效果分别进行理论分析和实证分析,充分说明持续的贬值并不能改善国际收支,指出了安倍经济学的缺点。", + "3": "本篇文章主要分为四个部分,首先介绍了日本无限量化宽松政策的历史背景和日本国际收支的概况,接着介绍安倍经济学,并用乘数论和吸收论对其政策效果进行理论分析;随后依据弹性论进行建立模型,并进行脉冲响应分析,通过实证分析说明日元贬值并不能很好地改善国际收支,最后进行全文总结。", + "4": "本文对一国制定经济政策有一定的实际意义,为国家内外经济平衡提供了借鉴和经验,也给经济研究者们带来了更深入的思考。", + "5": "关键词:日元贬值 外部均衡 乘数论 吸收论 弹性论 实证分析", + "6": "开放条件下外部均衡及货币政策影响分析:以日元贬值为例", + "7": "2023年12月19日,日本央行在货币政策会议宣布,将继续维持超宽松货币政策,将长期利率控制目标上限维持在1%,把短期利率维持在负0.1%的水平。" + }, + "text_type": "chapter" +} +```bash +curl -H "Content-Type: application/json" -X POST -d '{"texts": {"1": "内 容 摘 要","2": "本文基于开放经济条件下外部均衡的调节理论,利用乘数论、吸收论、弹性论对日本近年来始终保持的超宽松货币政策实施效果分别进行理论分析和实证分析,充分说明持续的贬值并不能改善国际收支,指出了安倍经济学的缺点。","3": "本篇文章主要分为四个部分,首先介绍了日本无限量化宽松政策的历史背景和日本国际收支的概况,接着介绍安倍经济学,并用乘数论和吸收论对其政策效果进行理论分析;随后依据弹性论进行建立模型,并进行脉冲响应分析,通过实证分析说明日元贬值并不能很好地改善国际收支,最后进行全文总结。"},"text_type": "chapter"}' http://101.37.83.210:14002/predict +curl -H "Content-Type: application/json" -X POST -d '{"id": 
"b412fc98-bdd7-11ee-8d23-d5e5c66dd02e"}' http://101.37.83.210:14003/search +``` +返回"status_code"不出现 400 则调用成功 \ No newline at end of file diff --git a/flask_drop_rewrite_request.py b/flask_drop_rewrite_request.py index 45a8458..651ae67 100644 --- a/flask_drop_rewrite_request.py +++ b/flask_drop_rewrite_request.py @@ -10,6 +10,7 @@ import time import re import logging import concurrent.futures +import socket logging.basicConfig(level=logging.DEBUG, # 控制台打印的日志级别 @@ -41,7 +42,50 @@ pantten_biaoti_0 = '^[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ] pantten_biaoti_1 = '^第[一二三四五六七八九]章\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' pantten_biaoti_2 = '^[0-9.]+\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' pantten_biaoti_3 = '^[((][1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][)_)][、.]{0,}?\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' -chatgpt_url = "http://192.168.31.145:12001/predict" + +def get_host_ip(): + """ + 查询本机ip地址 + :return: ip + """ + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(('8.8.8.8', 80)) + ip = s.getsockname()[0] + finally: + s.close() + + return ip + +chatgpt_url_predict = "http://{}:12000/predict".format(str(get_host_ip())) +chatgpt_url_search = "http://{}:12000/search".format(str(get_host_ip())) + + +def smtp_f(name): + # 在下面的代码行中使用断点来调试脚本。 + import smtplib + from email.mime.text import MIMEText + from email.header import Header + + sender = '838878981@qq.com' # 发送邮箱 + receivers = ['838878981@qq.com'] # 接收邮箱 + auth_code = "jfqtutaiwrtdbcge" # 授权码 + + message = MIMEText('降重项目出错,紧急', 'plain', 'utf-8') + message['From'] = Header("Sender<%s>" % sender) # 发送者 + message['To'] = Header("Receiver<%s>" % receivers[0]) # 接收者 + + subject = name + message['Subject'] = Header(subject, 'utf-8') + + try: + server = smtplib.SMTP_SSL('smtp.qq.com', 465) + server.login(sender, auth_code) + server.sendmail(sender, receivers, message.as_string()) + print("邮件发送成功") + server.close() + except smtplib.SMTPException: + print("Error: 无法发送邮件") class log: @@ -63,6 +107,7 @@ class log: print(dt, *args, file=f, **kwargs) + def 
# Timeout, in seconds, passed to every request sent to the model server.
_REQUEST_TIMEOUT_SECONDS = 100000


def _post_json(url, payload):
    """
    POST ``payload`` as JSON to ``url``.

    :param url: endpoint to call
    :param payload: JSON-serialisable request body
    :return: the decoded JSON body for an HTTP 200 response, otherwise ``{}``
        (the failure is reported on stdout)
    """
    response = requests.post(
        url,
        json=payload,
        timeout=_REQUEST_TIMEOUT_SECONDS
    )
    if response.status_code == 200:
        return response.json()

    print("Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(response.status_code, response.text))
    return {}


def request_api_chatgpt(prompt):
    """
    Send one prompt to the predict endpoint.

    :param prompt: value sent under the ``"texts"`` key
    :return: decoded JSON response, or ``{}`` on a non-200 status
    """
    return _post_json(chatgpt_url_predict, {"texts": prompt})


def uuid_search(uuid):
    """
    Query the search endpoint for one task id.

    :param uuid: value sent under the ``"id"`` key
    :return: decoded JSON response, or ``{}`` on a non-200 status
    """
    return _post_json(chatgpt_url_search, {"id": uuid})


def uuid_search_mp(results, poll_interval=3, max_wait=None):
    """
    Poll the search endpoint until every submitted task has a result.

    :param results: responses of ``request_api_chatgpt``; each one is read
        as ``item["texts"]["id"]`` to obtain its task id
    :param poll_interval: seconds to sleep between polling rounds
    :param max_wait: optional upper bound in seconds for the whole wait;
        ``None`` (default) waits indefinitely
    :return: list of result texts in the same order as ``results``
    :raises KeyError: if an entry of ``results`` carries no task id
        (for example, the submission itself failed and returned ``{}``)
    :raises TimeoutError: if ``max_wait`` elapses with tasks still pending
    """
    task_ids = [item["texts"]["id"] for item in results]
    results_list = [""] * len(task_ids)
    # Track unfinished tasks by index rather than by an empty-string marker,
    # so a legitimately empty text counts as finished and completed ids are
    # not requested again.
    pending = set(range(len(task_ids)))
    deadline = None if max_wait is None else time.monotonic() + max_wait

    while pending:
        for index in sorted(pending):
            result = uuid_search(task_ids[index])
            # uuid_search returns {} on an HTTP failure; treat that as
            # "not ready yet" instead of failing on a missing key.
            # NOTE(review): presumably "code" == 200 marks a finished task -
            # confirm against the search service.
            if result.get("code") == 200:
                results_list[index] = result["text"]
                pending.discard(index)

        if not pending:
            # Everything arrived in this round; skip the trailing sleep.
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                "{} task(s) still pending after {} seconds".format(
                    len(pending), max_wait))
        time.sleep(poll_interval)

    return results_list


def get_multiple_urls(urls):
    """
    Submit every prompt concurrently, then wait for all of the results.

    :param urls: list of ``[index, prompt]`` entries; element 1 of each
        entry is sent to the predict endpoint
    :return: list of ``[entry, text]`` pairs in the same order as ``urls``
    """
    prompts = [entry[1] for entry in urls]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # map() keeps the input order, so submissions line up with ``urls``.
        submissions = list(executor.map(request_api_chatgpt, prompts))

    # Polling is a single sequential task, so no second thread pool is used.
    texts = uuid_search_mp(submissions)

    return [[entry, text] for entry, text in zip(urls, texts)]
-335,9 +470,9 @@ def pre_sentence_ulit(sentence): if len(sentence) > 9: if sentence[-1] != "。": - text = f"User:改写下面这段文字,要求意思接近但是改动幅度比较大,字数只能多不能少,短句前后词跟上下句衔接不能有错误,并且如果结尾有标点符号,标点不能改变,如果结尾没有标点符号,不能擅自添加标点符号:\n{sentence}\nAssistant:" + text = f"User: 改写下面半这句话,要求意思接近但是改动幅度比较大,字数只能多不能少,短句前后词跟上下句衔接不能有错误:\n{sentence}\nAssistant:" else: - text = f"User:改写下面这句话,要求意思接近但是改动幅度比较大,字数只能多不能少:\n{sentence}\nAssistant:" + text = f"User: 改写下面这句话,要求意思接近但是改动幅度比较大,字数只能多不能少:\n{sentence}\nAssistant:" else: text = f"下面词不做任何变化:\n{sentence}" @@ -422,14 +557,15 @@ def main(texts: dict): input_data = [] for i in range(len(text_sentence)): - input_data.append([i, chatgpt_url, {"texts": text_sentence[i]}]) + # input_data.append([i, chatgpt_url, {"texts": text_sentence[i]}]) + input_data.append([i, text_sentence[i]]) results = get_multiple_urls(input_data) generated_text_list = [""] * len(input_data) for url, result in results: # print(f"Result for {url}: {result}") - generated_text_list[url[0]] = result["data"] + generated_text_list[url[0]] = result for i in range(len(generated_text_list)): @@ -506,6 +642,7 @@ def classify(): # 调用模型,设置最大batch_size if texts_list != []: return_text = {"texts": texts_list, "probabilities": None, "status_code": 200} else: + smtp_f("drop_weight_rewrite_increase") return_text = {"texts": texts_list, "probabilities": None, "status_code": 400} load_result_path = "./new_data_logs/{}.json".format(query_id)