# Data-processing script used to generate ChatGPT training data.
import time
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis
# Single re-entrant lock shared by the dispatcher and the worker threads; it
# guards the Redis queue updates and the appends to the output files.
lock = threading.RLock()

# NOTE(review): the Redis host, port and password are hard-coded in source.
# They should be moved to configuration / environment variables and the
# password rotated if this file has ever been published.
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
# NOTE(review): decode_responses is passed together with an explicit
# connection_pool; the dispatcher below still has to decode the bytes returned
# by lpop, so the flag appears to have no effect here - confirm against the
# redis-py documentation before relying on it.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Names of the two Redis lists used as work queues.
redis_key_name_openaikey_list = "openaikey_list"  # currently idle API keys
redis_zirenwu = "redis_zirenwu"                   # pending (task_type, prompt) sub-tasks

# api_key.txt holds one record per line; fields are separated by "----" and
# the API key is the last field.
with open("api_key.txt", "r",) as f:
    a = f.read()
a = a.split("\n")

# Skip blank lines (e.g. the one produced by a trailing newline) so that an
# empty string is never queued as an API key.
api_key_list = [str(line.split("----")[-1]).strip() for line in a if line.strip()]

for i in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, i)

# Sub-directory (under the data root used by request_api_chatgpt) that
# receives the generated files.
path_output = "paper_prompt_title_6"
# Prompt templates keyed by task type. Each template contains exactly one
# "{}" placeholder that is filled with the paper title via str.format.
# The key (e.g. "mulu_prompt") is also used to name the output file.
prompt_dict = {
    "mulu_prompt": "为论文题目“{}”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题",
    "beijing_prompt": "以“{}”为论文题目,写一段题目来源的背景,要求字数在200字以内",
    "zongjie_prompt": "以“{}”为论文题目,写一个论文简短总结,要求在300字以内",
    "zongshu_prompt": "请写出以《{}》为课题的国内外研究状况综述,字数在800字左右",
    "yanjiubeijingyiyi_prompt": "请分别写出以《{}》为课题的研究背景和意义,字数不少于1000字",
    "jianjie_prompt": "请帮我生成《{}》为题目的研究内容,包括整体简介和分最少三个方面总结"
}
# Load the paper titles: one record per line, the title is the part before
# the "@@@@@" separator.
with open("./data/题目3.txt", encoding="utf-8") as f:
    text = f.read()

text_list = text.split("\n")
# Drop blank lines (such as the one left by a trailing newline); otherwise an
# empty title would be expanded into six meaningless prompts and queued.
title_list = [line.split("@@@@@")[0] for line in text_list if line.strip()]
title_list = [title for title in title_list if title.strip()]
random.shuffle(title_list)
print(len(title_list))

# One sub-task per (title, prompt template) pair, stored as
# (task_type, formatted_prompt).
zirenwu_list = [
    (prompt, str(prompt_dict[prompt]).format(title))
    for title in title_list
    for prompt in prompt_dict
]

# Each sub-task is serialised with str(tuple); the dispatcher parses this
# representation back into a tuple when it pops the task.
for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))
# def request_api_chatgpt(api_key, prompt):
# OPENAI_API_KEY = api_key
# url = "https://api.openai.com/v1/chat/completions"
# headers = {
# "Content-Type": "application/json",
# "Authorization": f"Bearer {OPENAI_API_KEY}"
# }
# data = {
# "model": "gpt-3.5-turbo",
# "messages": [
# {"role": "user", "content": prompt},
# ],
# "temperature": 0.5
# }
# response = requests.post(url,
# headers=headers,
# data=json.dumps(data),
# timeout=240)
# print("response", response)
#
# return response
#
# def task(api_key, title):
# try:
# for pormpt_dan in prompt_dict:
# name = pormpt_dan.split("_")[0]
# print("pormpt_dan", pormpt_dan)
# print("prompt_dict", prompt_dict)
# prompt = str(prompt_dict[pormpt_dan]).format(title)
# print("api_key", api_key)
# print("prompt", prompt)
# response = request_api_chatgpt(api_key, prompt)
# res = response.json()
# text = res["choices"][0]["message"]["content"]
# lock.acquire()
#
# with open("/home/majiahui/mulu_ner/data/paper_prompt_title_new/title_{}_data.txt".format(name), mode="a") as f:
# f.write(title)
# f.write("\n**********************************************\n")
# f.write(text)
# f.write("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
# lock.release()
# time.sleep(2)
# lock.acquire()
# api_key_list.append(api_key)
# lock.release()
# print(1)
# except:
# print()
# time.sleep(5)
# lock.acquire()
# api_key_list.append(api_key)
# lock.release()
# print(2)
def request_api_chatgpt(api_key, task_type, prompt):
    """Send one prompt to the OpenAI chat-completions API and store the reply.

    On success the prompt and the model reply are appended to the output file
    for ``task_type``. On any failure (network error, non-JSON response,
    missing ``choices`` field, file error) the task is pushed back onto the
    Redis task list so it can be retried. In both cases the API key is
    returned to the Redis key list exactly once.

    Args:
        api_key: OpenAI API key placed in the ``Authorization`` header.
        task_type: Key of the prompt template (for example ``"mulu_prompt"``);
            it is used to build the output file name.
        prompt: Fully formatted prompt, sent as a single user message.

    Returns:
        None. Results are communicated through the output file and Redis.
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5
    }
    output_file = "/home/majiahui/mulu_ner/data/{}/title_{}_data.txt".format(path_output, task_type)

    try:
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)
        res = response.json()
        # Raises KeyError/IndexError/TypeError when the API returned an error
        # payload instead of a completion; handled below as a failed task.
        text = res["choices"][0]["message"]["content"]

        # "with lock" guarantees the lock is released even if the write
        # fails; a leaked lock would block the dispatcher forever.
        with lock:
            # Explicit UTF-8 so the Chinese text is written the same way
            # regardless of the process locale.
            with open(output_file, mode="a", encoding="utf-8") as f:
                f.write(prompt)
                f.write("*" * 20)   # separator between prompt and reply
                f.write(text)
                f.write("@" * 20)   # separator between records
    except Exception as exc:
        print("task_type_bad", task_type)
        print("api_key_bad", api_key)
        print("error", repr(exc))
        # Back off briefly before making the key available again.
        time.sleep(5)
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
            # Re-queue the task in the same str(tuple) format the dispatcher parses.
            redis_.rpush(redis_zirenwu, str((task_type, prompt)))
    else:
        # Only reached when the reply was stored; the key is therefore
        # returned exactly once on every path.
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
if __name__ == '__main__':
    # Imported locally so this entry-point block is self-contained.
    import ast

    def _as_text(value):
        """Return a Redis reply as str, decoding bytes as UTF-8."""
        return value.decode('UTF-8') if isinstance(value, bytes) else value

    # Dispatcher: pairs one idle API key with one pending sub-task and runs
    # the request on its own thread. The loop never exits on its own; it keeps
    # polling so that re-queued tasks and returned keys are picked up.
    while True:
        # Nothing to do unless there is both a pending task and an idle key.
        if redis_.llen(redis_zirenwu) == 0 or redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue

        with lock:
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            dan_zirenwu = redis_.lpop(redis_zirenwu)

        # Another consumer may have emptied a list between llen and lpop, in
        # which case lpop returns None. Put back whatever was obtained and
        # poll again instead of crashing on None.
        if api_key is None or dan_zirenwu is None:
            with lock:
                if api_key is not None:
                    redis_.rpush(redis_key_name_openaikey_list, api_key)
                if dan_zirenwu is not None:
                    redis_.rpush(redis_zirenwu, dan_zirenwu)
            time.sleep(1)
            continue

        api_key = _as_text(api_key)
        # Tasks are stored as str((task_type, prompt)). literal_eval parses
        # that representation without executing arbitrary code, unlike eval,
        # which would run whatever expression is stored in the Redis list.
        dan_zirenwu = ast.literal_eval(_as_text(dan_zirenwu))
        task_type, prompt = dan_zirenwu[0], dan_zirenwu[1]

        t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt))
        t.start()