数据处理代码,为了生成chatgpt数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

188 lines
6.2 KiB

import time
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis
# Re-entrant lock shared by the dispatcher loop and the worker threads.
lock = threading.RLock()

# NOTE(review): the Redis host and password are hard-coded in source; consider
# loading them from configuration or the environment instead.
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
# NOTE(review): decode_responses is presumably ignored when an explicit
# connection_pool is supplied (replies are treated as bytes elsewhere in this
# file) -- confirm against the redis-py version in use.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Names of the Redis lists holding the free API keys and the pending sub-tasks.
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

# Each line of api_key.txt carries the key as the last "----"-separated field.
with open("api_key.txt", "r") as f:
    a = f.read().splitlines()

api_key_list = []
for line in a:
    key = str(line.split("----")[-1]).strip()
    # Skip blank lines (e.g. a trailing newline) so that no empty key is queued.
    if key:
        api_key_list.append(key)

# Publish every key to the shared pool that the dispatcher pops from.
for key in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, key)
# Prompt templates keyed by task type. Each template has a single "{}"
# placeholder that is filled with the paper title via str.format().
prompt_dict = {
    # Table of contents: first-level headings use Chinese numerals, second-level
    # headings use Arabic numerals; at least 7 first-level headings, each with
    # at least 3 second-level headings.
    "mulu_prompt": "为论文题目“{}”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题",
    # Background of where the topic comes from, within 200 characters.
    "beijing_prompt": "以“{}”为论文题目,写一段题目来源的背景,要求字数在200字以内",
    # Short summary of the paper, within 300 characters.
    "zongjie_prompt": "以“{}”为论文题目,写一个论文简短总结,要求在300字以内",
    # Review of domestic and international research status, about 800 characters.
    "zongshu_prompt": "请写出以《{}》为课题的国内外研究状况综述,字数在800字左右",
    # Research background and significance, no fewer than 1000 characters.
    "yanjiubeijingyiyi_prompt": "请分别写出以《{}》为课题的研究背景和意义,字数不少于1000字",
    # Research content: an overall introduction plus a summary in at least three aspects.
    "jianjie_prompt": "请帮我生成《{}》为题目的研究内容,包括整体简介和分最少三个方面总结",
}
# Load the paper titles: one record per line, the title being the text before
# the first "@@@@@" separator.
with open("./data/题目3.txt", encoding="utf-8") as f:
    text = f.read()
text_list = text.split("\n")

# Skip records whose title is blank (e.g. the empty string produced by a
# trailing newline) so that no prompt is generated for an empty title.
title_list = [
    title
    for title in (line.split("@@@@@")[0] for line in text_list)
    if title.strip()
]
random.shuffle(title_list)
print(len(title_list))

# One sub-task per (title, prompt type): a (task_type, formatted_prompt) tuple.
zirenwu_list = [
    (task_name, str(template).format(title))
    for title in title_list
    for task_name, template in prompt_dict.items()
]

# Sub-tasks are queued as the str() of the tuple; the dispatcher parses that
# text back into a tuple before starting a worker.
for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))
# def request_api_chatgpt(api_key, prompt):
# OPENAI_API_KEY = api_key
# url = "https://api.openai.com/v1/chat/completions"
# headers = {
# "Content-Type": "application/json",
# "Authorization": f"Bearer {OPENAI_API_KEY}"
# }
# data = {
# "model": "gpt-3.5-turbo",
# "messages": [
# {"role": "user", "content": prompt},
# ],
# "temperature": 0.5
# }
# response = requests.post(url,
# headers=headers,
# data=json.dumps(data),
# timeout=240)
# print("response", response)
#
# return response
#
# def task(api_key, title):
# try:
# for pormpt_dan in prompt_dict:
# name = pormpt_dan.split("_")[0]
# print("pormpt_dan", pormpt_dan)
# print("prompt_dict", prompt_dict)
# prompt = str(prompt_dict[pormpt_dan]).format(title)
# print("api_key", api_key)
# print("prompt", prompt)
# response = request_api_chatgpt(api_key, prompt)
# res = response.json()
# text = res["choices"][0]["message"]["content"]
# lock.acquire()
#
# with open("/home/majiahui/mulu_ner/data/paper_prompt_title_new/title_{}_data.txt".format(name), mode="a") as f:
# f.write(title)
# f.write("\n**********************************************\n")
# f.write(text)
# f.write("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
# lock.release()
# time.sleep(2)
# lock.acquire()
# api_key_list.append(api_key)
# lock.release()
# print(1)
# except:
# print()
# time.sleep(5)
# lock.acquire()
# api_key_list.append(api_key)
# lock.release()
# print(2)
def request_api_chatgpt(api_key, task_type, prompt):
    """Send one prompt to the OpenAI chat-completions API and store the reply.

    On success the prompt and the reply are appended as one line to the output
    file for ``task_type``. On any failure the sub-task is pushed back onto the
    Redis task list after a 5 second pause so that it is retried later. In
    both cases the API key is returned to the Redis key list exactly once.

    Uses the module-level ``lock``, ``redis_``, ``redis_key_name_openaikey_list``
    and ``redis_zirenwu``.

    Args:
        api_key: OpenAI API key placed in the ``Authorization`` header.
        task_type: Task name; used in the output file name and re-queued
            together with the prompt on failure.
        prompt: Prompt text sent as the single user message.

    Returns:
        None.
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5
    }
    try:
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)
        res = response.json()
        # An error payload has no "choices" entry; the resulting KeyError
        # routes the task to the retry path below.
        text = res["choices"][0]["message"]["content"]
        # `with lock` guarantees the lock is released even if the write fails;
        # a leaked RLock would block the dispatcher forever.
        with lock:
            # Explicit UTF-8 so the Chinese text is written the same way
            # regardless of the system locale.
            with open("/home/majiahui/mulu_ner/data/paper_prompt_title_3/title_{}_data.txt".format(task_type),
                      mode="a", encoding="utf-8") as f:
                f.write(prompt)
                f.write("**************")
                f.write(text)
                f.write("\n")
    except Exception as exc:
        print("task_type_bad", task_type)
        print("api_key_bad", api_key)
        print("error", repr(exc))
        # Back off before the key and the task become available again.
        time.sleep(5)
        with lock:
            redis_.rpush(redis_zirenwu, str((task_type, prompt)))
    finally:
        # Return the key exactly once, whichever path was taken.
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
if __name__ == '__main__':
    # Local import: only the dispatcher needs it, to parse queued sub-tasks
    # without executing arbitrary code the way eval() would.
    import ast

    # Dispatcher loop: pair one free API key with one pending sub-task and run
    # the request in its own thread. A key is popped before each thread starts,
    # so the number of in-flight threads is bounded by the number of keys.
    while True:
        # Wait until both a pending sub-task and a free API key are available.
        if redis_.llen(redis_zirenwu) == 0 or redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue

        with lock:
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            dan_zirenwu = redis_.lpop(redis_zirenwu)

        # lpop yields None when a list was emptied between the llen check and
        # the pop; put back whatever was obtained and try again later.
        if api_key is None or dan_zirenwu is None:
            with lock:
                if api_key is not None:
                    redis_.rpush(redis_key_name_openaikey_list, api_key)
                if dan_zirenwu is not None:
                    redis_.rpush(redis_zirenwu, dan_zirenwu)
            time.sleep(1)
            continue

        api_key = api_key.decode('UTF-8')
        dan_zirenwu = dan_zirenwu.decode('UTF-8')

        try:
            # Entries are the str() of a (task_type, prompt) tuple.
            parsed = ast.literal_eval(dan_zirenwu)
            task_type, prompt = parsed[0], parsed[1]
        except (ValueError, SyntaxError, TypeError, IndexError, KeyError) as exc:
            # A malformed entry must not cost us the API key that was popped.
            print("bad_task_entry", dan_zirenwu, repr(exc))
            with lock:
                redis_.rpush(redis_key_name_openaikey_list, api_key)
            continue

        t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt))
        t.start()