数据处理代码,为了生成chatgpt数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

127 lines
4.0 KiB

import time
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis
# Shared re-entrant lock.  Elsewhere in this file it serialises the
# dispatcher's "pop key + pop task" step and the workers' writes to the
# output file and to Redis.  Created exactly once.
lock = threading.RLock()

# NOTE(review): the Redis host and password are hard-coded in source; consider
# loading them from the environment or a config file that is not committed.
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
# NOTE(review): the dispatcher loop calls .decode() on replies, i.e. it expects
# bytes.  decode_responses presumably has no effect when an explicit
# connection_pool is supplied -- confirm against the redis-py documentation
# before relying on it or removing it.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Names of the Redis lists used as the API-key pool and the task queue.
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

# Each line of api_key.txt is split on "----" and the last field is used as
# the API key.
with open("api_key.txt", "r", encoding="utf-8") as f:
    key_lines = f.read().split("\n")

# Strip whitespace (including a stray "\r") and drop blank lines: an empty
# key can never succeed, and because failed keys are always returned to the
# pool it would otherwise be recycled indefinitely.
api_key_list = [
    key
    for key in (str(line.split("----")[-1]).strip() for line in key_lines)
    if key
]

# Seed the key pool in one round-trip.  RPUSH needs at least one value, so
# skip the call when no usable key was found.
if api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, *api_key_list)
# Prompt template; "{}" is replaced with the paper title.
# The implicitly concatenated literal must NOT be followed by a comma:
# a trailing comma turns it into a 1-tuple, and formatting str(tuple) wraps
# every generated prompt in "('" ... "',)".
zhuyaoneirong_prompt = "“《基于单片机的多功能充电控制器设计》:研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成果是" \
                       "研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性”," \
                       "以上面话术为标准。根据论文题目为《{}》生成这种格式的一段话,要求200个字以内"

with open("./data/题目2.txt", encoding="utf-8") as f:
    text = f.read()

# One record per line.  Blank lines are dropped before sampling so that they
# neither consume part of the 6000-record sample nor produce empty titles.
text_list = [line for line in text.split("\n") if line.strip()]
random.shuffle(text_list)
text_list = text_list[:6000]

# The title is the field before the first "@@@@@" separator.  The list is
# already in random order, so no second shuffle is needed.
title_list = []
for record in text_list:
    title = record.split("@@@@@")[0].strip()
    if title:
        title_list.append(title)
print(len(title_list))

# Each task is a (task_type, prompt) tuple; it is stored in Redis as its
# str() representation and parsed back by the dispatcher loop.
zirenwu_list = [
    ("zhuyaoneirong_prompt", zhuyaoneirong_prompt.format(title))
    for title in title_list
]

# Queue every task in one round-trip.  RPUSH needs at least one value, so
# skip the call when there is nothing to queue.
if zirenwu_list:
    redis_.rpush(redis_zirenwu, *(str(task) for task in zirenwu_list))
def request_api_chatgpt(api_key, task_type, prompt):
    """Send one prompt to the OpenAI chat-completions API and record the reply.

    On success the prompt and the generated text are appended to the output
    file for ``task_type``.  On any failure the task is pushed back onto the
    Redis task queue so it is retried later.  In both cases the API key is
    returned to the Redis key pool exactly once.

    Args:
        api_key: OpenAI API key sent in the ``Authorization`` header.
        task_type: Task label; interpolated into the output file name and
            re-queued together with the prompt on failure.
        prompt: User message sent to the model.

    Returns:
        None.
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5
    }
    output_path = "/home/majiahui/mulu_ner/data/paper_prompt_title_3/title_{}_data.txt".format(task_type)

    try:
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(payload),
                                 timeout=240)
        # An error reply has no "choices" entry; the resulting KeyError is
        # handled below like any other failure.
        text = response.json()["choices"][0]["message"]["content"]

        # Assemble the whole record first and write it in one call while
        # holding the lock.  The context managers release the lock and close
        # the file even if the write fails.
        record = prompt + "**************" + text + "\n"
        with lock, open(output_path, mode="a", encoding="utf-8") as f:
            f.write(record)
    except Exception as exc:
        print("task_type_bad", task_type)
        print("api_key_bad", api_key)
        print("error", repr(exc))
        # Back off before the task and the key become available again.
        time.sleep(5)
        with lock:
            redis_.rpush(redis_zirenwu, str((task_type, prompt)))
    finally:
        # Return the key exactly once, whichever path was taken.
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
if __name__ == '__main__':
    # Imported here because only the dispatcher loop needs it.
    import ast

    # Dispatcher: runs forever, pairing one free API key with one queued task
    # and handing the pair to a worker thread.  The worker returns the key to
    # the pool when it finishes, so the number of concurrent workers is
    # bounded by the number of keys in the pool.
    while True:
        # Cheap pre-check: nothing to do unless both lists are non-empty.
        if redis_.llen(redis_zirenwu) == 0 or redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue

        with lock:
            raw_key = redis_.lpop(redis_key_name_openaikey_list)
            # LPOP returns None when the list was emptied by another consumer
            # after the length check above.
            raw_task = None if raw_key is None else redis_.lpop(redis_zirenwu)
            if raw_key is not None and raw_task is None:
                # Got a key but no task: put the key back at the head so it
                # is not lost.
                redis_.lpush(redis_key_name_openaikey_list, raw_key)

        if raw_key is None or raw_task is None:
            time.sleep(1)
            continue

        # Replies are decoded when they arrive as bytes; already-decoded
        # strings are used unchanged.
        api_key = raw_key.decode('UTF-8') if isinstance(raw_key, bytes) else raw_key
        dan_zirenwu = raw_task.decode('UTF-8') if isinstance(raw_task, bytes) else raw_task

        # SECURITY: queue entries come from Redis, so they are parsed with
        # ast.literal_eval instead of eval().  Tasks are queued as the str()
        # of a (task_type, prompt) tuple of strings, which literal_eval parses
        # to the same value without executing code.
        try:
            task_type, prompt = ast.literal_eval(dan_zirenwu)
        except (ValueError, SyntaxError, TypeError) as exc:
            # Drop the malformed entry but keep the key and keep running.
            print("bad_task", repr(dan_zirenwu), repr(exc))
            with lock:
                redis_.lpush(redis_key_name_openaikey_list, api_key)
            continue

        t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt))
        t.start()