数据处理代码,为了生成chatgpt数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
3.6 KiB

2 years ago
import time
import os
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis
import uuid
# Re-entrant lock shared by the dispatcher loop and the worker threads.
lock = threading.RLock()

# NOTE(security): the Redis host and password are hard-coded in source; they
# should be moved to configuration / environment and the password rotated.
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
# NOTE(review): decode_responses appears to have no effect when an explicit
# connection_pool is supplied -- the dispatcher decodes the returned bytes
# itself. Left as-is so values read back from Redis stay bytes.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Each line of api_key.txt holds fields separated by "----"; the last field
# is the API key.
with open("api_key.txt", "r",) as f:
    a = f.read()
a = a.split("\n")

# Names of the Redis lists used as shared queues.
redis_key_name_openaikey_bad_list = "openaikey_bad_list"
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

# Skip blank lines (e.g. the empty string produced by a trailing newline) so
# that no empty API key ends up in the pool, and strip stray whitespace such
# as "\r" from the key itself.
api_key_list = [str(line.split("----")[-1]).strip() for line in a if line.strip()]
for key in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, key)

# Load the prompts: one task per line, tagged with the prompt file's base
# name (the file name without directory and extension).
file = r'./data/title_mulu_to_/zhaiyao_prompt.txt'
zirenwu_list = []
with open(file, encoding="utf-8") as f:
    type_prompt = file.split("/")[-1].split(".")[0]
    texts = f.readlines()
for text_line in texts:
    zirenwu_list.append((text_line, type_prompt))

random.shuffle(zirenwu_list)

# Tasks are queued as the repr of a (prompt, type_prompt) tuple; the
# dispatcher parses that text back into a tuple.
# NOTE: this runs on every start, so keys and tasks are appended to whatever
# is already stored in the Redis lists.
for task in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(task))
def request_api_chatgpt(api_key, prompt, type_prompt):
    """Send one prompt to the OpenAI chat-completions API and save the reply.

    On success the prompt and the model's answer are written to a new file
    named ``<uuid>_data.txt`` under the output directory for ``type_prompt``,
    the API key is returned to the Redis key pool, and the thread sleeps
    5 seconds.

    On any failure (network error, unexpected response shape, file error)
    the thread sleeps 20 seconds, then returns the API key to the pool and
    re-queues the task so it can be retried.

    Args:
        api_key: OpenAI API key used for the ``Authorization`` header.
        prompt: Text sent as the single user message.
        type_prompt: Task category; used as the output sub-directory name.

    Returns:
        None.
    """
    id_ = uuid.uuid1()
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5
    }
    try:
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)
        res = response.json()
        # An error payload has no "choices"; the resulting KeyError sends the
        # task down the retry path below.
        text = res["choices"][0]["message"]["content"]

        path = f"/home/majiahui/mulu_ner/data/paper_prompt_title_3_2/{type_prompt}/"
        # `with lock` guarantees the lock is released even if writing fails.
        with lock:
            os.makedirs(path, exist_ok=True)
            # Prompts are read as UTF-8, so write the result as UTF-8 too
            # instead of relying on the platform default encoding.
            with open(os.path.join(path, f"{id_}_data.txt"), mode="w", encoding="utf-8") as f:
                f.write("@@@@@@@@@@@@@@@@@@")
                f.write(prompt)
                f.write("**************")
                f.write(text)
                f.write("\n")
    except Exception:
        time.sleep(20)
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
            # Re-queue in (prompt, type_prompt) order -- the same order the
            # dispatcher unpacks -- so a retried task is not scrambled.
            redis_.rpush(redis_zirenwu, str((prompt, type_prompt)))
        return

    # Success: hand the key back exactly once, then pause before this thread
    # finishes.
    with lock:
        redis_.rpush(redis_key_name_openaikey_list, api_key)
    time.sleep(5)
if __name__ == '__main__':
    # Local import: literal_eval parses the queued tuple text without
    # executing arbitrary code, unlike eval.
    import ast

    # Dispatcher loop: whenever both a pending task and a free API key are
    # available, pop one of each and hand them to a new worker thread.
    while True:
        if redis_.llen(redis_zirenwu) == 0 or redis_.llen(redis_key_name_openaikey_list) == 0:
            # Nothing to do, or every key is currently in use; poll again.
            time.sleep(1)
            continue

        with lock:
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            dan_zirenwu = redis_.lpop(redis_zirenwu)

        if api_key is None or dan_zirenwu is None:
            # A list was emptied between the length check and the pop.
            # Put back whatever was taken and try again later.
            if api_key is not None:
                redis_.rpush(redis_key_name_openaikey_list, api_key)
            if dan_zirenwu is not None:
                redis_.rpush(redis_zirenwu, dan_zirenwu)
            time.sleep(1)
            continue

        api_key = api_key.decode('UTF-8')
        # Tasks are stored as the repr of a (prompt, type_prompt) tuple.
        prompt, type_prompt = ast.literal_eval(dan_zirenwu.decode('UTF-8'))

        t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt))
        t.start()