数据处理代码,为了生成chatgpt数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

126 lines
3.6 KiB

import time
import os
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis
import uuid
# Re-entrant lock used by the dispatcher loop and the worker threads.
lock = threading.RLock()

# NOTE(review): host, port and password are hard-coded in source; consider
# moving them to configuration or environment variables.
pool = redis.ConnectionPool(
    host='104.244.90.248',
    port=63179,
    max_connections=50,
    db=10,
    password='Zhicheng123*',
)

# NOTE(review): the dispatcher decodes lpop() results by hand, so values come
# back as bytes here; decode_responses presumably has to be set on the pool to
# take effect -- confirm before relying on it.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
# Names of the Redis lists used by this script.
redis_key_name_openaikey_bad_list = "openaikey_bad_list"
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

# Each line of api_key.txt is "<fields>----<api key>"; only the last field
# (the key itself) is used.
with open("api_key.txt", "r") as f:
    a = f.read()
a = a.split("\n")

# Strip stray whitespace / carriage returns and skip blank lines so that an
# empty string is never queued as an API key (a trailing newline in the file
# would otherwise produce one).
api_key_list = [
    key
    for key in (line.split("----")[-1].strip() for line in a)
    if key
]

# Publish the keys to the shared pool of available API keys.
for key in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, key)
# Prompt file: one prompt per line.
file = 'data/title_mulu_to_/small_title_prompt_2_10000_40000.txt'

# The prompt type is the file's base name without its extension; it is later
# used as the output sub-directory name.
type_prompt = file.split("/")[-1].split(".")[0]

with open(file, encoding="utf-8") as f:
    texts = f.read()
texts_list = texts.split("\n")

# Build (prompt, type_prompt) tasks, skipping blank lines so that no empty
# prompt is queued (e.g. from the file's trailing newline).
zirenwu_list = [(line, type_prompt) for line in texts_list if line.strip()]

# Randomise processing order, then queue every task as the repr of its tuple;
# the dispatcher parses that repr back into (prompt, type_prompt).
random.shuffle(zirenwu_list)
for task in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(task))
def request_api_chatgpt(api_key, prompt, type_prompt):
    """Send one prompt to the OpenAI chat API and store the reply on disk.

    On success the reply is written to a new ``<uuid>_data.txt`` file under
    the output directory for ``type_prompt``, the API key is returned to the
    Redis pool of available keys, and the thread sleeps 5 seconds.

    On any failure (network error, unexpected response shape, file error) the
    thread sleeps 20 seconds, then returns the API key to the pool and
    re-queues the task so it can be retried.

    Args:
        api_key: OpenAI API key used for the ``Authorization`` header.
        prompt: Text sent as the single user message.
        type_prompt: Task category; used as the output sub-directory name.
    """
    import logging  # local import: keeps this block self-contained

    id_ = uuid.uuid1()
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5,
    }
    path = f"/home/majiahui/mulu_ner/data/paper_prompt_title_3_2_10000_40000/{type_prompt}/"

    try:
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)
        res = response.json()
        # An error payload has no "choices"; the resulting KeyError sends the
        # task down the retry path below.
        text = res["choices"][0]["message"]["content"]

        # "with lock" guarantees the lock is released even if the write fails;
        # a leaked lock would block every other thread.
        with lock:
            os.makedirs(path, exist_ok=True)
            out_file = os.path.join(path, f"{id_}_data.txt")
            # Explicit UTF-8 so non-ASCII prompts/replies do not depend on the
            # locale's default encoding.
            with open(out_file, mode="w", encoding="utf-8") as f:
                f.write("@@@@@@@@@@@@@@@@@@")
                f.write(prompt)
                f.write("**************")
                f.write(text)
                f.write("\n")
    except Exception as exc:  # worker boundary: any failure means "retry later"
        logging.getLogger(__name__).warning(
            "ChatGPT request failed, re-queueing task: %r", exc)
        time.sleep(20)
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
            # Same (prompt, type_prompt) order the dispatcher unpacks; the
            # reversed order would swap the two fields on retry.
            redis_.rpush(redis_zirenwu, str((prompt, type_prompt)))
    else:
        # Return the key exactly once, only after the result is safely stored.
        with lock:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
        time.sleep(5)
if __name__ == '__main__':
    # Dispatcher: pair every queued task with a free API key and hand the pair
    # to a worker thread. Runs forever, polling Redis once per second while
    # either list is empty.
    import ast  # local imports: only needed by the script entry point
    import logging

    while True:
        # Wait until there is both a task to do and a key to do it with.
        if (redis_.llen(redis_zirenwu) == 0
                or redis_.llen(redis_key_name_openaikey_list) == 0):
            time.sleep(1)
            continue

        with lock:
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            dan_zirenwu = None
            if api_key is not None:
                dan_zirenwu = redis_.lpop(redis_zirenwu)
                if dan_zirenwu is None:
                    # The task list was emptied between llen() and lpop()
                    # (e.g. by another consumer): give the key back.
                    redis_.lpush(redis_key_name_openaikey_list, api_key)
                    api_key = None

        if api_key is None:
            time.sleep(1)
            continue

        # lpop() returns bytes unless the connection decodes responses.
        if isinstance(api_key, bytes):
            api_key = api_key.decode('UTF-8')
        if isinstance(dan_zirenwu, bytes):
            dan_zirenwu = dan_zirenwu.decode('UTF-8')

        # Tasks are stored as the repr of a (prompt, type_prompt) tuple.
        # literal_eval parses that repr without executing arbitrary code read
        # from Redis, unlike eval().
        try:
            dan_zirenwu = ast.literal_eval(dan_zirenwu)
            prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1]
        except (ValueError, SyntaxError, IndexError, TypeError) as exc:
            logging.getLogger(__name__).warning(
                "Skipping malformed task entry: %r", exc)
            with lock:
                redis_.rpush(redis_key_name_openaikey_list, api_key)
            continue

        t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt))
        t.start()