|
|
|
from docx import Document
|
|
|
|
import platform
|
|
|
|
import os
|
|
|
|
import concurrent.futures
|
|
|
|
# Route all outbound HTTP(S) traffic through a local proxy (must be set before
# requests is used; needed for the OpenAI API calls below).
os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809'
|
|
|
|
import docx
|
|
|
|
import json
|
|
|
|
import re
|
|
|
|
from docx.document import Document
|
|
|
|
from docx.oxml.table import CT_Tbl
|
|
|
|
from docx.oxml.text.paragraph import CT_P
|
|
|
|
from docx.table import _Cell, Table
|
|
|
|
from docx.text.paragraph import Paragraph
|
|
|
|
import requests
|
|
|
|
import random
|
|
|
|
import time
|
|
|
|
from flask import Flask, render_template, request, redirect, url_for, jsonify
|
|
|
|
from werkzeug.utils import secure_filename
|
|
|
|
app = Flask(__name__)

# Directory where uploaded .docx files are stored
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Regex for lines starting with an Arabic numeral, e.g. "1. xxx"
# NOTE(review): not referenced anywhere in this file — confirm before removing.
RE_CHINA_NUMS = "[1-9].(.*)"
# Allowed upload file extensions
ALLOWED_EXTENSIONS = {'docx'}

# Catalogue span: everything between "目录" (table of contents) and "致谢" (acknowledgements)
pantten_mulu = '目录(.*?)致谢'
# Section body: text between two consecutive headings (filled in via .format())
pantten_xiaobiaoti = "{}(.*?){}"
# Level-1 heading: Chinese numeral + "、", e.g. "一、绪论" (captures numeral and title)
pantten_yijibiaoti = '^([一二三四五六七八九])、(.*)'
# Level-2 heading: "N.N  title", e.g. "1.1  xxx"
pantten_erjibiaoti = '^[0-9](\.[0-9]\d*){1}\s{1,}?.*$'
# Numbered bullet line in a model response, e.g. "1. xxx" (captures the text)
pantten_content_tiaoshu = '[0-9]\.{1}\s{0,}?(.*)'

# Same heading shapes as above, but capturing only the title text
pantten_yijibiaoti_content = '^[一二三四五六七八九]、(.*)'
pantten_erjibiaoti_content = '^[0-9]\.[0-9]\s{1,}?(.*)$'

# Prompt: generate a two-level Chinese catalogue for a thesis title,
# with a min-max range of level-2 headings per level-1 heading
prompt_two_title_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}-{}个二级标题"
# Prompt: same, but with a fixed number of level-2 headings per level-1 heading
prompt_two_title_not_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}个二级标题"

# Regexes for parsing the prompts back out of generated text
pantten_title = "为论文题目“(.*?)”生成中文目录"
pantten_xiaobiaoti_geshu = "每个一级标题包含(.*?)个"
pantten_dabiaoti_geshu = "一级标题生成(.*?)个"

# Prompt: shrink a catalogue down to exactly 4 level-1 headings for a PPT
mulusuojian = "请问把以下目录缩减成只有4个一级标题作为ppt的题目,请问留下原始目录中的哪4个一级标题最合适,一级标题必须在原始目录中\n{}\n"

# Self-hosted model endpoint and OpenAI chat-completions endpoint
self_api = "http://192.168.31.149:12004/predict"
gpt_api = "https://api.openai.com/v1/chat/completions"
|
|
|
|
|
|
|
|
|
|
|
|
class log:
    """Minimal file logger.

    Used as ``log.log(...)`` (class-level call, no instance), appending a
    timestamped line to ``log_file/access-YYYY-MM-DD.log``.
    """

    def __init__(self):
        pass

    @staticmethod
    def log(*args, **kwargs):
        """Append a timestamped log line; extra args/kwargs are passed to print().

        Fixes vs. original: creates the ``log_file`` directory when missing
        (the original raised FileNotFoundError on a fresh deployment), and
        always opens in append mode — ``'a'`` creates the file if absent, so
        the separate ``'w'`` branch guarded by os.path.exists was redundant.
        """
        value = time.localtime(int(time.time()))
        dt = time.strftime('%Y/%m/%d-%H:%M:%S', value)
        dt_log_file = time.strftime('%Y-%m-%d', value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        with open(log_file, 'a', encoding='utf-8') as f:
            print(dt, *args, file=f, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* carries an extension listed in ALLOWED_EXTENSIONS."""
    _stem, dot, extension = filename.rpartition('.')
    return dot == '.' and extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
|
|
|
|
|
|
def iter_block_items(parent):
    """Yield every Paragraph and Table child of *parent* in document order.

    *parent* is normally a main Document object, but a _Cell also works since
    cells can themselves hold paragraphs and tables. Any other type raises
    ValueError.
    """
    if isinstance(parent, Document):
        container = parent.element.body
    elif isinstance(parent, _Cell):
        container = parent._tc
    else:
        raise ValueError("something's not right")

    for node in container.iterchildren():
        if isinstance(node, CT_P):
            yield Paragraph(node, parent)
        elif isinstance(node, CT_Tbl):
            yield Table(node, parent)
|
|
|
|
|
|
|
|
|
|
|
|
def read_table(table):
    """Convert a docx Table into a list of rows, each row a list of cell texts."""
    rows = []
    for row in table.rows:
        texts = [cell.text for cell in row.cells]
        rows.append(texts)
    return rows
|
|
|
|
|
|
|
|
|
|
|
|
def read_word(word_path):
    """Read a .docx file into one string: paragraph texts in order, with each
    table serialized as pseudo-HTML wrapped in <tbStart>/<tbEnd> markers."""
    pieces = []
    document = docx.Document(word_path)
    for block in iter_block_items(document):
        if isinstance(block, Paragraph):
            pieces.append(block.text)
        elif isinstance(block, Table):
            # One "<td>...</td>" run per row, then rows joined as <tr> elements.
            row_cells = [
                "<td>" + "</td>\n<td>".join(cells) + "</td>"
                for cells in read_table(block)
            ]
            body = "\n<tr>\n" + "\n</tr>\n<tr>\n".join(row_cells) + "\n</tr>\n"
            pieces.append("<tbStart>\n<table>" + body + "</table>\n\n<tbEnd>")
    return "\n".join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
def getText(fileName):
    """Return the plain text of all paragraphs in the .docx *fileName*, newline-joined."""
    document = docx.Document(fileName)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
|
|
|
|
|
|
|
|
|
|
|
|
def request_selfmodel_api(prompt):
    """POST *prompt* to the self-hosted model endpoint and return the parsed JSON.

    The response is OpenAI chat-completions shaped — callers read
    ``res['choices'][0]['message']['content']``.

    :param prompt: user prompt string, sent as a single chat message
    :return: decoded JSON body as a dict
    :raises requests.HTTPError: when the server answers with an error status
    """
    # Reuse the module-level endpoint constant instead of re-hardcoding the URL.
    url = self_api
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    # NOTE(review): timeout=100000 seconds (~28 h) looks accidental; kept for
    # compatibility, but a value in the minutes range is almost certainly intended.
    response = requests.post(
        url,
        json=data,
        timeout=100000
    )
    # Fail loudly on HTTP errors instead of returning an error payload that
    # callers would mis-parse as a completion (KeyError on 'choices').
    response.raise_for_status()
    return response.json()
|
|
|
|
|
|
|
|
|
|
|
|
def request_chatgpt_api(prompt):
    """Send *prompt* to the OpenAI chat-completions API and return the parsed JSON.

    :param prompt: user prompt string, sent as a single chat message
    :return: decoded JSON body as a dict — callers read
             ``['choices'][0]['message']['content']``
    """
    # SECURITY: an API key was committed here in plain text. It should be
    # rotated and supplied via the OPENAI_API_KEY environment variable; the
    # literal remains only as a backward-compatible fallback.
    OPENAI_API_KEY = os.environ.get(
        "OPENAI_API_KEY",
        "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")
    url = "https://api.openai.com/v1/chat/completions"
    # url = "https://one.aiskt.com"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    # json= lets requests serialize the payload itself; equivalent to the
    # original data=json.dumps(data) but idiomatic.
    response = requests.post(url,
                             headers=headers,
                             json=data,
                             timeout=1200)
    return response.json()
|
|
|
|
|
|
|
|
|
|
|
|
def yanzhengyijibiaoti(mulu, res):
    """Validate the GPT-shrunk catalogue against the original one.

    Collects every level-1 heading ("一、xxx" style) from *mulu* whose title
    text appears in the model response *res*, and accepts the response only
    when exactly four such headings matched.

    :param mulu: original catalogue, one heading per line
    :param res: model response text to check headings against
    :return: (True/False — exactly four headings matched, list of the matched
             headings reassembled as "numeral、title" strings)
    """
    mulu_list = str(mulu).split("\n")

    dabiaoti_list = []
    dabiaoti_res_list = []
    for line in mulu_list:
        # Fix: reuse the single findall result instead of running the regex
        # twice per line as the original did.
        matches = re.findall(pantten_yijibiaoti, line)
        if matches:
            # matches[0] is a (chinese_numeral, title) tuple from the two groups.
            dabiaoti_list.append(matches[0])

    for numeral_title in dabiaoti_list:
        # Keep the heading when its title text occurs anywhere in the response.
        if numeral_title[1].strip() in res:
            dabiaoti_res_list.append("、".join(numeral_title))

    return_bool = len(dabiaoti_res_list) == 4
    return return_bool, dabiaoti_res_list
|
|
|
|
|
|
|
|
|
|
|
|
def get_document_structure(file_path):
    """Return [(level, text), ...] for every Heading-styled paragraph in a .docx.

    :param file_path: path of the document to scan
    :return: list of (heading level int, paragraph text) in document order

    Fix: the original parsed the level with ``int(style.name[7:])``, which
    raises ValueError for heading styles without a trailing number (e.g. a
    plain "Heading" style). We extract the first digit run instead and skip
    headings that have none.
    """
    doc = docx.Document(file_path)
    structure = []
    for paragraph in doc.paragraphs:
        style = paragraph.style
        if style.name.startswith("Heading"):
            match = re.search(r"\d+", style.name)
            if match is None:
                # Heading-like style with no level number — nothing to record.
                continue
            level = int(match.group())
            text = paragraph.text
            structure.append((level, text))
    return structure
|
|
|
|
|
|
|
|
def print_document_structure(structure):
    """Pretty-print a (level, text) outline, one space of indent per level above 1."""
    for level, text in structure:
        pad = ' ' * (level - 1)
        print(f"{pad}- {text}")
|
|
|
|
|
|
|
|
def catalogue_extract(structure):
    """Filter a (level, text) structure down to catalogue entries.

    Keeps only texts matching the level-1 ("一、xxx") or level-2 ("1.1 xxx")
    heading patterns; the final structure entry is never considered.

    :return: (list of cleaned heading strings, the same list newline-joined)
    """
    entries = []
    for _level, raw in structure[:-1]:
        cleaned = str(raw).strip(" ").strip("\n")
        looks_erji = bool(re.findall(pantten_erjibiaoti, cleaned))
        looks_yiji = bool(re.findall(pantten_yijibiaoti, cleaned))
        if looks_erji or looks_yiji:
            entries.append(cleaned)
    return entries, "\n".join(entries)
|
|
|
|
|
|
|
|
|
|
|
|
def main(path):
    """Build the PPT-outline payload for an uploaded .docx thesis.

    Pipeline:
      1. read the whole document (paragraphs + tables) as text,
      2. slice the body into sections keyed by catalogue headings,
      3. ask GPT to shrink the catalogue to exactly 4 level-1 headings,
         retrying until the answer validates,
      4. summarize every level-2 section into bullet points via the
         self-hosted model, fanned out over a thread pool,
      5. assemble and return {"title", "catalogue", "content_1"}.

    :param path: filesystem path of the uploaded .docx file
    :return: dict with keys "title" (str), "catalogue" (list of level-1
             titles) and "content_1" (nested title/bullet structure)
    """

    # Pick a path separator per OS to derive the PPT title from the file name.
    # (os.path.basename/splitext would cover both branches.)
    system = platform.system()
    if system == 'Linux':
        file_name = path.split("/")[-1].split(".")[0]
    else:
        file_name = path.split("\\")[-1].split(".")[0]

    # JSON-encode the document text: real newlines become the two characters
    # "\n" inside text_1, which is why later code strips/replaces "\\n".
    text_1 = json.dumps(read_word(path), ensure_ascii=False)
    print(text_1)

    # Legacy catalogue extraction from the "目录 ... 致谢" span, superseded by
    # get_document_structure()/catalogue_extract() below.
    # mulu_str = re.findall(pantten_mulu, text_1)[0]
    # print(mulu_str)
    # mulu_list_xuhao = str(mulu_str).split("\\n")
    #
    # mulu_list = []
    # for i in mulu_list_xuhao:
    #     if i != "":
    #         mulu_list.append(i.split("\\t")[0])
    #
    # mulu_list.append("致谢")
    #
    # print(mulu_list)

    document_structure = get_document_structure(path)
    mulu_list, catalogue_str = catalogue_extract(document_structure)

    # Body text after the last "目录" (catalogue) marker.
    text = text_1.split("目录")[-1].strip("\\n")

    # Map: level-1 heading -> {level-2 heading -> section text}.
    yijibiaoti = ""
    paper_content = {}
    for i in range(len(mulu_list) - 1):
        title = mulu_list[i].strip(" ").strip("\\n")
        print(mulu_list[i])
        print(mulu_list[i + 1])
        print(re.findall(pantten_xiaobiaoti.format(mulu_list[i], mulu_list[i + 1]), text))
        # Text between this heading and the next; an IndexError here means the
        # heading pair was not found verbatim in the body text.
        content = str(re.findall(pantten_xiaobiaoti.format(mulu_list[i], mulu_list[i + 1]), text)[0]).strip(
            " ").strip("\\n")

        yijibiaoti_res = re.findall(pantten_yijibiaoti, title)
        erjibiaoti_res = re.findall(pantten_erjibiaoti, title)
        if yijibiaoti_res != []:
            # Level-1 heading: open a new section bucket and remember it.
            paper_content[title] = {}
            yijibiaoti = title
            continue

        elif erjibiaoti_res != []:
            # Level-2 heading: store its body text with real newlines restored.
            paper_content[yijibiaoti][title] = content.replace("\\n", "\n")

        else:
            # NOTE(review): two suspected bugs here — "\n".join(title + content)
            # interleaves a newline between every single character (probably
            # meant "\n" + title + content), and *title* was never added as a
            # key in this branch, so += raises KeyError. Likely dead code since
            # catalogue_extract only emits level-1/level-2 headings — confirm.
            paper_content[yijibiaoti][title] += "\n".join(title + content)

    # Ask GPT to pick exactly 4 level-1 headings; retry until validation passes.
    while True:
        mulu_str = "\n".join(mulu_list[:-1])
        prompt = f'请问把以下目录缩减成只有4个一级标题作为ppt的题目,请问留下原始目录中的哪4个一级标题最合适,一级标题必须在原始目录中\n{mulu_str}\n'

        try:
            res = request_chatgpt_api(prompt)['choices'][0]['message']['content']
        except:
            # NOTE(review): bare except + continue retries forever on any
            # failure (network, quota, malformed response) — consider a retry
            # cap and narrower exception types.
            continue

        # Sample GPT response kept for offline debugging:
        # res = '''根据您提供的目录内容,如果要将其缩减为只包含4个一级标题的PPT题目,建议选择以下四个一级标题,因为它们分别代表了研究的引入、理论框架、实际应用与实践,以及未来展望,从而形成了一个完整的研究过程和内容框架:
        #
        # 1. 一、绪论
        # 2. 二、电影网站设计的基本概念
        # 3. 三、Python在电影网站设计中的应用
        # 4. 五、电影网站设计的实践与展望
        #
        # 这样的选择既涵盖了研究的背景、目的与意义(绪论),也包括了研究的理论基础(电影网站设计的基本概念),以及研究的实际操作和技术实现(Python在电影网站设计中的应用),最后还有对项目实践经验的总结和对未来发展的展望(电影网站设计的实践与展望)。这四个部分共同构成了一个完整的研究报告或项目介绍的框架,能够全面展示电影网站设计项目的各个方面。
        # '''

        shaixuan_bool, dabiaoti_res_list = yanzhengyijibiaoti("\n".join(mulu_list), res.replace("\n", "\\n"))
        if shaixuan_bool == True:
            break

    # Build one summarization prompt per level-2 section. content_3 temporarily
    # holds an index into *zhanweifu* (placeholder list) until the thread-pool
    # results are filled in below.
    index_zahnweifu = 0
    zhanweifu = []
    content_1 = []
    catalogue = []
    for yijibiaoti in dabiaoti_res_list:
        content_2 = []
        yijibiaoti_content = re.findall(pantten_yijibiaoti_content, yijibiaoti)[0]
        catalogue.append(yijibiaoti_content)
        for erjibiaoti in paper_content[yijibiaoti]:
            # Ask for a random 2-6 bullet summary of each level-2 section.
            num = random.randint(2, 6)
            content = paper_content[yijibiaoti][erjibiaoti]
            # Synchronous single-request variant, replaced by the thread pool:
            # res = request_selfmodel_api(
            #     f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)[
            #     'choices'][0]['message']['content']

            zhanweifu.append(f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)

            content_2.append({
                "title_small": re.findall(pantten_erjibiaoti_content, erjibiaoti)[0],
                "content_3": index_zahnweifu
            })
            index_zahnweifu += 1
        content_1.append({
            "title_big": yijibiaoti_content,
            "content_2": content_2
        })

    # Fan the prompts out to the self-hosted model; map() preserves order.
    with concurrent.futures.ThreadPoolExecutor(100) as executor:
        results = executor.map(request_selfmodel_api, zhanweifu)

    # Reuse *zhanweifu* to hold the parsed bullet lists, in prompt order.
    zhanweifu = []
    for result in results:
        res = result['choices'][0]['message']['content']

        tiaoshu_list = str(res).split("\n")

        tiaoshu_list_new = []
        for dantiao in tiaoshu_list:
            # IndexError here means a response line was not "N. ..." shaped.
            tiaoshu_list_new.append(re.findall(pantten_content_tiaoshu, dantiao)[0].strip())
        zhanweifu.append(tiaoshu_list_new)

    # Resolve the placeholder indices into the actual bullet lists.
    content_1_new = []

    for yijibiaoti_content in content_1:
        content_2_new = []
        title_big = yijibiaoti_content["title_big"]
        for erjibiaoti_content in yijibiaoti_content["content_2"]:
            title_small = erjibiaoti_content["title_small"]
            content_3 = zhanweifu[erjibiaoti_content["content_3"]]
            content_2_new.append({
                "title_small": title_small,
                "content_3": content_3
            })
        content_1_new.append({
            "title_big": title_big,
            "content_2": content_2_new
        })

    # Final payload returned to the Flask handler.
    data_new = {
        "title": file_name,
        "catalogue": catalogue,
        "content_1": content_1_new
    }

    # with open("data/ceshi.json", "w", encoding="utf-8") as f:
    #     f.write(json.dumps(data_new, ensure_ascii=False, indent=2))

    return data_new
|
|
|
|
|
|
|
|
@app.route('/predict', methods=['POST'])
def upload_file():
    """Accept a .docx upload, run the outline pipeline, return it as JSON.

    Returns the legacy sentinel "1" when the request has no 'file' part, and a
    Chinese error string for disallowed file types (kept for compatibility).
    """
    if 'file' not in request.files:
        return "1"

    file = request.files.get('file')

    if file and allowed_file(file.filename):
        # Fix: the client-supplied filename was joined into the path verbatim,
        # allowing path traversal (e.g. "../../x.docx"). Strip any directory
        # components. We deliberately do NOT use werkzeug's secure_filename
        # here because it drops non-ASCII characters and would mangle the
        # Chinese filenames this service receives (the name becomes the title).
        filename = os.path.basename(file.filename.replace("\\", "/"))
        # Ensure the upload directory exists before saving.
        os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
        path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        print(path)
        file.save(path)
        # file.save(file.filename)

        result = main(path)
        return_text = {"texts": result, "probabilities": None, "status_code": 200}

        log.log('start at',
                'filename:{}, result:{}'.format(
                    path, return_text))
        return jsonify(return_text)
    else:
        return "不允许的文件类型"
|
|
|
|
|
|
|
|
# Development entry point: serve on all interfaces, port 21000, with the
# threaded werkzeug server (use a production WSGI server for deployment).
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=21000, threaded=True)
|