
2 changed files with 270 additions and 0 deletions
@@ -0,0 +1,8 @@
# Generate PPT-format JSON data

Upload a docx file

Returns JSON data

```
python ppt_api.py
```
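
For reference, the JSON the script writes has roughly this shape (a sketch based on the `data_new` structure in the script below; the bracketed headings and bullets are placeholders for what the model actually returns):

```
{
  "title": "data/基于Python的电影网站设计_范文.docx",
  "catalogue": ["一、绪论", "<level-1 heading>", "<level-1 heading>", "<level-1 heading>"],
  "content_1": [
    {
      "title_big": "一、绪论",
      "content_2": [
        {
          "title_small": "<level-2 heading>",
          "content_3": ["<bullet 1>", "<bullet 2>"]
        }
      ]
    }
  ]
}
```
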
@@ -0,0 +1,262 @@
import os

# route all HTTP traffic through a local proxy so the OpenAI endpoint is reachable
os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809'

import json
import random
import re

import docx
import requests
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph

fileName = "data/基于Python的电影网站设计_范文.docx"

# regex patterns used to carve the thesis text up by its table of contents
pantten_mulu = r'目录(.*?)致谢'  # everything between 目录 (TOC) and 致谢 (acknowledgements)
pantten_xiaobiaoti = "{}(.*?){}"  # text between two consecutive headings
pantten_yijibiaoti = r'^([一二三四五六七八九])、(.*)'  # level-1 heading, e.g. 一、xxx
pantten_yijibiaoti_content = r'^[一二三四五六七八九]、(.*)'
pantten_erjibiaoti = r'^[0-9]\.[0-9]\d*\s+?.*$'  # level-2 heading, e.g. 1.1 xxx
pantten_erjibiaoti_content = r'^[0-9]\.[0-9]\d*\s+?(.*)$'
pantten_content_tiaoshu = r'[0-9]\.\s*?(.*)'  # numbered bullet, e.g. "1. xxx"

# prompt templates: ask the model to generate a Chinese TOC for a thesis title,
# with level-1 headings numbered 一、二、... and level-2 headings numbered 1.1, 1.2, ...
prompt_two_title_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}-{}个二级标题"
prompt_two_title_not_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}个二级标题"

# patterns for pulling the title and heading counts back out of a prompt
pantten_title = "为论文题目“(.*?)”生成中文目录"
pantten_xiaobiaoti_geshu = "每个一级标题包含(.*?)个"
pantten_dabiaoti_geshu = "一级标题生成(.*?)个"

# prompt asking the model to shrink the TOC down to the 4 level-1 headings
# that work best as PPT section titles
mulusuojian = "请问把以下目录缩减成只有4个一级标题作为ppt的题目,请问留下原始目录中的哪4个一级标题最合适,一级标题必须在原始目录中\n{}\n"

self_api = "http://192.168.31.149:12004/predict"  # local model server (OpenAI-style chat API)
gpt_api = "https://api.openai.com/v1/chat/completions"


def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph. *parent*
    would most commonly be a reference to a main Document object, but
    also works for a _Cell object, which itself can contain paragraphs and tables.
    """
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("parent must be a Document or _Cell")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)


def read_table(table):
    return [[cell.text for cell in row.cells] for row in table.rows]


def read_word(word_path):
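    """Return the document's text in reading order; tables are serialized as
    <table>/<tr>/<td> rows wrapped in <tbStart>/<tbEnd> marker lines."""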
    paper_text = []
    doc = docx.Document(word_path)
    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            paper_text.append(block.text)
        elif isinstance(block, Table):
            table_list = read_table(block)
            table_list_new = []
            for row in table_list:
                table_list_new.append("<td>" + "</td>\n<td>".join(row) + "</td>")
            table_str = "\n<tr>\n" + "\n</tr>\n<tr>\n".join(table_list_new) + "\n</tr>\n"
            table_str = "<tbStart>\n<table>" + table_str + "</table>\n\n<tbEnd>"
            paper_text.append(table_str)
    paper_text = "\n".join(paper_text)
    return paper_text


def getText(fileName):
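    """Simpler variant of read_word: paragraph text only, tables are skipped."""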
    doc = docx.Document(fileName)
    TextList = []
    for paragraph in doc.paragraphs:
        TextList.append(paragraph.text)

    return '\n'.join(TextList)


def request_selfmodel_api(prompt):
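    """POST an OpenAI-style chat payload to the local model server and return the parsed JSON."""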
    print(prompt)
    url = self_api
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    response = requests.post(
        url,
        json=data,
        timeout=100000
    )

    return response.json()


def request_chatgpt_api(prompt):
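    """Send the same chat payload to the official OpenAI endpoint, authenticating with a bearer token."""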
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")  # the key belongs in the environment, not in source
    url = gpt_api
    # url = "https://one.aiskt.com"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    response = requests.post(url,
                             headers=headers,
                             data=json.dumps(data),
                             timeout=1200)

    return response.json()


def yanzhengyijibiaoti(mulu, res):
    '''
    Check whether the level-1 headings picked by the model are usable.
    :param mulu: the original TOC, one heading per line
    :param res: the model's reply, expected to quote 4 level-1 headings
    :return: (True if exactly 4 original headings were matched, the matched headings)
    '''
    mulu_list = str(mulu).split("\n")

    dabiaoti_list = []
    dabiaoti_res_list = []
    for i in mulu_list:
        res_re = re.findall(pantten_yijibiaoti, i)
        if res_re:
            dabiaoti_list.append(res_re[0])
    for i in dabiaoti_list:
        if i[1].strip() in res:
            dabiaoti_res_list.append("、".join(i))

    return_bool = len(dabiaoti_res_list) == 4

    return return_bool, dabiaoti_res_list


if __name__ == '__main__':
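    # Pipeline: read the docx -> pull out the TOC -> map each heading to its
    # body text -> have the model shrink the TOC to 4 level-1 headings ->
    # summarize each level-2 section into bullets -> write the PPT-ready JSON.
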
    text_1 = json.dumps(read_word(fileName), ensure_ascii=False)
    print(text_1)
    # grab everything between 目录 (TOC) and 致谢 (acknowledgements)
    mulu_str = re.findall(pantten_mulu, text_1)[0]
    print(mulu_str)
    # json.dumps escaped the newlines, so split on the literal "\n"
    mulu_list_xuhao = str(mulu_str).split("\\n")

    mulu_list = []
    for i in mulu_list_xuhao:
        if i != "":
            mulu_list.append(i.split("\\t")[0])  # keep the heading, drop what follows the tab

    mulu_list.append("致谢")

    print(mulu_list)
    content_list = []
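
    # Walk consecutive TOC entries and capture the text between each pair of
    # headings, building {level-1 heading: {level-2 heading: body text}}.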
    yijibiaoti = ""
    paper_content = {}
    for i in range(len(mulu_list) - 1):
        title = mulu_list[i].strip(" ").strip("\\n")
        # index [1] skips the first hit, which is the entry inside the TOC itself
        content = str(re.findall(pantten_xiaobiaoti.format(mulu_list[i], mulu_list[i + 1]), text_1)[1]).strip(" ").strip("\\n")
        # print(title)
        # print(content)

        yijibiaoti_res = re.findall(pantten_yijibiaoti, title)
        erjibiaoti_res = re.findall(pantten_erjibiaoti, title)
        if yijibiaoti_res:
            # title = "、".join([yijibiaoti_res[0][1], yijibiaoti_res[0][1].strip()])
            paper_content[title] = {}
            yijibiaoti = title
            continue

        elif erjibiaoti_res:
            paper_content[yijibiaoti][title] = content.replace("\\n", "\n")

        else:
            # stray text that is neither heading level: append it under its own key
            paper_content[yijibiaoti].setdefault(title, "")
            paper_content[yijibiaoti][title] += "\n".join([title, content])

    while True:
        mulu_str = "\n".join(mulu_list[:-1])
        prompt = mulusuojian.format(mulu_str)
        # try:
        #     res = request_chatgpt_api(prompt)['choices'][0]['message']['content']
        # except:
        #     continue
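        # Hard-coded sample reply standing in for the API call above; with the
        # call commented out, this stub is what gets validated below.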
        res = '''根据您提供的目录内容,如果要将其缩减为只包含4个一级标题的PPT题目,建议选择以下四个一级标题,因为它们分别代表了研究的引入、理论框架、实际应用与实践,以及未来展望,从而形成了一个完整的研究过程和内容框架:

1. 一、绪论
2. 二、电影网站设计的基本概念
3. 三、Python在电影网站设计中的应用
4. 五、电影网站设计的实践与展望

这样的选择既涵盖了研究的背景、目的与意义(绪论),也包括了研究的理论基础(电影网站设计的基本概念),以及研究的实际操作和技术实现(Python在电影网站设计中的应用),最后还有对项目实践经验的总结和对未来发展的展望(电影网站设计的实践与展望)。这四个部分共同构成了一个完整的研究报告或项目介绍的框架,能够全面展示电影网站设计项目的各个方面。
'''

        shaixuan_bool, dabiaoti_res_list = yanzhengyijibiaoti("\n".join(mulu_list), res.replace("\n", "\\n"))
        if shaixuan_bool:
            break
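
    # For each kept level-1 heading, ask the local model to compress every
    # level-2 section into a random 2-6 bullet points.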
    content_1 = []
    for yijibiaoti in dabiaoti_res_list:
        content_2 = []
        for erjibiaoti in paper_content[yijibiaoti]:
            num = random.randint(2, 6)
            content = paper_content[yijibiaoti][erjibiaoti]
            res = request_selfmodel_api(f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)['choices'][0]['message']['content']
            tiaoshu_list = str(res).split("\n")

            tiaoshu_list_new = []
            for dantiao in tiaoshu_list:
                # keep only lines that look like numbered bullets ("1. xxx");
                # indexing findall() directly would crash on preamble lines
                tiaoshu_res = re.findall(pantten_content_tiaoshu, dantiao)
                if tiaoshu_res:
                    tiaoshu_list_new.append(tiaoshu_res[0].strip())
            content_2.append({
                "title_small": erjibiaoti,
                "content_3": tiaoshu_list_new
            })
        content_1.append({
            "title_big": yijibiaoti,
            "content_2": content_2
        })

    data_new = {
        "title": fileName,
        "catalogue": dabiaoti_res_list,
        "content_1": content_1
    }

    with open("data/ceshi.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(data_new, ensure_ascii=False, indent=2))

    # (unused) prompt asking why this topic was chosen and what the research contributes:
    # res = request_chatgpt_api(f'针对下面这篇文章,请回答,我为什么选择这个题目,做这个研究有什么意义?\n' + data)['choices'][0]['message']['content']