diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2872a6f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# Generate PPT-format JSON data
+
+
+Upload a .docx file and JSON data describing the PPT structure is returned.
+
+To start the service:
+```
+python ppt_api.py
+```
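+
+The returned JSON follows the structure assembled in `test_docx.py` (keys `title`, `catalogue`, `content_1`, `title_big`, `title_small`, `content_3`); the values below are illustrative placeholders:
+
+```
+{
+  "title": "path/to/input.docx",
+  "catalogue": ["一、...", "二、...", "三、...", "四、..."],
+  "content_1": [
+    {
+      "title_big": "一、...",
+      "content_2": [
+        {"title_small": "1.1 ...", "content_3": ["point 1", "point 2"]}
+      ]
+    }
+  ]
+}
+```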
\ No newline at end of file
diff --git a/test_docx.py b/test_docx.py
new file mode 100644
index 0000000..794aa07
--- /dev/null
+++ b/test_docx.py
@@ -0,0 +1,262 @@
+import os
+
+# Route outbound HTTP traffic through a local proxy (adjust or remove as needed).
+os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809'
+
+import json
+import random
+import re
+
+import docx
+import requests
+from docx.document import Document
+from docx.oxml.table import CT_Tbl
+from docx.oxml.text.paragraph import CT_P
+from docx.table import _Cell, Table
+from docx.text.paragraph import Paragraph
+
+fileName = "data/基于Python的电影网站设计_范文.docx"
+pantten_mulu= '目录(.*?)致谢'
+pantten_xiaobiaoti= "{}(.*?){}"
+pantten_yijibiaoti = '^([一二三四五六七八九])、(.*)'
+pantten_yijibiaoti_content = '^[一二三四五六七八九]、(.*)'
+pantten_erjibiaoti = '^[0-9](\.[0-9]\d*){1}\s{1,}?.*$'
+pantten_erjibiaoti_content = '^[0-9]\.[0-9]\d*{1}\s{1,}?(.*)$'
+pantten_content_tiaoshu = '[0-9]\.{1}\s{0,}?(.*)'
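+# Illustrative matches (assuming headings in the source docx look like these):
+#   re.findall(pantten_yijibiaoti, "一、绪论")   -> [("一", "绪论")]
+#   re.findall(pantten_erjibiaoti, "1.1 xxx")    -> ["1.1 xxx"]
+#   re.findall(pantten_content_tiaoshu, "1. xxx") -> ["xxx"]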
+
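+# Prompt templates are kept in Chinese because the source documents and the model
+# replies are Chinese: they ask the model to build a two-level outline for a thesis
+# title and, later, to shrink an outline down to four first-level headings for the PPT.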
+prompt_two_title_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}-{}个二级标题"
+prompt_two_title_not_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}个二级标题"
+
+
+pantten_title = "为论文题目“(.*?)”生成中文目录"
+pantten_xiaobiaoti_geshu = "每个一级标题包含(.*?)个"
+pantten_dabiaoti_geshu = "一级标题生成(.*?)个"
+
+mulusuojian = "请问把以下目录缩减成只有4个一级标题作为ppt的题目,请问留下原始目录中的哪4个一级标题最合适,一级标题必须在原始目录中\n{}\n"
+
+
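+# Inference endpoints: a self-hosted model service and the OpenAI chat completions API.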
+self_api = "http://192.168.31.149:12004/predict"
+gpt_api = "https://api.openai.com/v1/chat/completions"
+
+
+def iter_block_items(parent):
+ """
+ Yield each paragraph and table child within *parent*, in document order.
+ Each returned value is an instance of either Table or Paragraph. *parent*
+ would most commonly be a reference to a main Document object, but
+ also works for a _Cell object, which itself can contain paragraphs and tables.
+ """
+ if isinstance(parent, Document):
+ parent_elm = parent.element.body
+ elif isinstance(parent, _Cell):
+ parent_elm = parent._tc
+ else:
+        raise ValueError("parent must be a Document or _Cell instance")
+
+ for child in parent_elm.iterchildren():
+ if isinstance(child, CT_P):
+ yield Paragraph(child, parent)
+ elif isinstance(child, CT_Tbl):
+ yield Table(child, parent)
+
+
+def read_table(table):
+ return [[cell.text for cell in row.cells] for row in table.rows]
+
+
+def read_word(word_path):
+
+ paper_text = []
+ doc = docx.Document(word_path)
+ for block in iter_block_items(doc):
+ if isinstance(block, Paragraph):
+ paper_text.append(block.text)
+ elif isinstance(block, Table):
+            table_list = read_table(block)
+            # Flatten each table into pipe-separated rows so it survives as plain text.
+            table_rows = [" | ".join(row) for row in table_list]
+            paper_text.append("\n" + "\n".join(table_rows) + "\n")
+ paper_text = "\n".join(paper_text)
+ return paper_text
+
+
+def getText(fileName):
+ doc = docx.Document(fileName)
+ TextList = []
+ for paragraph in doc.paragraphs:
+ TextList.append(paragraph.text)
+
+ return '\n'.join(TextList)
+
+
+def request_selfmodel_api(prompt):
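+    """Call the self-hosted model endpoint with a chat-completion style payload and return the parsed JSON reply."""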
+ print(prompt)
+ url = "http://192.168.31.149:12004/predict"
+ data = {
+ "model": "gpt-4-turbo-preview",
+ "messages": [
+ {"role": "user", "content": prompt}
+ ],
+ "top_p": 0.7,
+ "temperature": 0.95
+ }
+ response = requests.post(
+ url,
+ json=data,
+ timeout=100000
+ )
+
+ return response.json()
+
+
+def request_chatgpt_api(prompt):
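+    """Call the OpenAI chat completions API with a single user message and return the parsed JSON reply."""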
+    # Read the key from the environment instead of hard-coding a secret in the repo.
+    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+ url = "https://api.openai.com/v1/chat/completions"
+ # url = "https://one.aiskt.com"
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {OPENAI_API_KEY}"
+ }
+ data = {
+ "model": "gpt-4-turbo-preview",
+ "messages": [
+ {"role": "user", "content": prompt}
+ ],
+ "top_p": 0.7,
+ "temperature": 0.95
+ }
+ response = requests.post(url,
+ headers=headers,
+ data=json.dumps(data),
+ timeout=1200)
+
+ return response.json()
+
+
+def yanzhengyijibiaoti(mulu, res):
+    '''
+    Check whether the generated reply keeps exactly four first-level headings,
+    all of which appear in the original outline.
+    :param mulu: original outline text, one heading per line
+    :param res: model reply text
+    :return: (bool, list of the four matched first-level headings)
+    '''
+
+ mulu_list = str(mulu).split("\n")
+
+ dabiaoti_list = []
+ dabiaoti_res_list = []
+    for i in mulu_list:
+        res_re = re.findall(pantten_yijibiaoti, i)
+        if res_re:
+            dabiaoti_list.append(res_re[0])
+ for i in dabiaoti_list:
+ if i[1].strip() in res:
+ dabiaoti_res_list.append("、".join(i))
+
+    return len(dabiaoti_res_list) == 4, dabiaoti_res_list
+
+if __name__ == '__main__':
+
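+    # Serialize the extracted text with json.dumps so newlines become literal "\n"
+    # escape sequences; the regex slicing below then runs over one long string.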
+ text_1 = json.dumps(read_word(fileName),ensure_ascii=False)
+ print(text_1)
+ mulu_str = re.findall(pantten_mulu, text_1)[0]
+ print(mulu_str)
+ mulu_list_xuhao = str(mulu_str).split("\\n")
+
+ mulu_list = []
+ for i in mulu_list_xuhao:
+ if i != "":
+ mulu_list.append(i.split("\\t")[0])
+
+ mulu_list.append("致谢")
+
+ print(mulu_list)
+ content_list = []
+
+ yijibiaoti = ""
+ paper_content = {}
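+    # Walk consecutive TOC entries and slice the text that lies between them,
+    # nesting each second-level section under the current first-level heading.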
+ for i in range(len(mulu_list) -1):
+ title = mulu_list[i].strip(" ").strip("\\n")
+        content = str(re.findall(pantten_xiaobiaoti.format(re.escape(mulu_list[i]), re.escape(mulu_list[i + 1])), text_1)[1]).strip(" ").strip("\\n")
+ # print(title)
+ # print(content)
+
+        yijibiaoti_res = re.findall(pantten_yijibiaoti, title)
+        erjibiaoti_res = re.findall(pantten_erjibiaoti, title)
+        if yijibiaoti_res:
+            # First-level heading: open a new section.
+            # title = "、".join([yijibiaoti_res[0][1], yijibiaoti_res[0][1].strip()])
+            paper_content[title] = {}
+            yijibiaoti = title
+            continue
+
+        elif erjibiaoti_res:
+            # Second-level heading: store its body text under the current section.
+            paper_content[yijibiaoti][title] = content.replace("\\n", "\n")
+
+        else:
+            # Any other TOC entry: keep it under the current section, joining the
+            # heading and its body text as separate lines.
+            paper_content.setdefault(yijibiaoti, {})
+            paper_content[yijibiaoti][title] = "\n".join([title, content.replace("\\n", "\n")])
+
+
+ while True:
+ mulu_str = "\n".join(mulu_list[:-1])
+        prompt = mulusuojian.format(mulu_str)
+ # try:
+ # res = request_chatgpt_api(prompt)['choices'][0]['message']['content']
+ # except:
+ # continue
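+        # The live API call above is commented out; a canned reply is used instead
+        # so the rest of the pipeline can be exercised offline.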
+ res = '''根据您提供的目录内容,如果要将其缩减为只包含4个一级标题的PPT题目,建议选择以下四个一级标题,因为它们分别代表了研究的引入、理论框架、实际应用与实践,以及未来展望,从而形成了一个完整的研究过程和内容框架:
+
+ 1. 一、绪论
+ 2. 二、电影网站设计的基本概念
+ 3. 三、Python在电影网站设计中的应用
+ 4. 五、电影网站设计的实践与展望
+
+ 这样的选择既涵盖了研究的背景、目的与意义(绪论),也包括了研究的理论基础(电影网站设计的基本概念),以及研究的实际操作和技术实现(Python在电影网站设计中的应用),最后还有对项目实践经验的总结和对未来发展的展望(电影网站设计的实践与展望)。这四个部分共同构成了一个完整的研究报告或项目介绍的框架,能够全面展示电影网站设计项目的各个方面。
+ '''
+
+ shaixuan_bool, dabiaoti_res_list = yanzhengyijibiaoti("\n".join(mulu_list), res.replace("\n", "\\n"))
+        if shaixuan_bool:
+ break
+
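+    # For each kept first-level heading, ask the local model to distill every
+    # second-level section into a few numbered bullet points for the slides.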
+ content_1 = []
+ for yijibiaoti in dabiaoti_res_list:
+ content_2 = []
+ for erjibiaoti in paper_content[yijibiaoti]:
+ num = random.randint(2, 6)
+ content = paper_content[yijibiaoti][erjibiaoti]
+ res = request_selfmodel_api(f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)['choices'][0]['message']['content']
+ tiaoshu_list = str(res).split("\n")
+
+ tiaoshu_list_new = []
+            for dantiao in tiaoshu_list:
+                tiaoshu_res = re.findall(pantten_content_tiaoshu, dantiao)
+                # Skip lines in the reply that are not numbered items.
+                if tiaoshu_res:
+                    tiaoshu_list_new.append(tiaoshu_res[0].strip())
+ content_2.append({
+ "title_small": erjibiaoti,
+ "content_3": tiaoshu_list_new
+ })
+ content_1.append({
+ "title_big": yijibiaoti,
+ "content_2": content_2
+ })
+
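+    # Assemble the final PPT-oriented structure and write it to disk.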
+ data_new = {
+ "title": fileName,
+ "catalogue": dabiaoti_res_list,
+ "content_1": content_1
+ }
+
+ with open("data/ceshi.json", "w", encoding="utf-8") as f:
+ f.write(json.dumps(data_new, ensure_ascii=False, indent=2))
+
+
+
+
+ # res = request_chatgpt_api(f'针对下面这篇文章,请回答,我为什么选择这个题目,做这个研究有什么意义?\n' + data)['choices'][0]['message']['content']
+
+
+
+