diff --git a/ppt_api.py b/ppt_api.py index bb4db89..9aadc7f 100644 --- a/ppt_api.py +++ b/ppt_api.py @@ -222,6 +222,11 @@ def catalogue_extract(structure): catalogue_list = [] for level, text in structure[:-1]: + text = str(text).strip(" ").strip("\n") + erji_p = re.findall(pantten_erjibiaoti, text) + yiji_p = re.findall(pantten_yijibiaoti, text) + if erji_p == [] and yiji_p == []: + continue catalogue_list.append(str(text).strip(" ").strip("\n")) catalogue_str = "\n".join(catalogue_list) return catalogue_list, catalogue_str