r"""Convert marker-annotated JSONL records into span-annotated NER records.

Every non-blank line of ``INPUT_PATH`` is parsed as a JSON object with a
``text`` field, in which one span is wrapped in ``<\Start>`` ... ``<\End>``
markers, and a ``label`` list.  Records whose ``label`` list does not hold
exactly one entry are skipped.  For the remaining records the markers are
stripped from the text and the marked span is emitted as
``[start, end, label]`` with offsets relative to the marker-free text.
Records labelled ``BODY_LABEL`` keep their cleaned text but get an empty
span list.

The results are appended to ``OUTPUT_PATH``, one JSON object per line.
Examples kept from the original author's notes (presumably the target
format):

    {"text": "EU rejects German call to boycott British lamb.", "label": [[0, 2, "ORG"]]}
    {"text": "Peter Blackburn", "label": [[0, 15, "PERSON"]]}
    {"text": "President Obama", "label": [[10, 15, "PERSON"]]}
"""
import json

INPUT_PATH = "data/zc_4.jsonl"
OUTPUT_PATH = "data/zc_ner.jsonl"

# Literal markers surrounding the annotated span inside the input text.
START_MARKER = "<\\Start>"
END_MARKER = "<\\End>"

# Label ("body text") for which no span is emitted.
BODY_LABEL = "正文"


def find_markers(text):
    """Return the raw indices of the first start and end markers in *text*.

    Args:
        text: The marker-annotated string.

    Returns:
        A ``(start, end)`` tuple of indices into *text* (markers included).

    Raises:
        ValueError: If either marker is missing, or if the end marker
            appears before the start marker (which would describe a span
            with a negative length).
    """
    start = text.find(START_MARKER)
    if start < 0:
        raise ValueError("start marker %r not found in %r" % (START_MARKER, text))
    end = text.find(END_MARKER)
    if end < 0:
        raise ValueError("end marker %r not found in %r" % (END_MARKER, text))
    if end < start:
        raise ValueError("end marker precedes start marker in %r" % (text,))
    return start, end


def convert_record(record):
    """Convert one parsed input record into a span-annotated record.

    Args:
        record: A dict with a ``text`` entry and a ``label`` list.

    Returns:
        ``{"text": <marker-free text>, "label": <list of spans>}``, or
        ``None`` when the record does not carry exactly one label.  The
        span list is empty for ``BODY_LABEL`` records and otherwise holds a
        single ``[start, end, label]`` entry.

    Raises:
        ValueError: If the markers are missing or out of order.
        KeyError: If ``text`` or ``label`` is absent from *record*.
    """
    labels = record["label"]
    if len(labels) != 1:
        return None

    raw_text = record["text"]
    # Trace output retained from the original script; it is emitted before
    # the marker lookup so that the offending record is visible on failure.
    print(raw_text)
    print(record)

    marked = str(raw_text)
    start, end = find_markers(marked)
    print(start)
    print(end)

    cleaned = marked.replace(START_MARKER, "").replace(END_MARKER, "")
    label_name = labels[0]
    spans = []
    if label_name != BODY_LABEL:
        # The raw end index still counts the start marker that precedes the
        # span, so shift it left by that marker's length.
        spans.append([start, end - len(START_MARKER), label_name])
    return {"text": cleaned, "label": spans}


def main():
    """Read ``INPUT_PATH``, convert every record, append to ``OUTPUT_PATH``.

    Raises:
        ValueError: If a line is not valid JSON or its markers are missing
            or out of order; the message names the file and line number.
    """
    converted = []
    with open(INPUT_PATH, encoding="utf-8") as source:
        for line_no, line in enumerate(source, start=1):
            if not line.strip():
                # Blank lines are not JSON; skip them rather than crash.
                continue
            try:
                result = convert_record(json.loads(line))
            except ValueError as err:
                raise ValueError(
                    "%s:%d: %s" % (INPUT_PATH, line_no, err)
                ) from err
            if result is not None:
                converted.append(result)

    # Written only after the whole input converted cleanly, so a failure
    # leaves the output file untouched.  Append mode is kept from the
    # original script: re-running adds the records again.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(item, ensure_ascii=False) + "\n" for item in converted
        )


if __name__ == "__main__":
    main()