You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
39 lines
1.0 KiB
39 lines
1.0 KiB
import json

# Shape of the records this script emits, one JSON object per line:
#   {"text": "EU rejects German call to boycott British lamb.", "label": [[0, 2, "ORG"]]}
#   {"text": "Peter Blackburn", "label": [[0, 15, "PERSON"]]}
#   {"text": "President Obama", "label": [[10, 15, "PERSON"]]}

# Inline markers that delimit the annotated span inside the raw "text" field.
START_MARKER = "<\\Start>"
END_MARKER = "<\\End>"
# Records carrying this label ("正文", i.e. main body text) keep their cleaned
# text but are written with an empty "label" list.
BODY_LABEL = "正文"


def convert_record(record):
    """Convert one marker-annotated record into offset-annotated form.

    Args:
        record: Parsed JSON object with a "text" field that contains
            START_MARKER and END_MARKER, and a "label" list.

    Returns:
        A dict ``{"text": cleaned_text, "label": spans}`` where the markers
        have been stripped from the text and ``spans`` is either empty (for
        BODY_LABEL records) or ``[[start, end, label]]`` with offsets into
        the cleaned text. Returns ``None`` when the record does not carry
        exactly one label.

    Raises:
        ValueError: If either marker is missing, or the end marker appears
            before the start marker.
    """
    labels = record["label"]
    if len(labels) != 1:
        return None

    raw_text = str(record["text"])
    # Progress/debug output kept from the original script.
    print(raw_text)
    print(record)

    # str.index raises ValueError when a marker is absent.
    start = raw_text.index(START_MARKER)
    end = raw_text.index(END_MARKER)
    print(start)
    print(end)

    if end < start:
        raise ValueError(
            f"end marker precedes start marker in text: {raw_text!r}"
        )

    clean_text = raw_text.replace(START_MARKER, "").replace(END_MARKER, "")

    tag = labels[0]
    spans = []
    if tag != BODY_LABEL:
        # Removing the start marker shifts everything after it to the left
        # by the marker's length, so the end offset must be shifted too.
        spans.append([start, end - len(START_MARKER), tag])

    return {"text": clean_text, "label": spans}


def main(src="data/zc_4.jsonl", dst="data/zc_ner.jsonl"):
    """Convert every record in ``src`` and append the results to ``dst``.

    Args:
        src: Path of the marker-annotated JSONL input file.
        dst: Path of the JSONL output file; opened in append mode, so
            existing content is kept.
    """
    converted = []
    with open(src, encoding="utf-8") as f:
        for line in f:
            # Blank lines are not valid JSON; skip them instead of crashing.
            if not line.strip():
                continue
            item = convert_record(json.loads(line))
            if item is not None:
                converted.append(item)

    with open(dst, "a", encoding="utf-8") as f:
        for item in converted:
            f.write(json.dumps(item, ensure_ascii=False))
            f.write("\n")


if __name__ == "__main__":
    main()