From 9e6931cdb628b6f12dd9543027a85144cd25aeba Mon Sep 17 00:00:00 2001 From: Tom Edwards <edwardstj1@cardiff.ac.uk> Date: Wed, 4 Sep 2024 14:52:07 +0100 Subject: [PATCH] Work towards USAS + ner --- func/ner/ner.py | 7 +++++++ func/usas/usas.py | 23 ++++++++++++++++++++--- main.py | 10 +++++++--- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/func/ner/ner.py b/func/ner/ner.py index 7777b73..b7a4e51 100644 --- a/func/ner/ner.py +++ b/func/ner/ner.py @@ -14,6 +14,13 @@ def run_ner_on_text(page): word = item.word ner = item.ner idx = item.idx + print(type(idx)) + print(idx) + print(type(ner)) + print(ner) + print(type(word)) + print(word) + print('--------') obj = {"word": word, "translation": "", "ner": ner, "idx": idx} data.append(obj) diff --git a/func/usas/usas.py b/func/usas/usas.py index c0ca4fe..455792f 100644 --- a/func/usas/usas.py +++ b/func/usas/usas.py @@ -11,10 +11,27 @@ def run_usas_on_text(page): nlp.add_pipe('pymusas_rule_based_tagger', source=chinese_tagger_pipeline) output_doc = nlp(page) + data = [] - print(f'Text\tPOS\tUSAS Tags') + tags = [] + + print(f'Text\tPOS\tMWE start and end index\tUSAS Tags') for token in output_doc: - print(f'{token.text}\t{token.pos_}\t{token._.pymusas_tags}') + start, end = token._.pymusas_mwe_indexes[0] + idx = (start, end) + + for el in token._.pymusas_tags: + obj = {"word": token.text, "Usas Tags": el, "idx": idx} + tags.append(el) + data.append(obj) + + res = [] + procTags = [] + for x in tags: + if x not in procTags: + res.append({"Tag": x, "Count": tags.count(x)}) + + procTags.append(x) - result = {'output': "Hello USAS", 'message': 'Done', 'code': 'SUCCESS'} + result = {'output': res, 'message': 'Done', 'code': 'SUCCESS'} return result diff --git a/main.py b/main.py index a30a228..34bb595 100644 --- a/main.py +++ b/main.py @@ -19,18 +19,22 @@ def test(): return "Success" -@app.route('/ner', methods=['POST']) +@app.route("/ner", methods=['POST']) + def ner(): request_data = request.get_json() - print(request_data) + #print(request_data) page = request_data['page'] result = get_ner_for_data(page) return result -@app.route("/usas") +@app.route("/usas", methods=['POST']) def usas(): + print('hello') request_data = request.get_json() + print(request_data) page = request_data['page'] result = get_usas_for_data(page) + return result -- GitLab