From 9e6931cdb628b6f12dd9543027a85144cd25aeba Mon Sep 17 00:00:00 2001
From: Tom Edwards <edwardstj1@cardiff.ac.uk>
Date: Wed, 4 Sep 2024 14:52:07 +0100
Subject: [PATCH] Work towards USAS + NER

---
 func/ner/ner.py   |  7 +++++++
 func/usas/usas.py | 23 ++++++++++++++++++++---
 main.py           | 10 +++++++---
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/func/ner/ner.py b/func/ner/ner.py
index 7777b73..b7a4e51 100644
--- a/func/ner/ner.py
+++ b/func/ner/ner.py
@@ -14,6 +14,13 @@ def run_ner_on_text(page):
         word = item.word
         ner = item.ner
         idx = item.idx
+        print(type(idx))
+        print(idx)
+        print(type(ner))
+        print(ner)
+        print(type(word))
+        print(word)
+        print('--------')
         obj = {"word": word, "translation": "", "ner": ner, "idx": idx}
 
         data.append(obj)
diff --git a/func/usas/usas.py b/func/usas/usas.py
index c0ca4fe..455792f 100644
--- a/func/usas/usas.py
+++ b/func/usas/usas.py
@@ -11,10 +11,27 @@ def run_usas_on_text(page):
     nlp.add_pipe('pymusas_rule_based_tagger', source=chinese_tagger_pipeline)
 
     output_doc = nlp(page)
+    data = []
 
-    print(f'Text\tPOS\tUSAS Tags')
+    tags = []
+
+    print(f'Text\tPOS\tMWE start and end index\tUSAS Tags')
     for token in output_doc:
-        print(f'{token.text}\t{token.pos_}\t{token._.pymusas_tags}')
+        start, end = token._.pymusas_mwe_indexes[0]
+        idx = (start, end)
+
+        for el in token._.pymusas_tags:
+            obj = {"word": token.text, "Usas Tags": el, "idx": idx}
+            tags.append(el)
+            data.append(obj)
+
+    res = []
+    procTags = []
+    for x in tags:
+        if x not in procTags:
+            res.append({"Tag": x, "Count": tags.count(x)})
+
+        procTags.append(x)
 
-    result = {'output': "Hello USAS", 'message': 'Done', 'code': 'SUCCESS'}
+    result = {'output': res, 'message': 'Done', 'code': 'SUCCESS'}
     return result
diff --git a/main.py b/main.py
index a30a228..34bb595 100644
--- a/main.py
+++ b/main.py
@@ -19,18 +19,22 @@ def test():
     return "Success"
 
 
-@app.route('/ner', methods=['POST'])
+@app.route("/ner", methods=['POST'])
+
 def ner():
     request_data = request.get_json()
-    print(request_data)
+    #print(request_data)
     page = request_data['page']
     result = get_ner_for_data(page)
     return result
 
 
-@app.route("/usas")
+@app.route("/usas", methods=['POST'])
 def usas():
+    print('hello')
     request_data = request.get_json()
+    print(request_data)
     page = request_data['page']
     result = get_usas_for_data(page)
+
     return result
-- 
GitLab