From a8227382a4c3eabb763f8a7e5c6c9bb7e5a386b0 Mon Sep 17 00:00:00 2001
From: Tom Edwards <edwardstj1@cardiff.ac.uk>
Date: Tue, 11 Jun 2024 12:06:28 +0100
Subject: [PATCH] work towards ner

---
 api/api_functions.py |  7 +++++--
 func/ner/ner.py      | 17 ++++++++++++++++-
 main.py              |  7 +++++--
 requirements.txt     |  5 +++++
 4 files changed, 31 insertions(+), 5 deletions(-)
 create mode 100644 requirements.txt

diff --git a/api/api_functions.py b/api/api_functions.py
index e539106..5aace62 100644
--- a/api/api_functions.py
+++ b/api/api_functions.py
@@ -2,8 +2,11 @@ from func.ner.ner import *
 from func.usas.usas import *
 
 
-def get_ner_for_data():
-    result = run_ner_on_text()
+# Perform NER on a page of text.
+# Takes: the XML text of a page.
+# Returns: the NER results from run_ner_on_text.
+def get_ner_for_data(page):
+    result = run_ner_on_text(page)
     return result
 
 
diff --git a/func/ner/ner.py b/func/ner/ner.py
index 06a1ee6..b17f5b8 100644
--- a/func/ner/ner.py
+++ b/func/ner/ner.py
@@ -1,3 +1,18 @@
+import torch
+from ckip_transformers.nlp import CkipWordSegmenter, CkipPosTagger, CkipNerChunker
+from transformers import pipeline
 
-def run_ner_on_text():
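+# Perform NER on text. WIP: `page` is not used yet; hard-coded sample sentences are tagged and a placeholder string is returned.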
+def run_ner_on_text(page):
+    ner_driver = CkipNerChunker(model="bert-base")
+    ws_driver = CkipWordSegmenter(device=-1)
+
+    txt = [
+        "傅達仁今將執行安樂死,卻突然爆出自己20年前遭緯來體育台封殺,他不懂自己哪裡得罪到電視台。",
+        "美國參議院針對今天總統布什所提名的勞工部長趙小蘭展開認可聽證會,預料她將會很順利通過參議院支持,成為該國有史以來第一位的華裔女性內閣成員。",
+        "空白 也是可以的~",
+    ]
+
+    ner = ner_driver(txt)
+    print(ner)
     return "hello ner"
diff --git a/main.py b/main.py
index 70f347e..3fec6eb 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,5 @@
 from flask import Flask
+from flask import request
 
 from api.api_functions import *
 
@@ -15,9 +16,11 @@ def test():
     return "Success"
 
 
-@app.route("/ner")
+@app.route('/ner', methods=['POST'])
 def ner():
-    result = get_ner_for_data()
+    request_data = request.form.to_dict()
+    page = request_data['page']
+    result = get_ner_for_data(page)
     return result
 
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b9f546d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+flask~=3.0.3
+torch~=2.3.1
+transformers~=4.41.2
+ckip-transformers~=0.3.4
+
-- 
GitLab