From 6bcd5075c479ae674a0b11ad7f7d752e436ed5b7 Mon Sep 17 00:00:00 2001 From: Thomas Edwards <tomedwards@sirma.com> Date: Tue, 11 Jun 2024 14:45:43 +0100 Subject: [PATCH] Implements NER - to use send http get request to {domain}/ner with form data key = page / value = document/s sends back json success with output containing ner results --- .idea/cogniStance.iml | 1 + api/api_functions.py | 8 +++++++- func/ner/ner.py | 22 +++++++++++++--------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.idea/cogniStance.iml b/.idea/cogniStance.iml index 2f0a841..1058f41 100644 --- a/.idea/cogniStance.iml +++ b/.idea/cogniStance.iml @@ -3,6 +3,7 @@ <component name="NewModuleRootManager"> <content url="file://$MODULE_DIR$"> <excludeFolder url="file://$MODULE_DIR$/.venv" /> + <excludeFolder url="file://$MODULE_DIR$/venv" /> </content> <orderEntry type="jdk" jdkName="Python 3.9 (cognistance)" jdkType="Python SDK" /> <orderEntry type="sourceFolder" forTests="false" /> diff --git a/api/api_functions.py b/api/api_functions.py index 5aace62..92ab989 100644 --- a/api/api_functions.py +++ b/api/api_functions.py @@ -1,3 +1,5 @@ +from flask import make_response, jsonify + from func.ner.ner import * from func.usas.usas import * @@ -7,7 +9,11 @@ from func.usas.usas import * # Returns NER results def get_ner_for_data(page): result = run_ner_on_text(page) - return result + + if result["code"] == "SUCCESS": + return make_response(jsonify(result), 201) + + return make_response(jsonify(result), 400) def get_usas_for_data(): diff --git a/func/ner/ner.py b/func/ner/ner.py index b17f5b8..0001999 100644 --- a/func/ner/ner.py +++ b/func/ner/ner.py @@ -5,14 +5,18 @@ from transformers import pipeline # Perform NER on Text def run_ner_on_text(page): ner_driver = CkipNerChunker(model="bert-base") - ws_driver = CkipWordSegmenter(device=-1) - txt = [ - "傅達仁今將執行安樂死，卻突然爆出自己20年前遭緯來體育台封殺，他不懂自己哪裡得罪到電視台。", - 
"美國參議院針對今天總統布什所提名的勞工部長趙小蘭展開認可聽證會，預料她將會很順利通過參議院支持，成為該國有史以來第一位的華裔女性內閣成員。", - "空白 也是可以的~", - ] + data = [] + ner = ner_driver([page]) - ner = ner_driver(txt) - print(ner) - return "hello ner" + for item in ner[0]: + word = item.word + ner = item.ner + idx = item.idx + obj = {"word":word, "translation": "" , "ner":ner, "idx":idx} + + data.append(obj) + + result = {'output': data,'message': 'Done', 'code': 'SUCCESS'} + + return result -- GitLab