# Review notes. Everything above the first "diff --git" header is commentary,
# not patch content.
#
# What this patch changes:
#   * api/api_functions.py - get_ner_for_data() gains a `page` parameter and
#     forwards it to run_ner_on_text(); three comment lines are added above it.
#   * func/ner/ner.py - adds imports of torch, CkipWordSegmenter, CkipPosTagger,
#     CkipNerChunker and transformers.pipeline. run_ner_on_text() gains a `page`
#     parameter, builds CkipNerChunker(model="bert-base") and
#     CkipWordSegmenter(device=-1), runs the NER chunker over a hard-coded
#     three-element list `txt` and prints the result. The existing
#     `return "hello ner"` line is unchanged context.
#   * main.py - imports flask.request, restricts the /ner route to POST, reads
#     the "page" form field and passes it to get_ner_for_data().
#   * requirements.txt - new file pinning flask, torch, transformers and
#     ckip-transformers with "~=" specifiers.
#
# NOTE(review): in the added lines run_ner_on_text() never reads `page`; the
#   chunker is only ever applied to the literal `txt` list and the function
#   still returns the fixed string "hello ner".
# NOTE(review): `ws_driver` is assigned but not used afterwards, and torch,
#   pipeline and CkipPosTagger are imported but not referenced anywhere in the
#   added lines - confirm whether later commits use them.
# NOTE(review): both drivers are constructed inside the function body, i.e. on
#   every call.
# NOTE(review): main.py indexes request_data['page'] directly; presumably
#   to_dict() returns a plain dict, so a request without that field would raise
#   KeyError - confirm the intended error response.
# NOTE(review): the three sample sentences in `txt` look like UTF-8 text that
#   was decoded with a single-byte codec (mojibake). They are runtime string
#   literals and are left untouched here - verify against the repository.
# NOTE(review): the patch below is stored with its line breaks collapsed.
#   Indentation and blank context lines cannot be recovered unambiguously from
#   the hunk counts alone, so the text is kept exactly as found rather than
#   re-broken by guesswork.
diff --git a/api/api_functions.py b/api/api_functions.py index e53910630e1cb92dcded85a0588408e7fad5ea9b..5aace6261be356028df8a59baf55067af1c51760 100644 --- a/api/api_functions.py +++ b/api/api_functions.py @@ -2,8 +2,11 @@ from func.ner.ner import * from func.usas.usas import * -def get_ner_for_data(): - result = run_ner_on_text() +# Perform NER on a file +# TAKES XML text page +# Returns NER results +def get_ner_for_data(page): + result = run_ner_on_text(page) return result diff --git a/func/ner/ner.py b/func/ner/ner.py index 06a1ee6fe5c59cd58babec878148a149775c255d..b17f5b8e989e3e5423e56a893db7a1587781b111 100644 --- a/func/ner/ner.py +++ b/func/ner/ner.py @@ -1,3 +1,18 @@ +import torch +from ckip_transformers.nlp import CkipWordSegmenter, CkipPosTagger, CkipNerChunker +from transformers import pipeline -def run_ner_on_text(): +# Perform NER on Text +def run_ner_on_text(page): + ner_driver = CkipNerChunker(model="bert-base") + ws_driver = CkipWordSegmenter(device=-1) + + txt = [ + "å‚…é”ä»ä»Šå°‡åŸ·è¡Œå®‰æ¨‚æ»ï¼Œå»çªç„¶çˆ†å‡ºè‡ªå·±20å¹´å‰é緯來體育å°å°æ®ºï¼Œä»–ä¸æ‡‚自己哪裡得罪到電視å°ã€‚", + "美國åƒè°é™¢é‡å°ä»Šå¤©ç¸½çµ±å¸ƒä»€æ‰€æå的勞工部長趙å°è˜å±•é–‹èªå¯è½è‰æœƒï¼Œé æ–™å¥¹å°‡æœƒå¾ˆé †åˆ©é€šéŽåƒè°é™¢æ”¯æŒï¼Œæˆç‚ºè©²åœ‹æœ‰å²ä»¥ä¾†ç¬¬ä¸€ä½çš„è¯è£”女性內閣æˆå“¡ã€‚", + "空白 也是å¯ä»¥çš„~", + ] + + ner = ner_driver(txt) + print(ner) return "hello ner" diff --git a/main.py b/main.py index 70f347e0ed63df69c4fd25979b1d4546e973ac83..3fec6eb2e765463f0d4aad69bd17fb2aba58cd37 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ from flask import Flask +from flask import request from api.api_functions import * @@ -15,9 +16,11 @@ def test(): return "Success" -@app.route("/ner") +@app.route('/ner', methods=['POST']) def ner(): - result = get_ner_for_data() + request_data = request.form.to_dict() + page = request_data['page'] + result = get_ner_for_data(page) return result diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
0000000000000000000000000000000000000000..b9f546dadef45aa02b389c5cfff8214b31afb41c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +flask~=3.0.3 +torch~=2.3.1 +transformers~=4.41.2 +ckip-transformers~=0.3.4 +