From 3b994a961e4a24ca2800de93a14006376e311a13 Mon Sep 17 00:00:00 2001
From: Tom Edwards <edwardstj1@cardiff.ac.uk>
Date: Wed, 12 Jun 2024 09:34:09 +0100
Subject: [PATCH] Work towards USAS

---
 func/usas/usas.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/func/usas/usas.py b/func/usas/usas.py
index 562fe62..1e7057c 100644
--- a/func/usas/usas.py
+++ b/func/usas/usas.py
@@ -4,6 +4,18 @@ import spacy
 
 # Perform USAS on Text
 def run_usas_on_text(page):
+    # Exclude the 'parser' and 'ner' components, as we do not need them.
+    nlp = spacy.load('zh_core_web_sm', exclude=['parser', 'ner'])
+    # Load the Chinese PyMUSAS rule-based tagger in a separate spaCy pipeline
+    chinese_tagger_pipeline = spacy.load('cmn_dual_upos2usas_contextual')
+    # Add the Chinese PyMUSAS rule-based tagger to the main spaCy pipeline
+    nlp.add_pipe('pymusas_rule_based_tagger', source=chinese_tagger_pipeline)
+
+    output_doc = nlp(page)
+
+    print(f'Text\tPOS\tUSAS Tags')
+    for token in output_doc:
+        print(f'{token.text}\t{token.pos_}\t{token._.pymusas_tags}')
 
 
 
-- 
GitLab