diff --git a/api/api_functions.py b/api/api_functions.py
index 8f2984856e0e2f6e10aa04656112e1025ac95a9c..2b83b6da513fb4a19dec90c8347fb2b0043e1678 100644
--- a/api/api_functions.py
+++ b/api/api_functions.py
@@ -4,6 +4,8 @@ from func.ner.ner import *
 from func.sentiment.sentiment import *
 from func.translation.translation import run_translation_on_text
 from func.usas.usas import *
+from func.collocation.collocation import *
+from func.concordance.concordance import *
 
 
 # Perform NER on a file
@@ -51,4 +53,21 @@ def get_sentiment_for_data(page):
     if result["code"] == "SUCCESS":
         return make_response(jsonify(result), 201)
 
-    return make_response(jsonify(result), 400)
\ No newline at end of file
+    return make_response(jsonify(result), 400)
+
+def get_collocation_for_data(page):
+    result = run_collocation_on_text(page)
+
+    if result["code"] == "SUCCESS":
+        return make_response(jsonify(result), 201)
+
+    return make_response(jsonify(result), 400)
+
+def get_concordance_for_data(page):
+    result = run_concordance_on_text(page)
+
+    if result["code"] == "SUCCESS":
+        return make_response(jsonify(result), 201)
+
+    return make_response(jsonify(result), 400)
+
diff --git a/func/collocation/collocation.py b/func/collocation/collocation.py
new file mode 100644
index 0000000000000000000000000000000000000000..85db37eb60b4dbec08d062d9db9d5f883b0c69a6
--- /dev/null
+++ b/func/collocation/collocation.py
@@ -0,0 +1,64 @@
+import spacy
+import math
+from collections import Counter, defaultdict
+from shared.translate import translate
+
+# Commented-out sample input used during development:
+#page = '“专精特新”企业，是指具有专业化、精细化、特色化、新颖化四大特征的中小企业。创新是这类企业的灵魂，足够的研发费用投入则是开展创新的重要保障。许多尚处在成长期的“专精特新”企业，近期普遍遭遇“钱紧”难题。如何集聚更多的资金投入研发、保持创新领先地位是这些企业近来面临的最大烦恼。“作为一家新材料研发公司，创新是我们发展的重要驱动力，只有研发投入的不断加码，企业创新发展的步伐才不会降速。”浙江省“专精特新”企业、宁波创润新材料有限公司董事长吴景晖说，过去3年，企业在研发投入方面不遗余力，累计投入2500万元，这对企业来说不是个小数目。“今年新兴市场的研发需求十分迫切，我们一直想加快‘超高纯钛及钛合金中试生产线项目’的研发进度，但苦于资金不足。令人高兴的是，今年4月340万元存量增值税留抵税额的到账，有效缓解了企业的资金压力，加快了企业的研发进度。”吴景晖说，目前，“超高纯钛及钛合金中试生产线项目”正在有序推进，一旦投产将缓解半导体产业的高纯钛原材料供应不足问题，提升国产溅射靶材的市场竞争力'
+
+# Step 4: calculate PMI for a bigram from the bigram and unigram probabilities.
+# The 1e-10 floor guards against taking the log of zero for unseen events.
+def calculate_pmi(bigram, p_bigram, p_word):
+    word1, word2 = bigram
+    return math.log2(max(p_bigram[bigram], 1e-10) / (max(p_word[word1], 1e-10) * max(p_word[word2], 1e-10)))
+
+
+# Replace characters that would break downstream rendering, then trim whitespace.
+def escape(token: str):
+    token = token.replace("&", " ")
+    token = token.replace("-", " ")
+    token = token.replace("<", " ")
+    token = token.replace(">", " ")
+    token = token.replace("\"", " ")
+    token = token.replace("'", " ")
+    return token.strip()
+
+
+def run_collocation_on_text(page):
+    corpus = []
+
+    # Step 1: tokenise with the spaCy Chinese pipeline, dropping stop words
+    nlp = spacy.load('zh_core_web_sm')
+    doc = nlp(page)
+    for token in doc:
+        if not token.is_stop:
+            corpus.append(escape(token.text.lower()))
+
+    # Step 2: calculate unigram and bigram frequencies
+    word_freq = Counter(corpus)
+    bigram_freq = Counter(zip(corpus[:-1], corpus[1:]))
+
+    # Step 3: calculate probabilities
+    total_words = len(corpus)
+    p_word = defaultdict(float)
+    p_bigram = defaultdict(float)
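+
+    # Maximum-likelihood estimates: raw counts divided by the number of
+    # unigram (or bigram) observations in the corpus.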
+    for word, freq in word_freq.items():
+        p_word[word] = freq / total_words
+
+    total_bigrams = len(corpus) - 1
+    all_pmi_scores = []
+    for bigram, freq in bigram_freq.items():
+        p_bigram[bigram] = freq / total_bigrams
+        bigramstr = bigram[0] + ' ' + bigram[1]
+        translation = translate(bigramstr).text.lower()
+        pmi = calculate_pmi(bigram, p_bigram, p_word)
+
+        all_pmi_scores.append({"0 Term": bigramstr, "1 Translation": translation, "2 PMI Score": round(pmi, 3)})
+
+    # Keep the 40 highest-scoring bigrams
+    top_pmi_scores = sorted(all_pmi_scores, key=lambda x: x["2 PMI Score"], reverse=True)[:40]
+
+    return {'output': top_pmi_scores, 'message': 'Done', 'code': 'SUCCESS'}
diff --git a/func/concordance/concordance.py b/func/concordance/concordance.py
new file mode 100644
index 0000000000000000000000000000000000000000..966a1a6d589183ca7d94aa6071afc9c63bd5d351
--- /dev/null
+++ b/func/concordance/concordance.py
@@ -0,0 +1,113 @@
+import spacy
+import math
+import re
+from collections import Counter, defaultdict
+from shared.translate import translate
+from wasabi import Printer
+from spacy.matcher import PhraseMatcher
+
+# Commented-out sample input used during development:
+#page = '“专精特新”企业，是指具有专业化、精细化、特色化、新颖化四大特征的中小企业。创新是这类企业的灵魂，足够的研发费用投入则是开展创新的重要保障。许多尚处在成长期的“专精特新”企业，近期普遍遭遇“钱紧”难题。如何集聚更多的资金投入研发、保持创新领先地位是这些企业近来面临的最大烦恼。“作为一家新材料研发公司，创新是我们发展的重要驱动力，只有研发投入的不断加码，企业创新发展的步伐才不会降速。”浙江省“专精特新”企业、宁波创润新材料有限公司董事长吴景晖说，过去3年，企业在研发投入方面不遗余力，累计投入2500万元，这对企业来说不是个小数目。“今年新兴市场的研发需求十分迫切，我们一直想加快‘超高纯钛及钛合金中试生产线项目’的研发进度，但苦于资金不足。令人高兴的是，今年4月340万元存量增值税留抵税额的到账，有效缓解了企业的资金压力，加快了企业的研发进度。”吴景晖说，目前，“超高纯钛及钛合金中试生产线项目”正在有序推进，一旦投产将缓解半导体产业的高纯钛原材料供应不足问题，提升国产溅射靶材的市场竞争力'
+
+# Calculate PMI for a bigram from the bigram and unigram probabilities.
+def calculate_pmi(bigram, p_bigram, p_word):
+    word1, word2 = bigram
+    return math.log2(max(p_bigram[bigram], 1e-10) / (max(p_word[word1], 1e-10) * max(p_word[word2], 1e-10)))
+
+
+# Replace characters that would break downstream rendering, then trim whitespace.
+def escape(token: str):
+    token = token.replace("&", " ")
+    token = token.replace("-", " ")
+    token = token.replace("<", " ")
+    token = token.replace(">", " ")
+    token = token.replace("\"", " ")
+    token = token.replace("'", " ")
+    return token.strip()
+
+
+# Return the 40 bigrams with the highest PMI scores (the same method as
+# func/collocation/collocation.py, minus the translation step).
+def collocations(doc):
+    corpus = []
+
+    for token in doc:
+        if not token.is_stop:
+            corpus.append(escape(token.text.lower()))
+
+    # Calculate unigram and bigram frequencies
+    word_freq = Counter(corpus)
+    bigram_freq = Counter(zip(corpus[:-1], corpus[1:]))
+
+    # Calculate probabilities
+    total_words = len(corpus)
+    p_word = defaultdict(float)
+    p_bigram = defaultdict(float)
+
+    for word, freq in word_freq.items():
+        p_word[word] = freq / total_words
+
+    total_bigrams = len(corpus) - 1
+    all_pmi_scores = []
+    for bigram, freq in bigram_freq.items():
+        p_bigram[bigram] = freq / total_bigrams
+        bigramstr = bigram[0] + ' ' + bigram[1]
+        pmi = calculate_pmi(bigram, p_bigram, p_word)
+
+        all_pmi_scores.append({"0 Term": bigramstr, "2 PMI Score": round(pmi, 3)})
+
+    top_pmi_scores = sorted(all_pmi_scores, key=lambda x: x["2 PMI Score"], reverse=True)[:40]
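+    # Keep only the bigram strings; they seed the PhraseMatcher in run_concordance_on_text.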
+    terms = [item.get('0 Term') for item in top_pmi_scores]
+
+    return terms
+
+
+# Strip ANSI escape sequences so coloured terminal output is JSON-safe.
+def clean(text):
+    ansi_escape = re.compile(r'''
+        \x1B  # ESC
+        (?:   # 7-bit C1 Fe (except CSI)
+            [@-Z\\-_]
+        |     # or [ for CSI, followed by a control sequence
+            \[
+            [0-?]*  # Parameter bytes
+            [ -/]*  # Intermediate bytes
+            [@-~]   # Final byte
+        )
+    ''', re.VERBOSE)
+
+    return ansi_escape.sub('', text)
+
+
+def run_concordance_on_text(page):
+    nlp = spacy.load('zh_core_web_sm')
+    doc = nlp(page)
+    terms = collocations(doc)
+    concordances = []
+
+    # Match every occurrence of the top collocations, case-insensitively
+    matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
+    patterns = [nlp.make_doc(term) for term in terms]
+    matcher.add("TermCollocations", patterns)
+
+    matches = matcher(doc)
+    printer = Printer()
+    for match_id, start, end in matches:
+        contextStart = max(start - 7, 0)
+        precedingSlice = clean(doc[contextStart:start].text)
+        precedingSliceTr = clean(translate(doc[contextStart:start].text).text)
+
+        # Highlight the matched term in red, then strip the ANSI codes for the JSON payload
+        matchedTerm = clean(printer.text(doc[start:end].text, color='red', no_print=True))
+        matchedTermTr = clean(printer.text(translate(doc[start:end].text).text, color='red', no_print=True))
+
+        followingSlice = clean(doc[end:end + 7].text)
+        followingSliceTr = clean(translate(doc[end:end + 7].text).text)
+
+        context = precedingSlice + ', ' + matchedTerm + ', ' + followingSlice
+        contextTr = precedingSliceTr + ', ' + matchedTermTr + ', ' + followingSliceTr
+
+        concordances.append({"0 Term": matchedTerm, "1 Eng": matchedTermTr, "2 Context": context, "3 Context Eng": contextTr})
+
+    return {'output': concordances, 'message': 'Done', 'code': 'SUCCESS'}
diff --git a/func/sentiment/sentiment.py b/func/sentiment/sentiment.py
index a7c5076365c9208c028653fde23458a0e610a72c..34d4fda3cde5f283ce0770779854d41f2d18fbe5 100644
--- a/func/sentiment/sentiment.py
+++ b/func/sentiment/sentiment.py
@@ -25,9 +25,13 @@ def run_sentiment_on_text(page):
         res = nlp(p)[0]['label']
         counts[res] = counts.get(res, 0) + 1
 
+    # Always report a negative count, even when it is zero
+    if 'negative' not in counts:
+        counts['negative'] = 0
+
     sentiments = []
     for k in counts.keys():
-        sentiments.append({"Sentiment": k, "Count": counts[k]})
+        sentiments.append({"0 Sentiment": k, "1 Count": counts[k]})
 
     result = {'output': sentiments, 'message': 'Done', 'code': 'SUCCESS'}
     return result
diff --git a/main.py b/main.py
index 4a14e71ad676ae89f5f0b84675b9ae2d26e3f5cf..5eec639603704766370978254d88bf5b2942a3a3 100644
--- a/main.py
+++ b/main.py
@@ -53,3 +53,19 @@ def sentiment():
     result = get_sentiment_for_data(page)
 
     return result
+
+@app.route("/collocation", methods=['POST'])
+def collocation():
+    request_data = request.get_json()
+    page = request_data['page']
+    result = get_collocation_for_data(page)
+
+    return result
+
+@app.route("/concordance", methods=['POST'])
+def concordance():
+    request_data = request.get_json()
+    page = request_data['page']
+    result = get_concordance_for_data(page)
+
+    return result
diff --git a/shared/translate.py b/shared/translate.py
index 5c993a10052f608854fb410b31df248266e2d65c..d269dd1acd35cfa46b009d4527360251b63e6b88 100644
--- a/shared/translate.py
+++ b/shared/translate.py
@@ -1,4 +1,8 @@
 from googletrans import Translator
+import csv
+from os import listdir
+from os.path import isfile, join
+import time
 
 
 # wrapper for the googletrans library. Takes in chinese string returns english
@@ -8,3 +12,39 @@ def translate(word):
     result = translator.translate(word, src='zh-cn', dest='en')
 
     return result
+
+
+# Read a CSV file into a list of rows.
+def get_csv(fileName):
+    with open(fileName, newline='') as csvfile:
+        data = list(csv.reader(csvfile))
+
+    return data
+
+
+def list_files(directory):
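+    # Batch helper: translate the first column of every CSV file in a
+    # directory and write the results to a sibling "trans<name>" file.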
+    for f in listdir(directory):
+        if isfile(join(directory, f)):
+            print(f)
+            file = get_csv(join(directory, f))
+
+            transfile = []
+
+            for line in file:
+                try:
+                    translation = translate(line[0])
+                    transfile.append(translation.text)
+                    time.sleep(1)  # throttle requests to the translation service
+                except Exception:
+                    # skip rows that fail to translate
+                    pass
+
+            with open(join(directory, "trans" + f), 'w', newline='') as myfile:
+                wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
+                wr.writerow(transfile)  # one row containing every translation
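+
+
+# Example usage (the directory name is illustrative):
+# list_files('data')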