diff --git a/api/api_functions.py b/api/api_functions.py
index 8f2984856e0e2f6e10aa04656112e1025ac95a9c..2b83b6da513fb4a19dec90c8347fb2b0043e1678 100644
--- a/api/api_functions.py
+++ b/api/api_functions.py
@@ -4,6 +4,8 @@ from func.ner.ner import *
 from func.sentiment.sentiment import *
 from func.translation.translation import run_translation_on_text
 from func.usas.usas import *
+from func.collocation.collocation import *
+from func.concordance.concordance import *
 
 
 # Perform NER on a file
@@ -51,4 +53,23 @@ def get_sentiment_for_data(page):
     if result["code"] == "SUCCESS":
         return make_response(jsonify(result), 201)
 
-    return make_response(jsonify(result), 400)
\ No newline at end of file
+    return make_response(jsonify(result), 400)
+
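+# Perform collocation (PMI) analysis on a file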
+def get_collocation_for_data(page):
+    result = run_collocation_on_text(page)
+
+    if result["code"] == "SUCCESS":
+        return make_response(jsonify(result), 201)
+
+    return make_response(jsonify(result), 400)
+
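+# Perform concordance analysis on a file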
+def get_concordance_for_data(page):
+    result = run_concordance_on_text(page)
+
+    if result["code"] == "SUCCESS":
+        return make_response(jsonify(result), 201)
+
+    return make_response(jsonify(result), 400)
+
diff --git a/func/collocation/collocation.py b/func/collocation/collocation.py
new file mode 100644
index 0000000000000000000000000000000000000000..85db37eb60b4dbec08d062d9db9d5f883b0c69a6
--- /dev/null
+++ b/func/collocation/collocation.py
@@ -0,0 +1,62 @@
+import spacy
+import math
+from collections import Counter, defaultdict
+from shared.translate import translate
+
+#page = '专精特新”企业,是指具有专业化、精细化、特色化、新颖化四大特征的中小企业。创新是这类企业的灵魂,足够的 研发费用投入则是开展创新的重要保障。许多尚处在成长期的“专精特新”企业,近期普遍遭遇“钱紧”难题。如何集聚 更多的资金投入研发、保持创新领先地位是这些企业近来面临的最大烦恼。“作为一家新材料研发公司,创新是我们发展的重要驱动力,只有研发投入的不断加码,企业创新发展的步伐才不会 降速。”浙江省“专精特新”企业、宁波创润新材料有限公司董事长吴景晖说,过去3年,企业在研发投入方面不遗余力,累计投入2500万元,这对企业来说不是个小数目。 今年新兴市场的研发需求十分迫切,我们一直想加快 超高纯钛及钛合金中试生产线项目 的研发进度,但苦于资金不 足。令人高兴的是,今年4月340万元存量增值税留抵税额的到账,有效缓解了企业的资金压力,加快了企业的研发 进度。”吴景晖说,目前,“超高纯钛及钛合金中试生产线项目”正在有序推进,一旦投产将缓解半导体产业的高纯钛原材料供应不足问题,提升国产溅射靶材的市场竞争力'
+
+# Pointwise mutual information (PMI) for a bigram: log2(P(w1,w2) / (P(w1) * P(w2)))
+def calculate_pmi(bigram, p_bigram, p_word):
+    word1, word2 = bigram
+    return math.log2(max(p_bigram[bigram], 1e-10) / (max(p_word[word1], 1e-10) * max(p_word[word2], 1e-10)))
+
+def escape(token: str):
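+    # Replace characters that could break downstream markup rendering with spaces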
+    token = token.replace("&", " ")
+    token = token.replace("-", " ")
+    token = token.replace("<", " ")
+    token = token.replace(">", " ")
+    token = token.replace("\"", " ")
+    token = token.replace("'", " ")
+    token = token.strip()
+    return token
+
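+# Tokenise the page with spaCy, drop stopwords, and rank adjacent word pairs by PMI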
+def run_collocation_on_text(page):
+    corpus = []
+
+    nlp = spacy.load('zh_core_web_sm')
+    doc = nlp(page)
+    for token in doc:
+        if not token.is_stop:
+            corpus.append(escape(token.text.lower()))
+
+    # Count unigram and bigram frequencies
+    word_freq = Counter(corpus)
+    bigram_freq = Counter(zip(corpus[:-1], corpus[1:]))
+
+    # Convert counts to probabilities
+    total_words = len(corpus)
+    p_word = defaultdict(float)
+    p_bigram = defaultdict(float)
+
+    for word, freq in word_freq.items():
+        p_word[word] = freq / total_words
+
+    total_bigrams = len(corpus) - 1
+    all_pmi_scores = []
+    for bigram, freq in bigram_freq.items():
+        p_bigram[bigram] = freq / total_bigrams
+        pmi = calculate_pmi(bigram, p_bigram, p_word)
+        all_pmi_scores.append({"0 Term": bigram[0] + ' ' + bigram[1], "2 PMI Score": round(pmi, 3)})
+
+    # Sort and keep the 40 highest-scoring bigrams, then translate only those
+    # (each translation is a separate network request).
+    top_pmi_scores = sorted(all_pmi_scores, key=lambda x: x["2 PMI Score"], reverse=True)[:40]
+    for entry in top_pmi_scores:
+        entry["1 Translation"] = translate(entry["0 Term"]).text.lower()
+
+    result = {'output': top_pmi_scores, 'message': 'Done', 'code': 'SUCCESS'}
+
+    return result
+
diff --git a/func/concordance/concordance.py b/func/concordance/concordance.py
new file mode 100644
index 0000000000000000000000000000000000000000..966a1a6d589183ca7d94aa6071afc9c63bd5d351
--- /dev/null
+++ b/func/concordance/concordance.py
@@ -0,0 +1,106 @@
+import spacy
+import math
+from collections import Counter, defaultdict
+from shared.translate import translate
+from wasabi import Printer
+from spacy.matcher import PhraseMatcher
+import re
+
+#page = '专精特新”企业,是指具有专业化、精细化、特色化、新颖化四大特征的中小企业。创新是这类企业的灵魂,足够的 研发费用投入则是开展创新的重要保障。许多尚处在成长期的“专精特新”企业,近期普遍遭遇“钱紧”难题。如何集聚 更多的资金投入研发、保持创新领先地位是这些企业近来面临的最大烦恼。“作为一家新材料研发公司,创新是我们发展的重要驱动力,只有研发投入的不断加码,企业创新发展的步伐才不会 降速。”浙江省“专精特新”企业、宁波创润新材料有限公司董事长吴景晖说,过去3年,企业在研发投入方面不遗余力,累计投入2500万元,这对企业来说不是个小数目。 今年新兴市场的研发需求十分迫切,我们一直想加快 超高纯钛及钛合金中试生产线项目 的研发进度,但苦于资金不 足。令人高兴的是,今年4月340万元存量增值税留抵税额的到账,有效缓解了企业的资金压力,加快了企业的研发 进度。”吴景晖说,目前,“超高纯钛及钛合金中试生产线项目”正在有序推进,一旦投产将缓解半导体产业的高纯钛原材料供应不足问题,提升国产溅射靶材的市场竞争力'
+
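+# Pointwise mutual information (PMI) for a bigram: log2(P(w1,w2) / (P(w1) * P(w2)))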
+def calculate_pmi(bigram, p_bigram, p_word):
+    word1, word2 = bigram
+    return math.log2(max(p_bigram[bigram], 1e-10) / (max(p_word[word1], 1e-10) * max(p_word[word2], 1e-10)))
+
+def escape(token: str):
+    token = token.replace("&", " ")
+    token = token.replace("-", " ")
+    token = token.replace("<", " ")
+    token = token.replace(">", " ")
+    token = token.replace("\"", " ")
+    token = token.replace("'", " ")
+    token = token.strip()
+    return token
+
+def collocations(doc):
+
+    corpus = []
+
+    for token in doc:
+        if not token.is_stop:
+            corpus.append(escape(token.text.lower()))
+
+    # Count unigram and bigram frequencies
+    word_freq = Counter(corpus)
+    bigram_freq = Counter(zip(corpus[:-1], corpus[1:]))
+
+    # Convert counts to probabilities
+    total_words = len(corpus)
+    p_word = defaultdict(float)
+    p_bigram = defaultdict(float)
+
+    for word, freq in word_freq.items():
+        p_word[word] = freq / total_words
+
+    total_bigrams = len(corpus) - 1
+    all_pmi_scores = []
+    for bigram, freq in bigram_freq.items():
+        p_bigram[bigram] = freq / total_bigrams
+        bigramstr = bigram[0] + ' ' + bigram[1]
+
+        pmi = calculate_pmi(bigram, p_bigram, p_word)
+
+        all_pmi_scores.append({"0 Term": bigramstr, "2 PMI Score": round(pmi, 3)})
+
+    top_pmi_scores = sorted(all_pmi_scores, key=lambda x: x["2 PMI Score"], reverse=True)[:40]
+    terms = [item.get('0 Term') for item in top_pmi_scores]
+
+    return terms
+
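+# Strip the ANSI colour codes that the wasabi Printer embeds when highlighting text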
+def clean(text):
+    ansi_escape = re.compile(r'''
+        \x1B  # ESC
+        (?:   # 7-bit C1 Fe (except CSI)
+            [@-Z\\-_]
+        |     # or [ for CSI, followed by a control sequence
+            \[
+            [0-?]*  # Parameter bytes
+            [ -/]*  # Intermediate bytes
+            [@-~]   # Final byte
+        )
+    ''', re.VERBOSE)
+    cltext = ansi_escape.sub('', text)
+
+    return cltext
+
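+# Return each occurrence of a top collocation with its surrounding context, in Chinese and English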
+def run_concordance_on_text(page):
+    nlp = spacy.load('zh_core_web_sm')
+    doc = nlp(page)
+    terms = collocations(doc)
+    concordances = []
+    matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
+    patterns = [nlp.make_doc(term) for term in terms]
+    matcher.add("TermCollocations", patterns)
+
+    matches = matcher(doc)
+    printer = Printer()  # wasabi printer, used to colour the matched term red
+    for _match_id, start, end in matches:
+        precedingSpan = doc[max(0, start - 7):start]
+        precedingSlice = clean(precedingSpan.text)
+        precedingSliceTr = clean(translate(precedingSpan.text).text)
+        matchedTerm = clean(printer.text(doc[start:end].text, color='red', no_print=True))
+        matchedTermTr = clean(printer.text(translate(doc[start:end].text).text, color='red', no_print=True))
+        followingSpan = doc[end:end + 7]
+        followingSlice = clean(followingSpan.text)
+        followingSliceTr = clean(translate(followingSpan.text).text)
+
+        context = precedingSlice + ', ' + matchedTerm + ', ' + followingSlice
+        contextTr = precedingSliceTr + ', ' + matchedTermTr + ', ' + followingSliceTr
+        concordances.append({"0 Term": matchedTerm, "1 Eng": matchedTermTr, "2 Context": context, "3 Context Eng": contextTr})
+
+    result = {'output': concordances, 'message': 'Done', 'code': 'SUCCESS'}
+
+    return result
diff --git a/func/sentiment/sentiment.py b/func/sentiment/sentiment.py
index a7c5076365c9208c028653fde23458a0e610a72c..34d4fda3cde5f283ce0770779854d41f2d18fbe5 100644
--- a/func/sentiment/sentiment.py
+++ b/func/sentiment/sentiment.py
@@ -25,9 +25,13 @@ def run_sentiment_on_text(page):
         res = nlp(p)[0]['label']
         counts[res] = counts.get(res, 0) + 1
 
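+    # Always report a 'negative' count, even when no negative sentences were found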
+    if 'negative' not in counts:
+        counts['negative'] = 0
+
     sentiments = []
     for k in counts.keys():
-        sentiments.append({"Sentiment": k, "Count": counts[k]})
+        sentiments.append({"0 Sentiment": k, "1 Count": counts[k]})
 
     result = {'output': sentiments, 'message': 'Done', 'code': 'SUCCESS'}
     return result
diff --git a/main.py b/main.py
index 4a14e71ad676ae89f5f0b84675b9ae2d26e3f5cf..5eec639603704766370978254d88bf5b2942a3a3 100644
--- a/main.py
+++ b/main.py
@@ -53,3 +53,21 @@ def sentiment():
     result = get_sentiment_for_data(page)
 
     return result
+
+@app.route("/collocation", methods=['POST'])
+def collocation():
+
+    request_data = request.get_json()
+    page = request_data['page']
+    result = get_collocation_for_data(page)
+
+    return result
+
+@app.route("/concordance", methods=['POST'])
+def concordance():
+
+    request_data = request.get_json()
+    page = request_data['page']
+    result = get_concordance_for_data(page)
+
+    return result
diff --git a/shared/translate.py b/shared/translate.py
index 5c993a10052f608854fb410b31df248266e2d65c..d269dd1acd35cfa46b009d4527360251b63e6b88 100644
--- a/shared/translate.py
+++ b/shared/translate.py
@@ -1,4 +1,8 @@
 from googletrans import Translator
+import csv
+from os import listdir
+from os.path import isfile, join
+import time
 
 
 # wrapper for the googletrans library. Takes in chinese string returns english
@@ -8,3 +12,34 @@ def translate(word):
     result = translator.translate(word, src='zh-cn', dest='en')
 
     return result
+
+
+def get_csv(fileName):
+    with open(fileName, newline='') as csvfile:
+        data = list(csv.reader(csvfile))
+
+    return data
+
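+# Translate the first column of every CSV file in a directory, writing the
+# translations to a "trans<name>" CSV alongside the original.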
+def list_files(directory):
+    for f in listdir(directory):
+        if isfile(join(directory, f)):
+            print(f)
+            rows = get_csv(join(directory, f))
+
+            transfile = []
+
+            for row in rows:
+                try:
+                    translation = translate(row[0])
+                    transfile.append(translation.text)
+                    time.sleep(1)  # throttle requests to avoid rate limiting
+                except Exception:
+                    pass  # skip rows that fail to translate
+
+            with open(join(directory, "trans" + f), 'w', newline='') as myfile:
+                wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
+                # write one translation per row, mirroring the input file's layout
+                wr.writerows([[t] for t in transfile])
+