From 24abc819d3a7659399167ddb0e496457bd252807 Mon Sep 17 00:00:00 2001
From: Tom Edwards <edwardstj1@cardiff.ac.uk>
Date: Wed, 26 Mar 2025 09:54:52 +0000
Subject: [PATCH] demo branch

---
 db/datasets.db                  | Bin 1445888 -> 1445888 bytes
 func/collocation/collocation.py |  27 ++++++++++++++++++++++-----
 func/concordance/concordance.py |  17 +++++++++++++----
 func/translation/translation.py |   1 +
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/db/datasets.db b/db/datasets.db
index 8489e3980dc54d847378b5dff7902c70befe1462..cde875a2ec60e84f813b1fc79f999e817796427e 100644
GIT binary patch
delta 5581
zcma)A?Q<1Z8Q;Bk_uk~*-0aO;OLA{W=m;%E>)H1`Kp+(Qf}}K1gkYhh5-KeUmr?;+
zD51Lpv|}{{%^k;%hUygCY8yy%*AKq5opHoZ#=oF2ezBudnSK)@_&b}JoPBY|4}12^
zy}$FE=RD{4yzTJ#!0`CMNU89tAc#-nc>>Rh^Y_`}r<VtR^5w=<mn;avawUgR%w1?8
zUwA<Io^nokMLD4yRVvDJ%8>GuGN5c#`jm&2US*|XC=V$2DfcL4<u0YD<R(?}e&eaX
zd5a-H^Ex3V>y<Gd^GcXM=`F%o^4c)=dd(Qud5!q@122!c<zZXV^y=};@4YPMf+u1A
znkQo3?Y@k0vHK!^`P@B*@sRrh#v|@w{C=-{5c86I0OQG1?tZ)q?lbsfjk_1~cilmZ
zC){25<q>x$=5M$Im~V7<V7}emhVf%}3&wVLGse^ICX74WM=^G~4`Hl#*JCWZ4`RIG
z_F}y5u7!l9?rO|scU1welrSJjzYxTM3o7aBJlE&0$_nb;JtOqsp`m?WQf)&sO;x3a
zZ5h>z_hd`{W7F~?X>qO3T?t@&&p+%qkXA2ll1u*ep{CA!O8EJk{oQZ%*B3`CvM-Z{
z)#;MEKG@^)Jh#-;sX;BhdU3Q+@{j$j>0E<I-y9)hy`q#$r;9><B0HEa;;vHZGPHaq
zs->kl(ABZ3e`%^)H2MQ+R!?@IeQ~I*>lWbLff#To0jS~pj>Z8;N90L)`qtW7B_$Ux
z$=%{$eO$ZEQAwsnOhdC0w0kT8YG|sa!ie+cW+(280ZmG6+oItUk5yVTer80xtwhbn
zj(8v{#DXPEOCwHXx91XohD{wcyc?7GYywoLwxQdc%&}sfoXw01`Hh)TT;iEb=c~H=
zAXc{bjLIPzfNWS<5kZ5~f6t$i7;s%2Xc!jJ3_1^VrMi^3OpwOKWsu(?cAh!vJ``y7
zdV!a3JGup%onBSf;s3TlT%Yb7+vRQufWO?y15MqanpNF0B6s+^52u&snO_Tj=1S(@
zg-L_l5l5%xv+_0Zl6);i=7{Ze27)+ZQ<V_SQ3<%VK<w>jy3=R6rCPTvkz6|Uc0lgb
z+tB;RDR$g#vE$kr{Da*RXBU|MiC{b4S8YuL%`C#1p54*bI8b$T(}2G$uxxY$t`^p&
z3~e5bX=7SCR@3Ssu~447B<%4>Y!A~?9gBwChbi-L9B5KUcO0JbV;|N_XC;ITsgjvF
zG&&uN0wqX#Au7qzZ4LIWUf!DOT!;Wi5@5t|Y&B}8=i@+&T9!t6_h6d&(v6HrBA(CA
zb~r(TP1)sfhGxJ@9BAm2#C1B908}kvYkc^sk*qYF<nX(qt4-T54Qi`VeV$AJYNkag
zZzC3mzLo$aND+4R{PJRX*64w^3;pm$q5pzPHn0QlkFEoWsBRL%hY9A4-w1%5GcEuZ
zoUt~YWxf**y=<FJ(QQl<F0ZL1df=}l08NuxMo1q>+?W7VDIuYlSu55hUVtY?S39bz
z8>VAN^znoQAk-!%?-lHXr{ciC{)7k4l61Uo=Kax1%yl5eE5)Q}DlWu{62t3A0BS0n
zl@A?EVtWEm)h*;Qj{38D4c5<e)haD=(U(W+-&@f!5M658h|-#E^W>|+?<&63-y~vL
zZ|=_Hm!#fQu|X2AB7WxfXBLT95kY?~G8+xX1=^}*P=W$uKC3d|o&+GVspatA)%nT>
z_bC9|d%SxWQiN^kR7Fno!TS}#uDj#H<}lo@wTYZG>`UE^e@d6Ud?59ohv%g_s!53h
zRsHeN!rcwxJGI`tG@d|oFB{#=Q4Qoa&ERdu?j;ikBIgswi2~&Wpk`^B9*Ta0GU|&P
z`I}vhtb9!Hz%5ZRrmdQ)PK>DS`(r@l3&cQV-S8zn9tYx%RMci15X?e8?r!kq?!ui<
za%t%zijDdK=^{vRA*dqOLQJIG&~1WJOXt#R*H8DlYXac!_RZglX=)TlFbp{3`6;>V
zZ=1+m5trcni;*CE;&SKUoZMQmQ65CT7bF6OmPuXy25)h&@UsO@p;}N`v@?;y4mXP(
z{<B8?SYG^5ZMadc_;OL~PnCLuMN>yO<qQ>7s-<gHb#<!4f2~=3sW2rgcUT2DBs56b
z57I-}b9<T*{uq{r%6p_t0~LcAVS{@#0jN_G`IR$z=h@@#w*!T~ugohnmxNW-U$U$5
zuW4{ynfdeoV2dY3w{buf%^^`w_0s<@Fck-CR5f*z=ik^wE;E%W-rh4Q4ZoK!dFALL
zu5g<V7^wYyxl+8<b!GTUzQgNESQA`dxJI50dHn_2<o8@I{_(RUXKss%ax|jakpRe?
zxito4F`F0~AN(1xB>`wsLkj_!75vOZ)6K52i8hudhFB@e)8)mO7(#XG7_3v9w+myz
zcK~ef;aK1h$3X3?Q-8@!>XU$b$K%3S#Ig-O*v{&7t?9Nh7us3hHy7H=2*fW38*zac
z$JZ$OIUhye7lSx_GPVyOre&CXykO37A_gSrsZ<@V#GMYnmlA*uLah@DjYv>aQZrrG
zu{zN4X3M-wpog|i<O)*lA3<mQ{CV;9MzQX#{^sJ?wR}>3b`M5(M5pFhCQ2qwHC6+E
zD*=eqWL2k|>e@q%@Hdl?AS^2Rh11TmwbCq3XN8Xhxk>owf=Z?sqw1r}k*h2g5<>;k
z4-a^C0q~t;^QT8<LR8mmT2)uJmi&!>X!%WUQXc+QzTL|u2z4w?udZ9tT=x5><#ve`
zlfFFIa;vL$BOR0sEqB(=H7c2Z{kLs`CH6*TS(;^_P!1JptiSp?fT2VHpl(z4^&s3|
zJ)IzN4#-(7aH4B1H<cpGh_m%bc95?}yymDN2NjfK+M(M-nc+1Bs)g7MbD%jQ^JRut
zOb})f^bY(zqEz>kI1r@`I<jzp3i`fSCG9mPthP<03Vs_#j6wns-IW&V=NPad4zz51
zDF}6;wMuJ`==U_WUft}=ABv9Hu@lSMdw6e}uMRnHYQ{pTILnKTAzti=Zj0&?eIky+
z@t%c)?Fm3)P)hmu#0d1&KqLOc1x`y)G!l+LwUO5Bh<raNLHvg9Hbvfj5_3>QT!Z(!
zcuufSdwAokqVNX$w1@Y0JRjp}$8#DF``CwXk^;UkiWKiMo*Q`B*UIboZ7JUD18ALp
zd}-Tv@1K;e%_z0OHmNr$%`A!@5NQS-x)$v*yt@;Cw#B;iu*tl;5`Y#yR)j+BtOrlD
H-P-dX0ZXYu

delta 20404
zcma)^d2FNUdDx{HX%5cL?Ck7rot??<Og7%E9Xq?EZY5pYX^glIoOqK0j%^_IrbvPy
zEz)rcBq@^SBgLUaU8HX6zDC!KMih0DB1LhL0u9pq<NjB)2;jv04*t>ZCP0y*HsJoA
z_xVIoGvhYEh(q#wkLP{v>t%TH>*2+(58b)`$8~k}zscXf;O|SrpK7oF<DC!Y-@Q?v
zX{@WOyY=E2uk~LZeIfYCz26KzxiNY_nE$={FGb(w>pzXY!|P`BXBhFh=x2HVwdiMf
z{q^Wi^Ew;-G_SX!pW^ju^le^$Jo;0-Mxt-=`Zv)}^7=;fP3Cq+e}eZPNAL0aTJ$d8
zycPX%-Zw_?9NxS7a^s!ooy&EhPrUc|kADB({?>2*OsKsj6zuG1>1gU`X-b8D_LVz(
zUtTQ)_8Q**!QGK?W8;-~>aM&4u%Ep0M)FspSD5<Rd-FecHnpue7z(wu1)EY!%QtRk
zf9HQ+p1+)J$XwmNcKdC!&roM|a8p~bGuYAA+L?{K{9<#%$afm|8i&6B?nLm#hHS&o
zEA=<`8o%@X&rgIeHf9??zVZHrJD)P!9sIjzR{+$1df|4q@9%H?$@TTdd-We&y%ekq
z+`94;mx2KM=B3w{{zLSx+5bP@sm|t{9ijG6XItvq_1AA_fAMp#<m&I%tuXYBf85aP
z{l6D|-M}<)2GiN9!T<Kkoi8>8ziIF6c&nv*-wTKNS6Q1V=N`Lb3vTZtd*zY);Lyp8
z`Ooa^YB@LU&g_>@W-3cZ?o8V08}~{>sh92qK4IxH?rh|_OFTQ*p51U}$L#F9o$Ipm
zV|JpXt9^7lP|mO0J$ZNffjhSAMzhYrsGVQ)PEPE2rnBQMzPE?s&f@VoD6Py`WhrmR
zAK6ETc5F4^_C$f)O)t5{VV2}u0Cy6r!0V1}l=BJQvodw)j%L}AlT~KA-IEm=oSt_R
znR33{neBCE^LBp0K8}JA+4<zLo7iR>7WrPdD-Z~H+pB^MYbNcE?bw3}XCk9}vz$G%
z%%SW;UKY)dnT@-_o83ET&lK(MSY^4}j>TEGoZE78kL<*NlUcWu!<E>9(-(GTx0zT8
zPkW^vdu6)PGYA^J-n^IHcJrlLK;2#Iv9q(}^*pxwHtc~3d*y*U)E%%#2A%>~>1H48
z+sSPm<o3?EyN~(D{H9<z*JBTD$kIpqV8z)z28*F!Fjy{5@yy-qcP5V9oxLht-h&-?
zdCy5&LvCK6)p_SglNoDilZAD1Kxdz=f4Cb8z0>}jjdKqxJxMpe)~3%C-ES)0%Ej4o
zF7J-dy1nc6%DSDf3_f@_-!EUy=Dmmz6o_QdT9E^mPfF!nSlD24+n&idy&1bWYR_hb
zpvzH+0D`@>h&{9D?k}>N;)?IRp(4ZJ9B*lW1)bEgot%6dws;#D-nP?oZ+~=b4S4Gh
z19tZwY*V{zW%{JDJjpDE+I@?5Y12t;c%_*tR6@3c2Tp!S$Y%Ho`a1)wtfTNaW82Qi
zFb9Tx1i{I;EM*|~r3zM`u%-aiNe($n5ADGo7YP~}C>ImZhIO5>qd+CR$YVrT`P%NA
zk%b2$Ea@F&5iJ4yByO)3S;$?<djnBBw+1`u&fw$Cs)*a&V1JrdXKvU|#N;d%NZ$$b
zuQFcpGRyYhsJFeZBy$GS&ykCXj2A7+v6I90&N%#Um!jodp<F1~@kw_zUoP~!g+W2w
z$(++W>5JJ+T2Szelz9<r45fgO*y+P^Zd9asYS~#_0t@c?u)8=WAA7?x`4}k21qr*y
z0q1bnUO4i)cI}h2mnwWO9C2rc+~pm}4bHsYZFj5Jj?W;|DvrVo5!uNZE7-9;)^#5p
z3XFx{T2-x~obT;#=;>6mlyeFF-WeQ+B5b3Y#YydeUB#v>9bS_|7=#22?mjw>J6mgZ
zCh6qnK05Br{xDhJc=O7I3x8G@c>ltmVLSfdf*88XM$4ZwbWQD{V0&BBnW3A${CfXq
zqnFIc@fvH|=8w$&P3x7>#)%KFU&>zk_{LW+-1#Xp^t?H1Ywl<bVm;5yS+?;Dw>s)?
zUpIsQ&&y}ltF5^mW82)4dVi?#PWBIm>j!UK`}6CV_WFyzQg`uJfc=XX?@qlFeUq7Y
z-rM>2)tODLo$VdLP*boeB>UX4+P>*Nd14Pn%13kdPM>hfOwozO?TH7@+N@wJ7rj@`
z_3=T!EgwgqzcV=G9V}LQhAYc+cK4uCv9dXd$`1JB^8GSya@$=wDIZTe6Ftsk0?~A~
zRw0Hje2YDnT}qXY7>a%DvNPS@{<6J2AbNdhSQip4zOZ1}U{$weK6=tVPNMr!d%A&$
z`Ro&jZzsEiBXrl@0-Bn+PJ9Tt4(RITd>&IIB77XN0I~{CCEV-uKLQXZwTcyCUM+55
z7tq!Or#yuvXBunOCn!Dlz@SL^%^RI_r_<%5weN-d6{jk^0ms@eh!!ZU3bqHwPaj6~
zq;|a9nIGa%$gY#xaE`~k^e*7q#bu5UeD?N{>?<ZGzvh$8?5oQ;;~De=SVy-alt`49
z-jIXmSDb?gFnrH?7Vv<-w~>@{kk@Wjd(^U_wZNrwPWQ888_v`=kUGhvSIP*54MRER
z(3PGZJ84Z|Y~e3493xxRqm^?fY7TT8s2PBXL?ahxRY&lVf9S)?RNUU0bQX`$CSK3J
zcQOxNDr-|tZrkhMai@FKRRDuFZou4zolL<E!YJv8y>dN?g(>GUIw<vD9=`ahWyhxN
z;u_zxVoO`OIA~Ay+Ub)j8UqS4#3|=DPYF4gcDj21u^dw?Q0UZ9b1S>RUyu`HC(E2s
z6{Y2+my{-LP49Gu+B(bmk{yeFbUahLjtXX|H53X2SW}pzW@b}M@SRX+OS?`5Xk^4b
z3Bx$<OwQRJV>setG=OzZ1w6Y2Ooi(J7b#3mxFAYkt_LMxC%WzAE(#r0Ak@YStzqkc
zkar>PE`?dNX2Wy)c_nKS*|%d^w0ODj&|BJY!s{}#hQ~RdsClEN^xQ(vg**xgrl>yh
zeGK^Glp|pfXB;gdzGa}wIqZQ=7KmiUuRKtGMt3Rqu`sxC3rXi7VaE@VG}$3DjMXWZ
z<|@;(`lVRI8m*&Wrgm<<^s1$}5Wd1^#5YYJa-hobsNJ_y&h@i#0E9VX;#=1$Cv+1-
zF@4TN5kJgZf8cERoFMa;hn**4gYh<isS+MSw~80Evd(@!1BqstvsT&SsRiupuAIo|
zMRDBL2c2RXr65>HY=}<EZ3WJ3!9<UEIy3vM?@Xobq@ka|KfA*Y<=mh%v1LC@v&|_Y
z9{{<RJ#bRisyl;e$)MTKp7kw=5g&-Sg)OuKKA9ltsKk7wtFCL$?7Icuff20F4%&T7
zj0xZj$)Vt|-a~JHQ%Ey|hKa~WUdM;+trb4neUF^<ul1LYC!IdS7(D2E<VH^Hw0H{c
z#HJICRVH_x1W1eE%{b#z*eZJv!H%f!3XPq<74cP8rU~EF9;JTx&aEG_jJX?3qgwG#
z0jE|e!uiB37zR7`gyTD@L#K~3in-#v#w(n`dNGT@qA3_`5eCP7TJLwZlOmVLc&vx2
zFa>KykRYeMU2u9&_~<NJS&<8ae>o~=?0q7!RRLewZ0hcwxU2Kx(WX<_4}p*cj37aq
z%2Z56m=HvwfLN5Lh$f5rRz5EADjdlQYD&=AXi8@h;d~-)c~wR*lyH%Kyy&HI{KugX
zO2JnU#t5+}FbA-4zVwv~34hW~9}cowT06Fh4tfqI_qS1kx;EHu4z_imF)g1{j5g@#
zY;HqZYL$Xurv?zgA~IGSKT}-Bf$>ViO-*g>fk0I`Dzf#+7!KkVXKodDS|Z!n7C6EG
zsLU<@?ngK_V<kI3TZwK|QkWKTm*k{sgU!LAjQ?@sNMi&UaQet+j@i63|9}8mjMMqK
z(8JD+{%YTETxj~G)Y44-8<?jjb}|PQ;U{^Yi=M6ac%)41!k5Qko>n5GPXQz3Pxd{B
zDZ!Mo>C!`IDXt5GFAa|oZhL6J$t}u))+zKDD-4IO=xyUi7Xw}<>MX1}i#fPUuss`g
z3vrwk)QqCkr@gP77=i$+41y-)fRu>Jriz=miQ#DFF@hz6V#Lex-BVT4g7O{QCZz08
z)~Zp!j1~r%&A=(%@+P>~I81T~d~Zw@YS*aqF1RtEAga#=z1>-}=g|^zm@}|#$B4L4
zEY=hejczVNH&AdVQm6}z0Us53(LR0nkxwfSRUIOZHC*ZQ*FglUhe>B+@KDZ=;O3BB
zkuBq;fSe*P8miLsbHm>JAn#BPM4jTkV&8D@j2~OOusz7rg1vpLC=k?lf4BbdyB8XR
zjW=?2H*!P?Pj0+XSJ(B$u3s||#)G%3aYC>)*xugU-kII`wF{rEU;FOGo87^S-NA;#
zU}M8QKdJW_CVk?)b4k5WOLM5Xtx1!56CYnY4R?OptbZ=t357aCp{C}uaOV%Nci(SZ
zyMO8a@cRu98$P(1ZTR?7eei1CNZrN1x)7}!8U4-R1Igtznr&-yd2LOtZNZL~^SQiR
zU0;kgJaeqJrk2*uP-ka$_|rE(Q~!GI)6t7&%tzloJ33>6&F!rn*~O9T|Kx=W^=}^r
zFE@k%=f<yIxbx0)PSQp=2uoK3w?Djoc<;rAo9hkVxg1nnz4n#?`W&u;Cf>`2Uw*Nr
z;qc~_hH#+1{%7m>SNFoFAkAm%vp@TH4gcxJ+TxYW^8K5;_Zx3!8}DEJ{)+(<=QSSG
zH6Ac*v+<3wFLiyzEWh#Y*&$jwLam*hgo&x8zrJ`o`#=6y{mX&Fp{skJx&7&9LGfv!
zWIO!uYQz0(!#$F)*~Tvfe)v@j_sNd!hzdZ427BNnO&k1ec-GA?yFKIH&}wBY?Cg#I
zZJ0<J7iYOLxqxOsEh0`lvd7Y5?RyFI^6COI%q=`1f~(BU2b|p_9YioL6DB3zBO2rx
zk1TzL(BiS64D86X*=n|e6f7Kx@z|UaK^f=~ZkJ?B9zayrlykk#PEp*J-6MvPyFwfg
z6E@&j@=Y4`lob`XGR>E|rS6E@atn{jmdR@|f>_dt;x?cy`oxOzFFq&Ro1~+FYz}Ux
zd!58c?LyUKDXfweNpA((aNn4O#Re*3JGT(^zp{rXiFL#qNIrBPSV?cY+nGG^hEw+P
zynUP(2#94!?^KTkZ5FfkNlF~8d_wnDOoBG(RC!de({Zq(NMc(t*IN(8aWqI4>TZv*
zJ|R^ITNr#x@dsa95?_XzoY^?joP}+BAXVL15ZF72T_A0$-V3<(9o6yua!E2uAX5T{
zWQx<<#T|PT_2r{b7S(C8G+r*JHVHO9R`;+7$8`-SOk7Sjxj*0(P>o$mJLm{;B);98
zl6i>&?<-Reu<N4eV~6F!ggz2uHI#6|iSkk0o|y+<sN0W@7rfF`IX_cLrO77Z7-LAP
ztPRR6pR8n*4L=i-6GnS!lX#w4^uopstpPru$pf}67oM=5L^43Pdt&!=3q(ZqsQcq_
zUY!KKL=iutHE440h8flB0)o(J`-mJxSSY8yubBoWs|O@AUwN3QVoNZPTw{s?YiD-A
zgKoyGz|-<T7zafM&nOC7D<cw#It%;Aj6IXL>@;zRNQtbivMny9d?W0Xp5}Dq1?`=A
zIbC|%?K<+o$(P`ZDz&RD$2w2(kvCNgVEjoB!!yE5hEIf!V_^&KVMe)#5D=0jo|+1t
z6c%(80Xdig3O%MM1#>#FoCZzeSDXrsK*0e=BR+#7TSqYhS?@5`6nv|C*2+s`;eaTx
z!x%oO9Utj<D;4^OOW(Q}`o=Gh?)1HG*@H=>2a}D_zh}q#g-|mYa?^wd;*%7T=tm8C
zYuU=@s<~-`AeO*)lFUY*c<C^Vu7EJ%(}FkR1yqmSH;?8MBqtW^I6AKjFVuoo_R63$
z*Tn;ObH1GK)xd>?p0gj-puUputl5cFfr&1&Y5TZ~5Kj=L)DN$#Ru??PVxmiW`kb+o
zD)vkgX9f_g(@-<lZx4!IWJYB=>CS}}7-6N}egqJ<s7!Se?`bWHtq^_umdey%xsWZF
zqR>Hh+dd-jG#12oa8(Mb&XZ{|(mW@pq_jk@D+whHPehBXnR1lfy=(UrBtkybI?v)m
zs8j0k(kt#PD3x@ZfRLebWV~#X&o=xNcK1l0rvNj+W-7PN_`W@4#f3PcN6@LhSV-iS
zo|fGx*^TUo{wE{VKLc~h@&g#b1*riLP<O{5FChi0-adK2>HLomGnECY1gYNSOuoQP
zr7Mv^=gG2HO32fhWeamzr3!-rB9<mc<8JIpO2FuJ2qo+|NGwghYfwy^sdBmfPn=nT
zefh?jPciIS9Fe~-mzKzxR(b&Ev4lgjy_MOp)N^uaO9E;nuC(MtwguV4$M(aFz-E}Y
z5-m!u?mP<%5qrSZJUC%XmDWJ@6cm%B*qBdAiYA_7E6GWOM~DX$(aImfQ-!33S!Ua_
zIgdB&quolX;H9^{gF_|&q+P-^%sizzBtLl&aX0$C?LM*>QXT1Aa*uGbCHJd)fEKKA
z0M;<JDM<p7XYLXhnbQdt(Ff5zp#+5qF?4B3H*+dqkR~#c$02KBc<HHy1=+YkGs|#>
zDD}<B%4kH+RUD=`=M~p5+)i&oG-~=``FK(GK7HhCJ?j|tL<S`>Vh@ptKjmDipm8B5
zc`kBG<8i6h4t3uX529~T#YOcK9EhpV$Ev0D<n|D`1>bGLaOwc{01*O-HwusBqh&n8
zr>u}TZ^rDqg>F-O%ecucIVprPDFqHvHFM&4Y^^~nRII4)&3W%&hE!KTliX-Vl$H8K
z?)tVfA0Z>Cj7q8m_v|rVgcBbYV{77JAlKbsOQ1k^R|DGWqe@CLpD1lMclwV3M}j4`
zfh~Iq-oY}?fu2ApmSl@wQ8PWpOX|s!E9eHP&iw307W&$3?(`1g<f8Eve~k5_v#e)8
z<y69~kU)}@d2&W+t>KZ9c6Mf*B}v_>k4n6X4=3Qj#*tA0t=KKXG~e&-ufr4SxVniR
zq{^MK_K{v_%r?3)3P5;P{YqtR_=AtEglY9oFc<62;&3^)3a3{hX=h+b^vmh~Je9Ia
z?*J?MJ`9;Y{H9)M5~#vk8SfE0GiYs6`xAj)NWdeQD1DD>&wG)L-mrHv;Vq>&3nGqv
z>ZdpkET99Q8z<P=U4R$V2C|PJJVTOcSQJx<dj4a+scw4)W^&tG@Ad{PQKQ7~WE*hD
zA-R{GbJq$W#XH<<4N?(z`teQ1fxS5>ALUp^<x*upkF$6{unSI1cmpZ~$0yxYG&6#o
zDm@8AxH37$0KQS?C?73YDpGvQ!iZE=R*(|AXO7A%)pD|QJbhNRqE0XxZZGN8Jo#a&
z#v+U2zZjc9#yFEiY}gNS)6JktgmomDT*!Y0UjzNSgw(LSk{^8IiB-|WRM8Ft<cg++
zE2bZi9E?}S`VlZp5CCZD+s$YBrzJI6KbfODgVG%S2dfL@V++{)%A*pT%r~;`(IF_a
zD0?@x0H-Lym7&UN8U}EqTTVt2CSH2kUERP|tNY3<rbwZ@L}h-0Mv2NC?En;GdZk2t
zGRH@$69@!mV-*QW$%=E*rTD61%I@7RTSsdo1_eV4dm4^9*&RE!>I~=5i4I*K$t52r
z8lF}8+(cPy=HOh5b48^gA_mN)M%4k6QyLEq2Ez=(xB+o&W{(ACtoq6W+hVI;NfoI+
zJEIU7F@-KeoCr}X<$pO>CW#1;cJ`5Y3WZcX;24tmJ#)%2R7?r`guP76C+nxTq2WDe
zPD-iw>=WYCeT;hIo?YCk%skHiaQo{1wby>;IT60eQTWM*OrRlr<wE1{*In@%f1ez}
zziNEF`wzSRjhQug{Vaz-tvJ}!+1#3n{M?n>+2RZJzi{zx`h~84&A>bF{rA?Ifuu58
zXyyuqQjsYN(r;dU>8~%5H~GS~hQ&+u*S=hL?aOTQ`D?HDHh29FvtN5}r19C=sr&WU
z?_~e;VZGHb@nJoEU>{%m!QI^t8gBM9+<RfY;okM{Ub#@8sJoV_Pq127{k^d_qMtOd
ztbe1r8jWSGZ7n2lnwvUOJBRhJS#6<jdi_}Ch%-6r?SrO63_>|S+!pGfo60MuU2JA@
z_#RDCs5Iv=?(HAdro-VwckDpLy*5g2ZF>u8CyJkqHxZ%20MFe9_dv43?4}#oJ27fU
zEUG)5=5GCz3cekKaZD(^l#iAG2%8v^9wBB(iaDS9Q5z9Gz=twC5a^JM{$m5Vb-;gY
zN}PjAXR=dRLNOxr_=zfJl75)lh8-0oakqB55Vl(MIJn2%F{am|2CL(4ZkuhMk08cd
zS}jE@I;nMP%gd5~HJfwvT6{K#OwK`bE1-+H0^Vo@w+h&JPXM4h>PI`WYUi`gR357<
zz?)i@k7U~3$n*%3anJXU5OO+7Dg<%cl6-({6pSg2v!Y**8{2@$_SA-UozT!CiolS;
z9M69$73kcwy1Ft~bi3(|F*MdLfF?qqnsu!M&Ca6HHQyp>`(bkM{bx>zX@bGL<5jG4
zV04RAS+L?JAk!22!5Il_gAvg`hHQMV*<zim;ewPVX}gnN4NHoSN?0!(Q$#p>(qD(C
z@?4Is3dX4{Yy}M6edJfkjTmWcp?XSI99;5-HtZZdAsJ^M?=$iXU$wsS%U`X2q!&_{
zcxy;vR*Wg{yEAT@GFil-PbxF!6v$_fFxR6HF`HNKt(Hr~!HsVc{slr!P2{K|%`L(<
z5n5XGzji-yb;?JUFp~JP(sd-Y8r(j&ur7XH68?MO?i4W8dQ{dCxBi5B>KN~dp9G5#
z{M1pX#g|wanl2;+jH_@lMS_GxmxQK6(xgIHibbAIAn1F9YG)goP>vLBg0Mshw@C%v
zR}3J=C>&n-Mu`g5R46q$h6P0?WKjkiZcy9IFt1cZcA+dp#PkyHkv-bNOTp55>k?Z!
z6OuU-$ui=jqEstHq=@phy0BJos>)M~LCV3{C@r76Ckf%No-n50OP|;)J<!jpvPo;O
zNwO+Zo}DX!31@r6S(x=nY=YE|HW}bVC8y>bthtZ!G)S^u047H^Ylnu~+l0`<8YyXZ
z;6P5#h*Zx_onG{TsVzH}sb>2!WH5_w!b;3hcwshoId#Tt29l;`1tnXMUe2MMotb<J
zfTrMCl2ovX2bsy++c|bt0t(WB&RWcR2bppysW=7ZGbEZSU=@87fuS#ok|@kM*&|qW
z6EpVUx^y~qXF*l19E6%$1y^>FX4wO!G=!E+BYI}APgkJ@FK)qa2)_0<wYAa{1CY4o
zIzqtSN$M(sdLx9w$Ku;Os*n#85B($*B(icQR(#-|63_4;4DB=3sa*pNf`Hr_?&*e|
z5dA634pOP#7irlk1gdnvff1cy2HC4A?_rFgVrNjnP;YQk-sA?{^p?(7K5!0+^CdB*
z`k*P)ViZ72^IHrLbu_CU_}x_dKV168#n3<h61loJIy5m=%iiEdTRGiA$?M%!N%Gda
z>=K;|(zsy!uT=Q*wO1_Dx*@=zOQ`0OWYYcx({!QpfxMR2RIhkTtKL4n6Vj&vmL=OV
zu@%sS8cNCClIB#r!~X|!KnL5R3TVuzT2V4UBsbK15pFh(RJ8+;hUeQ@$%(@(yL%*l
ztK6vGML?1zeUGFjE{(~?iLmoqVttFhDjl!7H{=!WGrinA)3bZUCkQH!T3h{w0uB_2
ze<k{p#<i@ie)(;P>X1&{Z^YZXd87V=Y~zQo{N=^lUo}2rZB0%tC2%XDT9f<QVfaSl
z{cPiRZZ%%H6TN4~FI+zJ0$W?#JK9^@T2n!q9J62eueUzDyiZ9EpR%E|{`Q}l@#h*U
zn}Y4Z&i2-thRXV@YxiFK&f)zFjW-V(Z~pLG%)OASum482_;(k6DRAZMW{u>FsnEAC
zzMOsY2N%9~k^0=t@87j(!>p48pr6Aexfzqll7!6m5!DquDR+~CM8Dg2NWeig!;Ngh
z+a6-M*2R0VA)_?1l)Pe55(*MJM!fmFvp8B=e&Ft}^Nm;-lTRY-B=rvMMEP%zQy#PU
z^R}gthxn|V8^fZ}M^9;5Hqo6?nGB~V-l5?pzs6#kF?Qyb$_3G;UTKn~og}XnGzd$S
zL%ArI^Rr|Ic7k=z#Ik5C1<>hVw|oc}qylblciWjr)r#pyY3Mp_$|Ce0bPoE3Q34e#
z(jYD3GrFrd&Kb-JxsM)@M#e_YG*1U*R&fiIm_^ZwWH}8sWCe?6R;~l+aY4$%SS2=y
z(6S2%Vq?R&lB2`QQ&FN??<Dt?7=py~iCZLu1s)jNkLeQRVsPqKNLJ|ioy4+BtEx$!
zYYq`)8%Hl;(QJemu!~<NG{-Q8(VT=RIkk_Xc4lWx<thyH?(rjcVT(=0dgOAnB|CG`
z(;jbW3?!CZ>@mIWy>c~Vv4@fovAP^WmD0*%tffS*k1fP<MLgVrUN=U%sE8Wy;*U}1
zFqR}c*Qr2_81Pm}-HW^0LJSjX6Lq&b@5L9S9XQtunssvJAzvQJR`hI@#weWOApioc
zyRwRLq1jwAYE%QW7WK1kf(*@rs`sD`-Y}+cVrBBsNgpU4SwOT#jO@e)wG56#4qpfG
z2%M4$A{pkxiL7Xep*k9<45<QD$Zcz82G9j)x*|+USGtg^@YU=f{YUu$Xj(g1j&!uZ
z6lsb#kP-$l<_s67%L?p@*Wt<X(WGQIsT<5%8(wik0tLyeNXjJ8h8hLK3aEJ9#SPNB
zE}dqlTzv+8E%0W|X0p1~ZP>uq`lsiCDUFhb@IbSJ`j>n(#D<QDfe?|LVQEN-h(uPm
z@}C<1;AsFFi&CKfBwa$P=|)ltNH=wrEzF8Ox(s-*FnHEO5ldqoEWuztVG72|E|^Pq
zD5}SIwnT`;`#y=VU3F!o{)}p7W!Zi4vAdo@3?;W$nzEJ2nVThYqo#_pN2W+Dk>)AA
zu1DU!wLpeOneS=dG^l3|Izc%N2&BhoEmAI)?BQZSVPr!NBzG-NPJG(QK;@|17TZ+`
zLe81llF3X+X;;QrpaTuweL$jGTxnC9lfw+eoc<@BbXp3h48`fxqUj5L#<NyYEcq4D
zxu=+K7Bt1X$z(bsE4(tQ385B7d?FcQ!`Z;g`SqBml5W+A`YC%-yK)N)wCbYWq==b=
z839g$-|A8HDhG`T^dZQjQdDDM5>Ax@9f1p8-;-07B#93(`=@+SOFhdMI>Xx{5h~fH
zDgt|o9zq$@BS!EXm&lL0yL>6SOITQ*F0a*oBUm&&xQZ|EY@TjDCDejrRH79hqJAh%
zwm!d@h0%#1OC28uKDDPdVTl478unKmTYw?J36+WJO_XQglGZ?-MRE{M63cAW%z2hR
zh}ju^q*FRYgfZ-GgyH5hnmnVlC6!KXBfTOSPjiunVja;mnZYVed}?6-YROSmflmvA
zQH0`*oaRO(IE>3}DouS#x9^JSnH?DBd-GAsE>3@pHC2xN#RrI5R*)BwfGI0(cRxXX
zRK{_80FVTNR715+qPkMz6gmee4dT)kmgLHzDI&ra;6aj9OJTlIyR6@cIAEaZ);6^a
z60=+h^CkUE9IUwj+nqC+WdE_Yz;G%fLIU6sD>oP+*u)zX%gap`Nec{15<fwgnApZn
zH1~M{tDzqr(Q#6m3ww{mPciO*=s&M#PmF(dm(Wkj%cbGwAZvj=1c$YiRDuG085)R;
z9H~M!*J=E3)Ei<9OEQb`aEZ)9y-QCP!eq=oeKqtZ=g~FO_KJ1xnUGU1=w#I5=!E7|
zgB0=A+uw%Q_y=g`jXAeJ?3oj{Fi95)3!&ZE86hYi=MWS)!Yj=}5x%6CSPy3xEHU9#
z(S&k8!cP%5g+;Nn;0=oYC?qSLaFW0!M1mFF@v}Qbr{0iRr;ta#S<=PbM=x?7Ex`7^
zz?qLyT2O^gM-1(lc%teQDe!clzzhIY1z(@S9^566Vo2N=n;OLl)=>cE+`bqcZuqTC
zTh&{MQrE<zKQ+||NVyQE^@vt2fs3rb=|{j)7m4~Dah$NbHYpY8Wb;X5IGkoJ{6nud
zV`n9W2REUdB-k)svN2iKiLedsD%YxxVjRXx=K!k;h8{@`foeA49I6mz768Lwn$MQ%
zswvh|!*}}nl#WU!rpSHREEjAZR9DvG4DEs@2Q@MffK@2000QQfRl%XTU}@f%bgUkl
zQY%WpeH?KY#l;skmwpJu&b&G8ZSkE*?b0$+=(<M(_)?`rDHbh(twzaKRj2CBN_5W2
zBAxyPNL3%zBcQj$do#LGjFna+jo&Q3S5aJ5@qWpME0Rw;=KUMU+^Vtg_jktCbp$k6
zCLqjfT8ChEkP2}$w!wMxxP5u*g5iN-{QNJ^d2L-)D=h}c#E=T9N;OE+_(#rR;fEU&
zq!v}osk!Z}(V+~*t0rnQY#G31R~L?0!I_w1v|FIFT#O~mg_gHhOemw+EujhI8W|ji
z?GpNMLr0Aj7kG<BiMlM(H0*i)u{cI?E;mt4<JOli@nSGn2H_2@Kt6X0gPox*mTs^0
zgKHeM(*&U305H2@(K7B$$+c9FTqUeX7`N_s#_=$SJ`9`XtMQlK)>V`~g*>QTMLvrl
zJ+Qf;$@unMdXlCiOr^QnD*^Pq0+*A-EhC^cyru?POi~)%(i=FufBn6evhVHO{EPod
z*5lRBUA+A*atWV!@9(cw^A|xfC~cIo+EYs-*YE88Q8fNS{TpkUm+tL;SK5HT`_d0z
zyj1@}ciju!RKouB1u5VCOS8tg^4;kgr{%jZT|8*G)%o+cfBc#BNA*`9&hC@e|M}Yc
zw{9N3ACP|U@U8j}FW0|ICwBe2?D&)QcUNzA{kB==FF#)c^=Tc@l-_Pz{QdzYl)f1x
z2`vt-!OA^RAl<Y_%~eL0K_a;Ucb#6Jq!U>dUwMEGCdPP{%iNHMl8wQn*4aX;mn1)W
zko7H~@h)|)Y?kZNG=;33`m3?V8~>~-iRU&p&6u=al6ynjYNwQ3MK>20U<5n+h}K8F
zn=o<r`m0tz*TG^CbJF)Aym0O^Td$nKB%3(~Y$B}9CI945CA<LZVI^QX5yi11Rl#Bd
zOqSev7^crbJ`$I#aFI?r`^5&tON366JSpTYhLXGRTZ!o4VNpfc7PozJxmrw$n3Xmw
zkDky3P?@LQZkd=}s`DvRktZ2>xF^`tWF%V76uc#jBUMWtg^PT~l0pSi<Zw2av#K-`
z8$6Mb%jk2<MoU0SF0X?O0Tz!czb;j}u3j0nJnu#qG>K16g!7SG(QA1KzYG}KW>Ykc
zlu@hXJ!25EV8G(m)!nW8^W3Mqe#@|2ZDO$1XSvg6{kFp|kg&@(e(>`5?@kO|ZEU=>
zS9fU-o_%!b?#P#;KfwZb-dj0qly7Qn?FhAWwzY)X^+I&$o6a0Po3wUkxa0(nIutAB
zhRDEs#dRVm$~)*iKKfmJ83~gF<9OoIsAMQ(eucm%?${C3)WQwtGzV=~K9<_)VBG8L
zk{=vcc_i#HYsr>+8&w)M*2lOtOnZYc$M8e?21NN+;rmBIX4*8wv(@{vwC|bnuD_0f
zo4o^=7JJT;c2sf6=@^qsd$^kt3i4bWP;>cPyo{2$8%|S=@t1i@cnMmxlR^$MnV?5b
z9DKRhDo@=lX$ls${SnBKEpmBFk**j}APD{gQm3HM$?f0$C3is6bLU2e32>#mN}3Ue
z=)}Q+DyWzd?0O^pcC6R!0_ZM)fKJd%Eu6mRkODM_wF>)iru5qlJ+YINR=fW{^oJ>^
z!zz&tex02jO1vy5b{0jFU-vR`(*oH?s*-GqyjPDa!}ZS^R+6PGe=X_@0d8U;@|4}}
z^bztZ`7FRO%`JL^TkLFNnPzm~S{ufYmh2wsO@EgEa6hPZvXl~1Z0qR-Oe~{lw2~F$
z)*`N>-?#lVcs_J0!iHglUyyftU_qMZ7QsH>NQMxv)MyedAg|&?sxLx;&_3EDHz<6K
zbD1BKLP<Df$1M3uv(b9-l0kyY8h6pxl5+NSbfxAt;7$qpS0-@mIpEpxxrI<1)hW8G
z{??5kleWwl^;k37eIh>(aZdi^RNB#LJUY<BfmL*PQ_Jvjijxz`kj8entHI`#v1yoJ
z^dK@$QUV1co*gM(K~1%3LKEXvF@ZPvL4-~H7H2ac+3wT(5GptNQw#Ds?k5ygj1@F?
zeukr-Az4g1)YTl2ss|#C;)U(EdMOtZEn@rRcUojQ%mcP0yubm0OO82?V@{K#1j>+H
z4yyu#>uk)KLX43Xssoqi&{agF5Hd(MUov+RKwxmr_c*+*L}I26^>Jb7pkIPNyKgV`
z{^uKi%wl!@Sr=k1x&4BZr;@lz^PrIdn2X<M;c_DXaa2k9V}H#@g$`F(Z?K5T&L>&I
zFSW;{wy3_OY~0sJD=WHMzri}7*(B6=3H{@nNEW|gL?u9e$mV3!>@>>n5`a-8S#7^C
z4Ht3?w7`ot^fH8#BZ4Cu_+Tr}^`mI|tO}B=gxkj!ZLTy^2<>}I<Yf>-33A}~{f7`$
zmNxEDRV+D?)JjaA0vM@6sMCTBph9FFQ#4Q{s*uy*Ael<aolA26CqNsvv>InuiGk$&
zPKsZJ+J#4WWeusun1VbF5VSH<n37eEHBg|~RvT}nDl;T`Nc9ob1?M9tfGsqZG|1sc
zb$Z!F$`hLpW#c}6VMZ=+Id`;S(^w_9a2#Mg|IR_*jNH-q;KDC_-CWV|Wu!Ez+S7NI
z5_0iM)_EA_+A6wLx)5WOs>LZ~BVZ5!_;ssUL}tc`k*zbAz5C=BLO4GTl_a6karC^-
zLi*gi<iYeRyL9kIH@shrNJ`i!=~|-&9z?GhED0WH&gZJ?{(>M115snE)QDt%kbL>d
z7pOzZMa0h<SLpneO1*t~b`kMUt`jS~{QVaj_+glzUvId130MB+EA{VZ8#7na*KWVd
z5>H>?>}=}jY;E&z0^WS(z548X-+tvUZupx9pR=jXINOxlk3+9q9%}qW%Al`jU%2<`
zi;cf=h4=qKJ_E>?`TG*@|CGN?-hYn&zsBFM^Eb=iE&i_ZC%;q|;qPzw<CbcDC;xxU
z-wJ<n1@blhtA8u|$8Wtl@YCxJTvA<c_~2C#^x;eWm+y4FZ;rQUoy8|D9j#66?Jcd@
V%<#)!yE5^emnb`DUj4zh{x7cl*46+3

diff --git a/func/collocation/collocation.py b/func/collocation/collocation.py
index c003be7..185024a 100644
--- a/func/collocation/collocation.py
+++ b/func/collocation/collocation.py
@@ -7,11 +7,25 @@ from nltk.metrics import TrigramAssocMeasures
 from nltk.collocations import BigramCollocationFinder
 from nltk.metrics import BigramAssocMeasures
 from db.db_config import get_db
-
+from googletrans import Translator
+import asyncio
 
 def run_collocation_on_text(page):
-    datasetid = page.split('><p>')[0].replace('<div id=', '').replace('"', '').strip()
-    print('dataset id in run_collocation_on_text: ',datasetid)
+    translator = Translator()
+    datasetid = page.split('__')[0]
+    inputstring = page.split('__')[1]
+    #detectlanguage = asyncio.run(translator.detect(inputstring))
+    #print('detected language: ',detectlanguage)
+    inputstringCh = asyncio.run(translator.translate(inputstring, src='en', dest='zh-cn')).text.strip()
+    #inputstringCh = translatech(inputstring).text.strip()
+    print('inputstring original: ',inputstring)
+    print('inputstringCh translated: ',inputstringCh)
+    print('-----------------------------')
+
+    #datasetid = page.split('><p>')[0].replace('<div id=', '').replace('"', '').strip()
+    #print('dataset id in run_collocation_on_text: ',datasetid)
+
+
     collocations = []
 
     nlp = spacy.load('zh_core_web_sm')
@@ -61,8 +75,11 @@ def run_collocation_on_text(page):
     #allscores = scoredbigrams+scoretrigrams
     for item in scoredbigrams:
         itemstr = " ".join(i for i in item[0])
-        if '部' in itemstr:
-            itemstrnew = itemstr.replace('部','').strip().replace(' ','')
+        #if '部' in itemstr:
+        if inputstringCh in itemstr:
+            #itemstrnew = itemstr.replace('部','').strip().replace(' ','')
+            itemstrnew = itemstr.replace(inputstringCh, '').strip().replace(' ', '')
+            #print('itemstrnew: ',itemstrnew)
             #translation = translate(itemstr.replace('部','').strip()).text.lower()
             #print(translation)
             #print('--------------')
diff --git a/func/concordance/concordance.py b/func/concordance/concordance.py
index 71e7c12..166cfb5 100644
--- a/func/concordance/concordance.py
+++ b/func/concordance/concordance.py
@@ -5,10 +5,12 @@ from spacy.matcher import PhraseMatcher
 from db.db_config import get_db
 from nltk.collocations import BigramCollocationFinder
 from nltk.metrics import BigramAssocMeasures
+from googletrans import Translator
+import asyncio
 
 
 
-def collocations(datasetid):
+def collocations(datasetid,word):
     collocations = []
 
     nlp = spacy.load('zh_core_web_sm')
@@ -49,7 +51,8 @@ def collocations(datasetid):
     # allscores = scoredbigrams+scoretrigrams
     for item in scoredbigrams:
         itemstr = " ".join(i for i in item[0])
-        if '部' in itemstr:
+        #if '部' in itemstr:
+        if word in itemstr:
             itemstrnew = itemstr
             #translation = translate(itemstr).text.lower()
             # print(translation)
@@ -66,8 +69,14 @@ def collocations(datasetid):
 
 
 def run_concordance_on_text(page):
-    datasetid = page.replace("<p>Collocations for the word '部' (department) for ",'').replace('</p>','').strip()
+    translator = Translator()
+    datasetid = page.replace('<p>Collocations for','').replace('for dataset','').replace('</p>','').split()[1].strip()
+    word = page.replace('<p>Collocations for','').replace('for dataset','').split()[0].strip()
+
+    wordch = asyncio.run(translator.translate(word, src='en', dest='zh-cn')).text.strip()
+
     print('datasetid inside run_concordance_on_text: ',datasetid)
+    print('word inside run_concordance_on_text: ', wordch)
     #page = page+'部'
     nlp = spacy.load('zh_core_web_sm')
     conn, cursor = get_db()
@@ -81,7 +90,7 @@ def run_concordance_on_text(page):
         data.append([docid, content])
 
     concordances = []
-    terms = collocations(datasetid)
+    terms = collocations(datasetid,wordch)
 
     #terms = [page]
     for i in range(0, len(data)):
diff --git a/func/translation/translation.py b/func/translation/translation.py
index deeca7a..bf74b33 100644
--- a/func/translation/translation.py
+++ b/func/translation/translation.py
@@ -2,6 +2,7 @@ import html_to_json
 from shared.translate import translate
 
 
+
 # Translate text
 def run_translation_on_text(page):
     #print('page from translation.py: ',page)
-- 
GitLab