From 03303cee97a1c30c0b6fd358f61f81826ff9088b Mon Sep 17 00:00:00 2001
From: Tom Edwards <edwardstj1@cardiff.ac.uk>
Date: Thu, 6 Mar 2025 12:23:04 +0000
Subject: [PATCH] demo branch

---
 db/datasets.db                  | Bin 1445888 -> 1445888 bytes
 func/collocation/collocation.py |   2 ++
 func/concordance/concordance.py |  15 +++++++++------
 func/neroverall/neroverall.py   |   2 +-
 func/usas/usas.py               |   1 +
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/db/datasets.db b/db/datasets.db
index e25a539924a328ecd136ce8770ddc7cebc8b941e..8489e3980dc54d847378b5dff7902c70befe1462 100644
GIT binary patch
delta 30749
zcmeHQS!|=%bsl*njr}`QO~y7HN3k<@GjY~2N$pMDx=4_v&EmR1o46>F0!<O*p-C5@
zDbV64MRAq5NTNhZq!wzUG?GY4qQp%kQ?xJbOWi)SMbV<J&3pg<??c^(0s&kD?RV~>
zC~9W1JouqO8E9zo|L=0{x#ygF?mgc<ajt#Ux%Sm)|Ml-(xzhF9`1fo0_qpkhjC6hP
z@EfZy*mFHsu3Wj<7sF@Q7y91r`>($L=rjA?==<lszwdjk@2~s*vai-x>^th)@7wNM
z>s#uJ9Y1UP-M(*kea`&?s{E<@4nFtXpG56naDM{df8G6YeEx>}WB5$EKZ?(r?vLQ}
z5%=@>{DAwz_;k5%<MZF#AHwG|?&nZ<%>69B|EK#FKA&=b5KnHqKY;H&?*7wTk381X
z@9w{L#s1`rcmMeJzVkcZ{G@$k&~6(W9UOIx4m#rYpL@Lj_-}rxpv~mEkGo%f=Y`nA
zJw1=+t~{Cpt=o@2?f;_tc~v_))~f9owvE|Fhlj@EslV&$FMR2>p7(VhMZ0Z<?i*dp
zPadcK;p?OG&Z|8=5502bp;yq%=O20|@iF&D(9BaWp8Vnc&47xZf9U<X!~Uu`JT$lJ
za&fXzJdky#h)j#(xJZ^|HEFu5w`3$Lr?T?YZ{|xP<TJ`?4gcfcjbf-#@idBI)R4Jp
zBep8@73s?2&qx%7=lHF&sYYp4E@b6oKm>P;N<^N7MSah5u2MgT`kb*e@rE-g&gSH7
z_zmaso6d1-DPk-;jrl$4_E-x?GMq*kQ7lTQOH9q9MJ5$pHi}N;xFY;<Bb}8iE}0C-
zIe(*A(`5Lr@P_1m$`~&hK6j(+6{|!ecV@U(4cDH`#olo0%haA4Gz#&cFa!}yfGDHl
zHQg&93T4cE)^aXgl&L1U3<hOR1W@J;=N$gPn4(86?hpsj<5by9#l>b`6bd3&qhTa-
zayr;32ANfszIr02kLAg%<#wrF(nes{@`Nm>%dBo&yQjjl#iSFR!Jvpz6KDwat)rw_
zkBTTw7npBtMrCU1t+NECQH1<r&TF1}EZ<b4T$amQ_|!zriB|AJWKNB80e?~t&NiwM
zJd(*W^$^|>zOvX{GSZ>O*@AShHR^6F=2R@%p~}%Briwchfe;!t^9LXq-HP#$TwIb{
zXN}?{2n0FSwpWH{WNO2JIJhw9OL8HMMvPk6+E}$V7OZW3ks94hYF0eo6fq@xm_1S6
zMz?0c%_UfLEMHK@YBUY=Ii4p7jHa3OMI-8!?qgyx<f1&s{6$r~5~ZTC9U-Qvi3h{q
zaBkzza#wXxiyMhxqa1Dtl}N|lgrtF|vAQTH#qyHKEJ4tXbVUYcE$3R3&doH+Kuu8C
znDfx!SVi!>cGn1JiR|SiO-{K`S^E4UxF$B!M!8P2PoI<KN+|yNBaim#5Lppi!Kff~
z#PjkVQ-dZTy-))%8RDR%EXmzqUd(zzCjEG590XdLApB*BGncuTHOy%whxYA5aybR%
zkn>d$+-;n>G^1P-dLn1+95aJ48iy(b{>JL)Mf&TIxJEhMC@zV_3KcwnD(^!4n*+|Q
zG^(?BC70%zuV!YK29gYj>UyIzDFc%rlA3C>AsAYtSe8ctsIJbl23``6JX>TB#`mG?
zA~3F@s+wZ92j;emO#-n-852AKO%P&~65b<YEzc!M1Q+sFvWAiua|^Qr(xCxKKjd2k
z>-1Jv(+c*0i!xBM5?<4_WCTiP;0Ub{HJh*nWVt|$h=z=SkE+Gj%uE@k4brU%PXMyr
zC}(IWo4b&3xg5Zs*xI6o!iRD`3)VJ@KC@OkR~g9W0+~kL?U7EWtfcUV`i*McJUKz1
zkT28$@gfwqkJ-ogk?MLKJZ_mZJSWXNaS;{eby0;nlG)WzsLAuhikaFHGgGkXm^juV
zDpajQ0dz7Exflj*o)n<c=#*ImrENVi^Xn)h<K^=wP`gG297#6HF))r;bLJG$EVJxK
zi?KYcLY|qnMqO8;Ojegnj7;Q-DQY<)Eu5^Q4<os1`KFD<sx^Cxp2$uGciLOQ$|Q?4
zZmNQnC^?A^SZy>=WVjEYM#gR#M$6h;kf+L+MX-W|WnQPAQD|rYhLIb^bi?df@f}f)
z<BgeF7oj<@6l)VC8hVTtAaIu`@E4{3uIW2A5*hRY)*7)L`^b>Z*pDf?wuxk=0**UF
zU|zW{JZ_S0U2B{znVAgnFFlF6R2;OOVZAAf=>?J8kw?cO6gQGCSSKun=QgS+f+-Qm
z;<+_f7YB<X;}`xV5uFnCxF~L0>k#QlYAq0g@u!6$ylT}J#X(e-=H+5pdeY)3p~I4x
zDeBTpkDE@n$gWbI%nB4=GjncZEk+g>`s_J2($m79faQQWAUTOl;iot_fwr2}DhvT0
zVy0l)WHLn5m5yP0h*jWaatCJ8toz8aoVjGQ4AX$}pSoleT``t}riEjhp)4)Q0p>9m
z!(^MD8stFJ;`Sf(e*LQbGcV5`zWuar?bfW_17<QxTf6&gHZdIOpKet4!C#}|2O;7N
z#^y+D#mvGqS&mX(_-UPY*c=Yj6rLTDz)}<<gno?bG^{oXnfbIC9B&!cp&>{NGlM2f
zha==U1)I^32|g_PEiVm_#MKkf<e672LTp?{xMIw>$!wpk8I{$hj?TKUXpx~+%!M$>
z$yw$ML{Y>tvbbisW@#vaT}YsnIK@LVJt22e7y%?x=+Q8*;26QVa$PJFOf^$?zm3u!
zXrzevBe%%KjBo<<LZF1VIr}Yxj@66&t*qD56Ik%cgCHW+#Ie1BS%<^NR)?6KJH!kS
zL#KD{Cxq!}Es0>d23Nq^o`)j=j|ToH22Y)z8w`<1pcUacH1$l1Sv5R7VuP)Pet{CG
zgUrT6xI&Y>z5-2gjNAqpq{vb7M}%4xz_SZWmWz-=xqKKuJ%{2Xsxt18@IvEQpOgVo
zj*^Bck@G&hLphzMWiyKRXw|I4DOhFSl1s3^&}58qGKm@0S=P~(#S;-fIbo5UT8Npu
zXs@XcZOgG4$Vz9^+kkBsE`3Xs3(BvaqY6c*M$49(5$;>e!mWz2w@d?%PBe;l;j$Tt
z1z1ch^{{oc7>rLC6Q^(kq4sU&-taEsPsSGT#|4erCP@f9+G?m#p4PE+;zeue66G|l
zl8Z~Cc!1~Z5~2dHH*gt5bq3GnViuk<`wq+n&<1w4`RU7GO5554_iRcg6T~TH3P7ZW
zh3hM*2Y;_o@|g8yGz@ED#Y^ykWo}dEj<Bm>=2M?d{<}mqCl_k8;ZQ3)k-3@%TQN1-
z>8-BrZ)jJ0ZCA9L4?o*$!?x<V-ltQ4=Ki4Co&80BYg>iA-Kc%UJ~kZx{jTfpFZ|W5
zYo)GdHc|4KpQz;Z$#1)#R$I0Ci>=y4o3?*?y#F%}TXUy&>rJN<f6xd_9!v(@Z_=WP
zQ&`64J5ktzeKr#LMlE5bN?4I(a1NV=_<Q@cCw1QN(W3F7Cho6G>rXN!3M(W;WoS~J
zTO%2RHu}oM>e!LtE?^@AU4@Gy{JF8w+juUbez|sbiAp^evr=W@-xVd=-e~0F!oXxw
zu(BkSrBe_t!&{&iD=1uZ?$cU4HD(L!h?1PuHmPuC#R!vUOM5@~kL6Wy<_1HkXXS;X
z8#F?biCsc=i<qG&GDhqWJ{9&5+&7vNiFFK0RLFFZicrU1Udf^Q!s{W^=kcRm7z213
zA`GEQL)R?Ni3~brlKg#M-Jmy?F9bH4fwEc1!i2XIY8>fz<zdS7-NiCTKJMlOwk{g!
z?`48{-pFP}vRudrB#fiG_(Oe%vTsr4#+35liaa_47wtBitx;Ws?KAc!<$TFFq%Anp
zWlkI#>&G&pM~$-Dov3=3rY08~9Hxf3vc|PRLbLtTBfI_12pQIUIxcQlzKBuY9_Dw<
z?&o;k8dccOvN4x50%W;GSXcbOx0OkHlq|#X#0-E=Dzsh202?*zkDMe13t5rK$v_Tf
zk~Y5KtXOwr62M?M{vxqv<k!#}^9s++XcfgEIq+xDpp37J$injwi|0YZ^CGkS{99-G
zl$qYpg!dRT+}^CURI}C>PzyzABU0ak>4a<4qKc;NWI~psG;K<*F#mE2duuP#BV$=P
zGmQa5e9#We6E&qMZ)z1peO_M#p>(Q}+TimP%zdL8#@wJ?87pbD$Baawx-8eU(?Y{j
z>VV*+0M#PGYksj+MMK7B*_?92*@HN7&)~;%Efp7DaDNG(a(P;WJv0^_Oh0tu4@;bx
zgJU~uX7fB{GL(6MUd-ok4n)JI6Pr0&4D0SjvC^pE=w-p!DmN-{s>o?=)Rtvnp{Ztx
z46)!oEi+wu3`PM;LatL0i{Fy9xGvXH-~yaW_?q-HpiU~_52HUQ_t6;+oQ}k5$s9it
zwTv0BU==YEQDYsR1U9PJs%2?g>QB(QfKo>i2Dp+iKd}n=t!pu3_mpU?d~=Jf21~w~
z-{H}*Wx<AkpUco3W`tHZWm#nW5Zq;MQq#_D8iZnqrg-<(nO`1ki(Et&vHAzJ(`Z-E
zt$R-6KKy{wxZ#1(A)Bpp8rM0E>zu}QPUAYKah=n+&S_ldG_G?R*Ex;5KV(4OXy-Jp
za~h{M9RL4L<G$b3eWSPQ>X)xv{W4DCK6CYhi+9}5VN2Ej;^CjPPU0LxV<V$BJI?X#
zgmCEBhhH`J_eIFnC@qUagx=LruE#ItC**dLc5p@aEu2N;hg47b_+5sg=E)i&8P@tT
z1*6r@5|JZ?ec*Cs045O8&KjFFglg!BcOgtM*@G=?5Ss@AYd#&J7@RcYI0LgLa$dyI
zL>8g^IRx&xLpoVnrHD(Tvc=t^KF)5$Sp?@%IEvv8=;$H24--I<2JBL~*P{wfk5E_o
zqd05OxOI3CI3A;8g)<j|%S$bKEN4JY?t&1UVso6Soj2IRQuqVA*L%1mmvDj>B$i%!
zp;*LwU~?Adt(it?`%UL0^Hj+w;1Iexrl+BS+o8Ghmo6Tu@O$M-6obNf0wP56EM{gX
z=!(cOhEE3w*%I|j$6&w%@U*FZxj8wP!<hxguW(R+UA~#wp@Ek-5f23oZ|e6Gt~L8s
zyZ1|!w|CY))P|aCqKoUOcp06-P?#tSN@XNs)+x-z<Kx(b<@1S%KHrRP;V^54PQiI-
zIM(MQD((ieNkKq5B?9V3om)`&?`#QYGzcS!g9W)(!U>1z%bT?o@Wt9*l*Oz$d1x&8
z`1leW#4)>E-Vu>Fq=5t_;}V<KBRKVH6mwiC{<j-_Pw2w4B&yqZjur=pads~j0wPmu
zu@R^MoWBJzIe;RZPh=<~#{=m3Cz6<Ug~B<HeP9T^K)x`YIDn&OE@l-6DP|_cb299>
zGiDzi<JhRevfQhf_944n)6gafM_WzDpzV%*Y;dF%RwYzY%?ZH)5()7i6o>jK3!q;z
z0qvwVb$>30<4i~q(<Gf0G6Ks!igbp(B65U7CYS=6ZJdm^i8V!<#cJ6|A=Dad>+n*4
zW?f$((2l2olch#w!%XcU+(0$kWE|t8#lFoT(R{9!VUQpctyj%+Psqi&L_$F19IOPL
zcWW)lK_56LB#NlTFCxa%v|TKHXc)ZV0EqCPLer=Rh39J2m#w8FKco}7Hmk!A<A*oj
z`-INCAi2UhHqOfdFoF#{n-#%Bj&5lbb&t_`f4gy_qsh!Ej^hZBvYF;!CUc$YuTKNQ
zMJ)ys6asWcVOtqSgZ`QVM3!Y)-y&(a+<+(?(TJ2?q{HNNNLDj23d9A3#Bf?#JkTht
z$3|t|N2jleJldCw84*$Qr(P+*M1(**^9N!hgKo|lIRVPe!ikLQTLdC1=3v<`-UV0b
zU~$T2R1RPja8682D%DY+yX1C7B5zchhpNPNj+$?#H5|~=pdev^Q!~FuxVZ!@jEjEa
zb8cE3Q1A_S7MXcvlF(W~B^Y=S+%;S^k)hMeb{HM~Oi#<XMXXgK1jR-mn$FWP$1=D{
z=ckDH74PzT6#t()H{YkL<t~(gsl%B$$Z0o<kbJ~ZurLVEK87#jr!t5!(sG6IA|wZW
z!Rm!t2zEGZwm}kb1#yts2<hb+PSH=<U=r7qB4Ij37C@^Se#$kyKzc!d6~KF30%Hv9
zQ@p2w^Ly2)aa1$5R_K&H6UX{N6rus8NSGUI(Zj-oun0vaJZ`*&YNFAmru!*|28l!q
zyp+IZ!&1sMDCa(f3b4L6E%g3Aq)40r^n+l;9A*(lp=lu0B7{akvxYUUseM4lz*bVF
zLzvPW+fCuw!_NnBGM|MJ<hH@>0o(8>ET-O+6lD!Y#|DO>E&Pb-C+@TnLR^H7Rp%}Y
zmvUgt`n1C_Jko|aVP^A?v2Y-kg77Jh_$k`vIiR5Me92l~|JGY@Y!nR6a_<ltM!8Eg
zQeK1Az%et;pYtV-TTp@XS54snoOERZK|NZWF4x6;h~n(8zWrMdIes~wN_0Jg<!N6;
zikL;n6MZkbFIu}hLtryJ1Uw?Rh2Z-=#L)Cdf)5Z%aEiq=wINe}ZV>$9&={1PPyz~I
zVd&@7W3j@*!7Z9uob6W$4I@{#<XREZMchszZ0yI14l~0nZO(nuIH3p>m~{|rf*v4D
z%eu+xMsA|hZQX@pk)kM3)0-xVjWn_R5N;E+?3CJSGr%ZK0swSgHDs>uf%hC6qPQlW
ztECENSDEv1NTbFivd@M2M#!J~ETs?|0>oE%5Cwu^(H9YA;m%3vrYgpK9OeRH5&UH0
z$@&E0N8W2d*mRIOwn=4Ez5q{)BEFkI$KXSwJOeie5nt-kU)N|9Wdz*;9{Loahwu%?
z0g*6rU$MTWz#H;o+Zz@kyse1r8MA;`?|r??`}#xP*RP(w-rfD(-mc4_>W`~n{3TGe
zeQ>}&FzjdpRcqG)<L<f)qyE_afg3vzd3MJ@CvejV+;jproxn{eaMKCgbOJY>z)dG`
z(+S*k0ypOuLOL+&pGFw<uV7dBZo#O1?|i2R|9T7WeCJyCOAp#x-Fd*?YIwwM!xp-8
zMYVH9wR1(ab49gtMYVH9wR1(ab49gtMYVH9^}c)b6hP>J;+lck&K1?g|Jzqo-_5;M
z;hp-mM_#%oanSAiV_Y`AW7HPo^4@s(^u{ASYxmUs=>2t@VXn5i+uwiWyYsJI?=E!T
zdc5mKq30#-zrW7=y34_~Pd=bA-q*D?=FMr|+@s#yd$x1eU!H&M;ojabvLJPZf$tmd
zx)poI{UNl{|6=-U7x=zm+tBdfplx8lF&a<(%hjj!Vf(A*Bv4K+xiE|C`g|`Qw~>eK
zqqyK|Rx^elpq=SkxVLJ}`I`6Q+t2_6R*9WZx(9%M_^!CRB|bEKo8S+_NV4JpUXC`!
z59&tepg4Nt4#c4c&>0HU^9}fBAbgxJ$62lr;@QBB0eOPRD+t1!b~|0$L=Db2@xwYo
zycGEosJNz$5}@I(qFg`0f6%Ew{(yjjpHx!j=`pS#N`C>jsOc@D?~7TwwysFBl1_Pu
z)FR{U?2*Y;+;XLOa2sn7JvR2Ts+$Kh2<3GKiy=%d<7q(a)+zj~I!BPbou31Co4R&i
zp+pK`7wQsuvo2i018w*YBgQ@QPvN5b>_}4q91Rh#pgc_`bCw`tvQCMqC^GChwvZ1Y
zi_kHFpMaMGGLX%p8o&-F8R-}JA|UP<60S&1L;G94rrD^ckim-TBFl~f<MA9b2+{%a
z+uGGnI5<r}kAn?DDFU@H4G6+y3uM6PQQUV$(vLI-Okr+-O#F0_l$I|wLjF$idc_e6
zfU9Z4a}JhsT>vNuOmdMaLMO}>Pps+mFeBtKU3msv;DHj`QjBw5IMCF*H<{m>Zlsss
zd<g)y;*LDE7)qH@B)0-27U`Gbpz+gR(ZBemue5$5kr)Z_K@`{tKs4yPH)mvk5^_lZ
zf{;L_f^hx}>YBh|B+Hzxp<POVP7wY*Tp`!&4hQ7LH84nG;{u|W|D$p@V1gS>Inqgx
zI4>4MRe<+1Drrukfak_xrIo4z?EvhAo3*%_jPH~<NF_FEC0S_amn<C8Y|iEBrl(Ra
zRDpnq@Hl}tqJ+@}C|3t;hA22si3AL8iP=UB3)hjRhZVhP7{C~1aPWyIJgJy9R!Y7Q
z3?v71f*M6(B@LKwU`kQUtXHACm|09BXQcta_W+q`a7)}w14iqV^OVqpG)t)ut5UlX
zX@t*cvxV)7(=wing<%P%{0Is?h_pr|Up}YI<;_e@Z2B;NdP_`(Y>wL?n$T&>b;y>?
z&dAkdbBdKo9UZ0u$PHvVr{u{t^6^kM7oSsukV7}yi`qwq$y$(T#0l$(0ZHEsfd|hs
zF)_4G>I%%))FH)LO8XEa@Ep?%VKdg_7sAFVKn9qULWQo^Bdb|%;T{4UHce9o3$jMs
z#D(=-S!9b~C{G|U9n|S-1@k1=s7IKmp!tG{%J#R|2MiqMmmz4fb}U!3$f_bjX##qC
z3t&~d9@rg2#499og57H79|F7=kWtu~P>@@pG<-Vb4T{gu?%^Rof<YwkLR^B#K8$cH
z#CoNKB*(c3l15l14={gnzh<`q9VO>Co5bCtp3V>ix+zrW>l)ZV3@C2vl*b@U)|HwK
z0jQWKZ8@xHt$DyLaIzVykl2cw8y*y;bp|FFvlrT<FU2V%gT!@IpXC54mIUZKw+9aU
zpfUi11GiD!J~}|+p=sZWkNlwbYgg@`{~R&^Ju}M51KY(Lyy4sRMx{;}y_6;)(UGVF
zxJkH8j7^-yo!1_HTvy0dA_kYr0cHbao8g<Bx=qyw<?XW8G`uO~!2@)ZV5;CUV?{Mi
zcmtzk98kUh+}HmR!a*0}b%3DMGRmu{Sw|MJE%shWniVL!y#NKAAGpSH79vg>)sjZ`
zE;Lchz*Z3?ka9&RCqk5w<S5ywzCuX|xNl4^NuNrufp^S1vr1Xn@M>tNiKnC~K(@-C
zz0cP>zlG7}>z&^CnV#1RJ+IyDdAQ$w3pJj4arN4TUpF)~GCDFeJQTO#)<)r_f4uqq
zYZHAFHzwYjzuEm#p*z<#*7g1ms9JA7)~a=pQ}g9$Pk-SL{=94EXMEi+-%NME{Ftq`
ztM~Xy?=gJI-QEw*e8K&!@+CK4Xw`NM4UO6d$A$;(BYaoH{wnbEMg_T`a=?FDGwWo?
zmZQ*4vx=lJ6_|cfKW_o~YH(+Rb7K*o&dT`CX^uE)1e-@>CIEzujXKPT@+Ak@mn6R`
z<VP|o$`7$gnagBSr!E<&Pf+S~3>PW@35Cd+(;GnK;WEGleLo>(rBlKMBA*anEdbC{
zxqi)d6y4}sqih<>`V!=w1Z>W)@XUb4k_&|QKo2tFlyXMY)wLOb5-Cd&rED=OtF-XI
zMRQZS5x5IRDnnTl3G+{Hjf2vUW)5{xEbGj4<^U4`gr!C{3{DU(M&wh*R7R=&rIOAH
z76sgcA>eb=OB#x)Dr^}$lcMN1XC{$+%NPgI;~;|POto>I+f3!_B8KRdJOG)<87iVc
zpu+9){+RiPP{2pyNml%ohd#J}7!BpNL6)&{+RP!-AYj!<-H9QBi|0w*azMtni4rxX
zH`{{k(Ew5<FT?8*nNkbgPc8`|=;?9`%#;ix4`4I74Ns_?@?loc|NZhuNM@BZk|z?&
zUJIKJfGMr(b=?C%H!dq7YYiB5A9+pc_|kzA7?KxwtiXDMX+ZA_{|vZ~CvpvRcyWCR
zbg*-|h3uuAnFG!PWrgz)Je?5o<tQ0ZXIyx6g;;OHq@xh=QhD}V8EXgJ4mI{P0HR3$
zO2StGHXauU=8>Beyt)|vTpnm)uEM=?Y9e;nk<JH+L#ec}PuV=c{|o)2QRR1NN2u_(
z<awQ|4!%&I=K@cosMW}sM+UMi-NmwElrj`8kd9Z7eKiVtp-Rk})`qI-&!4LaWD|GA
z_V_6*t%m&qTMjhIMy0urz+JRCKT9yrI+KQCKYq$Qlel33j|%x#jXdFPDT_-mMPh#t
zfGdfcZo8aThnfSJTIK4+06`ZGlk)49S@ALDG+0&`Y&Dca$P`1|Y1Tp4dBQ)kgJg)1
zrZEW@Zlaa{2qd)jKhN_dp(Yn?@<Po6KLk@n@HqbRFZR4oN9&x02Z?(qMh4h6YvR^4
z`k?M1Tu7$_+&2D$e&F=u3;$&5$iGHHNY^a6^<_#C+jPS{<aYp>Z7!01gXmH&;2<SO
z#1leU1DlT*$nA;E3f%z=>_R0e?@(=4^)!nsTi__e=(#hxt5ESmbYKn5+$3~`cm(Fd
zB9qGekU72I1QE(Qd!`$a18lTt?gMo>>46dg4NdZ%-@v3&)1Dn*_*-g;5z+k_b=iiv
zj(m<~$_@t|fG|-zQvt)1W+t+dWAlRs$rVcZ(V!-rGsx%*X369#jbFx*#Onw$f={&3
za@Mgq;tK=Rs#*=0D0GY68Md=XaSV+kPGJjvp5F*R3D2$a7(vPubC3fmfL_Q`+uNXy
zgGdOZF5vVlX=>hssGMnN&)7o}T_mSXP*%uqJoM?Ws-%!j9YL;DWj%T8j9VnwUegdZ
zoIv!`UA_$RTf&W{6pZKy%MB5LWMoK;mrO`c&WfvhFi8Ixr}0CB1my;%Y?53H+Gmt0
zg&yDX0Z+gL^RuogG^f<HebM6l*vuv31Ja8E#h+g#Mj`8G+ginoNa3BxV-LU7dk--E
z6UrgFl;69FXzze0I>7V}Fuemz?*P*~!1N9<y#q||0Mk3b^bRn+15E#E&hP#1f+zm<
zv4@@a-VlBP`>7{iynI7=%rQDPG~9NOz5U(CZ#)ReZhOFHj4O9GW1Y=dXEWB>jCD3+
zoy}NhGuGLRbv9$2%~)qM*4d2xG;hY<_5ZV_=;PO-J)cGLrKbz;x%I?v_FUh@cS^?e
P6#Xmw(d{Ru{{4ReuPU{a

delta 1274
zcmZWpO^6&t811j7YJR(CI@>!tnLn~QEO9Ym`)|5?kj>5^A&7?z3T7b(6Eq+w?nER8
zG3>@3W>|Dd&x&af#0f|i^fGaDY6SyBJjIjo5Ipw5lfft|1hRoxQ!`<E*~3@;!>jjR
zef6r=y31?bWv?im7X%p9E3B3~&)IPP*10bil+TGE2$9S~Ho{!yapu3w!@j0o$b4E;
zKPcwigY5rT?hG4=o0ZrDfiH+H@EPc1D1P>qn~e!tZt?5)-&uL%!ZFP@HPtbzj$``w
zeodAJcxF3UZe4PxIch@+p$yAXElc;Wz7Q|a+i#;&kv^_JS@`xGk^97Zy3oe~u(w}!
zmaaDt?&IMXIeOy=yuKVq@JNU(!6WA30hB*F;T~k{`Nigu5PL<nZOyc4XI_3DN<UQG
z9Cz?fY9GeG*HVi8OaHEmAPqbqvy=?Mfjr`*kCojCmTp*j#qv*kq(tBCz(yKY21CHI
zk7r|@LW(Q=R18fpY(v!y)4vE(f!>{sZ9^I6nBn9;!<M1hnoY0G%g3O+J?|zs;;)Ng
zGS#$A$EMv)DaX9F0%!%O6+{LB#*H|No)lsm(UYtx)o6)nz<v!~cTBTNZ@r9;LfKn!
z1@5@J8WPcUQ>_>(?RAt_<C_8A!a-DS;|5_BrMMA$vNQ-HSf+RwgBm5>X(Se|aH(sV
zFpsKfR#l^ikIKJC+C+|T5joMN<ZtXiVU^?ewpOb+y5snDj}#}pJ#%xIXbkG<s91Do
z_9={0W*B1yN@~fS*9JXA<TmLgS*2Ksl=P&X)0@NbGANeQ5Kt1N&*G9Ra>_qn3Yng4
zmp0Di;tl1^6cVm9Gfla9;JYbYi=)(Wmj2Xn)?__3YI1>-gp*fQRa14WNt0Rh^9+((
z3`c(gnaJE0;Z&rJ>r-2}uBdU8XbK`uG?~QZ#G&SguEHhObGtqrwNkC>72T@(o$pDR
z{<x7mfy(X6?i7cH_u(1zS(iNJ=~5h_`z$KxK9l$vmDde7!{=?1T?xxzNvT+d-vFsd
zuRchv;!$AId@XwdGXahRbG?@xwOU!DSk3+h1@{1-5Iz$VM`L1sBo4ugEbub-us85y
v50k)ubF(om4{F&__PptcN6xV8=nfTUvuBBPXLNnB?>sAszBiZs<d6RWcJy5P

diff --git a/func/collocation/collocation.py b/func/collocation/collocation.py
index b6c12e4..c003be7 100644
--- a/func/collocation/collocation.py
+++ b/func/collocation/collocation.py
@@ -11,6 +11,7 @@ from db.db_config import get_db
 
 def run_collocation_on_text(page):
     datasetid = page.split('><p>')[0].replace('<div id=', '').replace('"', '').strip()
+    print('dataset id in run_collocation_on_text: ',datasetid)
     collocations = []
 
     nlp = spacy.load('zh_core_web_sm')
@@ -22,6 +23,7 @@ def run_collocation_on_text(page):
     data = []
 
     for row in res:
+        print('row in collocation from db: ',row)
 
         docid = row[0]
 
diff --git a/func/concordance/concordance.py b/func/concordance/concordance.py
index 5a0fe24..71e7c12 100644
--- a/func/concordance/concordance.py
+++ b/func/concordance/concordance.py
@@ -8,12 +8,13 @@ from nltk.metrics import BigramAssocMeasures
 
 
 
-def collocations():
+def collocations(datasetid):
     collocations = []
 
     nlp = spacy.load('zh_core_web_sm')
     conn, cursor = get_db()
-    cursor.execute('SELECT * from news;')
+    #cursor.execute('SELECT * from news;')
+    cursor.execute('SELECT * from files where dataset_id = "' + datasetid + '";')
     res = cursor.fetchall()
 
     data = []
@@ -65,11 +66,13 @@ def collocations():
 
 
 def run_concordance_on_text(page):
-    #print('page: ',page)
-    page = page+'部'
+    datasetid = page.replace("<p>Collocations for the word '部' (department) for ",'').replace('</p>','').strip()
+    print('datasetid inside run_concordance_on_text: ',datasetid)
+    #page = page+'部'
     nlp = spacy.load('zh_core_web_sm')
     conn, cursor = get_db()
-    cursor.execute('SELECT * from news;')
+    #cursor.execute('SELECT * from news;')
+    cursor.execute('SELECT * from files where dataset_id = "' + datasetid + '";')
     res = cursor.fetchall()
     data = []
     for row in res:
@@ -78,7 +81,7 @@ def run_concordance_on_text(page):
         data.append([docid, content])
 
     concordances = []
-    terms = collocations()
+    terms = collocations(datasetid)
 
     #terms = [page]
     for i in range(0, len(data)):
diff --git a/func/neroverall/neroverall.py b/func/neroverall/neroverall.py
index 491237d..ff06fc2 100644
--- a/func/neroverall/neroverall.py
+++ b/func/neroverall/neroverall.py
@@ -9,7 +9,7 @@ def run_neroverall_on_text(page):
     print('neroverall page old: ',page)
     datasetid = page.split('><p>')[0].replace('<div id=','').replace('"','').strip()
     #output_json = html_to_json.convert_tables(page)
-    print('neroverall page: ',page)
+    print('neroverall datasetid: ',datasetid)
 
     ner_driver = CkipNerChunker(model="bert-base")
     conn, cursor = get_db()
diff --git a/func/usas/usas.py b/func/usas/usas.py
index 083b321..b961022 100644
--- a/func/usas/usas.py
+++ b/func/usas/usas.py
@@ -9,6 +9,7 @@ from db.db_config import get_db
 def run_usas_on_text(page):
     print('usasoverall page old: ', page)
     datasetid = page.split('><p>')[0].replace('<div id=', '').replace('"', '').strip()
+    print('usas overall datasetid: ', datasetid)
     d = {}
     with open('func/usas/usas_overall.txt') as f:
         for line in f:
-- 
GitLab