From b8343c650f7eceee6707be5dbb37a9eb16046981 Mon Sep 17 00:00:00 2001
From: Dorian
Date: Sat, 15 Oct 2022 19:43:49 +0200
Subject: [PATCH] first experiment with opacity

---
 README.md                                     |    4 +
 make.py                                       |  233 +
 summa/__init__.py                             |    2 +
 summa/__pycache__/__init__.cpython-38.pyc     |  Bin 0 -> 342 bytes
 summa/__pycache__/commons.cpython-38.pyc      |  Bin 0 -> 807 bytes
 summa/__pycache__/graph.cpython-38.pyc        |  Bin 0 -> 7422 bytes
 summa/__pycache__/keywords.cpython-38.pyc     |  Bin 0 -> 6680 bytes
 .../pagerank_weighted.cpython-38.pyc          |  Bin 0 -> 2782 bytes
 summa/__pycache__/summarizer.cpython-38.pyc   |  Bin 0 -> 4305 bytes
 .../__pycache__/syntactic_unit.cpython-38.pyc |  Bin 0 -> 923 bytes
 summa/__pycache__/textrank.cpython-38.pyc     |  Bin 0 -> 3471 bytes
 summa/commons.py                              |   15 +
 summa/exception/__init__.py                   |    0
 summa/exception/textrank_runtime_error.py     |    2 +
 summa/graph.py                                |  244 +
 summa/keywords.py                             |  227 +
 summa/pagerank_weighted.py                    |   86 +
 summa/preprocessing/__init__.py               |    0
 .../__pycache__/__init__.cpython-38.pyc       |  Bin 0 -> 196 bytes
 .../__pycache__/porter.cpython-38.pyc         |  Bin 0 -> 13383 bytes
 .../__pycache__/snowball.cpython-38.pyc       |  Bin 0 -> 98836 bytes
 .../__pycache__/stopwords.cpython-38.pyc      |  Bin 0 -> 19856 bytes
 .../__pycache__/textcleaner.cpython-38.pyc    |  Bin 0 -> 6821 bytes
 .../__pycache__/util.cpython-38.pyc           |  Bin 0 -> 701 bytes
 summa/preprocessing/porter.py                 |  635 +++
 summa/preprocessing/snowball.py               | 4291 +++++++++++++++++
 summa/preprocessing/stopwords.py              |  210 +
 summa/preprocessing/textcleaner.py            |  188 +
 summa/preprocessing/util.py                   |   24 +
 summa/summarizer.py                           |  154 +
 summa/syntactic_unit.py                       |   14 +
 summa/textrank.py                             |   97 +
 template.html                                 |   31 +
 texts/warehouse.txt                           |    1 +
 www/css/main.css                              |   38 +
 www/index.html                                |  177 +
 36 files changed, 6673 insertions(+)
 create mode 100644 README.md
 create mode 100644 make.py
 create mode 100644 summa/__init__.py
 create mode 100644 summa/__pycache__/__init__.cpython-38.pyc
 create mode 100644 summa/__pycache__/commons.cpython-38.pyc
 create mode 100644 summa/__pycache__/graph.cpython-38.pyc
 create mode 100644 summa/__pycache__/keywords.cpython-38.pyc
 create mode 100644 summa/__pycache__/pagerank_weighted.cpython-38.pyc
 create mode 100644 summa/__pycache__/summarizer.cpython-38.pyc
 create mode 100644 summa/__pycache__/syntactic_unit.cpython-38.pyc
 create mode 100644 summa/__pycache__/textrank.cpython-38.pyc
 create mode 100644 summa/commons.py
 create mode 100644 summa/exception/__init__.py
 create mode 100644 summa/exception/textrank_runtime_error.py
 create mode 100644 summa/graph.py
 create mode 100644 summa/keywords.py
 create mode 100644 summa/pagerank_weighted.py
 create mode 100644 summa/preprocessing/__init__.py
 create mode 100644 summa/preprocessing/__pycache__/__init__.cpython-38.pyc
 create mode 100644 summa/preprocessing/__pycache__/porter.cpython-38.pyc
 create mode 100644 summa/preprocessing/__pycache__/snowball.cpython-38.pyc
 create mode 100644 summa/preprocessing/__pycache__/stopwords.cpython-38.pyc
 create mode 100644 summa/preprocessing/__pycache__/textcleaner.cpython-38.pyc
 create mode 100644 summa/preprocessing/__pycache__/util.cpython-38.pyc
 create mode 100644 summa/preprocessing/porter.py
 create mode 100644 summa/preprocessing/snowball.py
 create mode 100644 summa/preprocessing/stopwords.py
 create mode 100644 summa/preprocessing/textcleaner.py
 create mode 100644 summa/preprocessing/util.py
 create mode 100644 summa/summarizer.py
 create mode 100644 summa/syntactic_unit.py
 create mode 100644 summa/textrank.py
 create mode 100644 template.html
 create mode 100644 texts/warehouse.txt
 create mode 100644 www/css/main.css
 create mode 100644 www/index.html
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3b4dfe2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+
+opacity experiment using:
+* textrank python implementation (https://github.com/summanlp/textrank), modified under `summa/` so that it gives us all the sentences with their scores.
+* wikipedia python module (https://pypi.org/project/wikipedia/)
\ No newline at end of file
diff --git a/make.py b/make.py
new file mode 100644
index 0000000..c1b8436
--- /dev/null
+++ b/make.py
@@ -0,0 +1,233 @@
+from jinja2 import Template
+import os
+import wikipedia
+from markdown import markdown
+
+# importing module
+import sys
+
+# appending a path
+# sys.path.append('textrank')
+
+# importing required module
+import summa.summarizer
+from summa.summarizer import summarize
+
+
+# TODO:
+# * DONE: wiki header
+
+# these three would require starting from the HTML itself and keeping an index...
+# * wiki paragraph
+# * wiki hyperlinks
+# * list
+
+
+# variables
+# ------------------------------------------------------------------------
+
+# wikipedia_page = "forest"
+# wikipedia_page = "warehouse"
+# wikipedia_page = "river"
+wikipedia_page = "elderflower"
+# wikipedia_page = "mushroom"
+
+TEMPLATE_PATH = 'template.html'
+HTML_PATH = 'www/index.html'
+
+
+# utilities
+# ------------------------------------------------------------------------
+
+def map_value(value, min, max, new_min, new_max):
+    return (((value - min) / (max - min)) * (new_max - new_min)) + new_min
+
+def remap_score(s, min_score, max_score):
+    s.score = 1 - map_value(s.score, min_score, max_score, 0, 1)
+    return s
+
+def compress_score(s):
+
+    # compress whites
+    s.score = s.score**3
+
+    # stretch + limiter
+    # s.score = min(map_value(s.score, 0, 1, 0, 1.5), 1)
+    s.score = 1 if s.score > 0.8 else s.score
+
+    return s
+
+
+# wikipedia
+# ------------------------------------------------------------------------
+
+def wikipage(pagename):
+    # get wikipedia page content by page name
+
+    print(pagename)
+    wikipedia.set_lang("en")
+    try:
+        results = wikipedia.search(pagename, results=1, suggestion=False)
+        try:
+            pagename = results[0]
+        except IndexError:
+            # if there is no suggestion or search result, the page doesn't exist
+            raise wikipedia.PageError(pagename)
+        return wikipedia.WikipediaPage(pagename, redirect=True, preload=True)
+    except wikipedia.exceptions.DisambiguationError as e:
+        print(e.options)
+        page = ''
+
+    return page
+
+
+# parsing and gluing html
+# ------------------------------------------------------------------------
+
+def is_header(s):
+
+    # i is the header level
+    i = 0
+    while s.text[i] == '=' and s.text[len(s.text) - 1 - i] == '=':
+        i += 1
+
+    if i > 0:
+        header_text = s.text[i:(-1-i)].strip()
+        header_level = i
+        return [header_text, header_level]
+
+def wiki_parse(sentences):
+
+    # TODO: doesn't work with section nesting!!
+    # 1. replace each wikitext header with an html header
+    # 2. add the opacity to each element
+    # 3. compute an artificial score for each header: the average score of its section
+
+    new_sentences = []
+
+    print('--- HEADERS ---')
+    for i in range(len(sentences)):
+
+        s = sentences[i]
+
+        # if the sentence is a header
+        header = is_header(s)
+        if header:
+            print(header[0])
+
+            # start computing the average score of this section
+            current_total = 0
+            current_count = 0
+            next_header_found = False
+            j = i + 1
+
+            # iterate until we reach the next header of the same or higher level
+            while j < len(sentences) and not next_header_found:
+
+                s2 = sentences[j]
+                s2_header = is_header(s2)
+
+                if s2_header:
+                    print(' ' + s2_header[0])
+                    if header[1] >= s2_header[1]:
+                        # encountered a header of the same or higher level
+                        next_header_found = True
+                        print('X ' + s2_header[0])
+
+                else:
+                    # add every sentence to the average
+                    current_total += s2.score
+                    current_count += 1
+
+                j += 1
+
+            if current_count != 0:
+                s.score = current_total / current_count
+            else:
+                s.score = "NaN"
+
+            s.html = ''+header[0]+''
+
+            # stop at the references part
+            if header[0] == "References" or header[0] == "See also":
+                break
+
+            new_sentences.append(s)
+
+        # not a header
+        else:
+            s.html = ''+s.text+''
+            new_sentences.append(s)
+
+    return new_sentences
+
+
+# textrank
+# ------------------------------------------------------------------------
+
+def txt2rankedsentences(txt):
+    # from txt to ranked sentences
+    return summarize(txt, split=True)
+
+
+# main
+# ------------------------------------------------------------------------
+
+if __name__ == '__main__':
+
+
+    # --- WIKI REQUEST ---
+
+    # get text from wikipedia
+    print('--- WIKI ---')
+    page = wikipage(wikipedia_page)
+    if not page:
+        sys.exit("--- STOP ---")
+    title = ''+page.title+''
+    text = page.content
+
+    # print text in terminal
+    print('--- TXT ---')
+    print(text)
+
+
+    # --- APPLY TEXTRANK ---
+
+    # apply textrank
+    sentences = txt2rankedsentences(text)
+
+    # print ranked sentences in terminal
+    print('--- SENTENCES ---')
+    for s in sentences:
+        print('[{score}] : {sentence}'.format(score = s.score, sentence = s.text))
+
+
+    # --- REMAP AND COMPRESS ---
+
+    # sorted version of the list
+    sorted_sentences = sorted(sentences, key=lambda s: s.score, reverse=True)
+    # remap sentence scores to [0, 1]
+    max_score = sorted_sentences[0].score
+    min_score = sorted_sentences[-1].score
+    sentences = [remap_score(s, min_score, max_score) for s in sentences]
+    # compress scores (make more stuff invisible)
+    sentences = [compress_score(s) for s in sentences]
+
+
+    # -- PARSE ---
+
+    # parse every sentence to either a span or a header
+    sentences = wiki_parse(sentences)
+    # add back the page title
+    sentences = [{ 'html': title, 'text': page.title, 'score': 1 }] + sentences
+
+
+    # -- TEMPLATING ---
+
+    # get the template
+    with open(TEMPLATE_PATH, 'r') as file:
+        template = Template(file.read())
+    # render the template
+    html = template.render(sentences = sentences)
+    with open(HTML_PATH, 'w') as file:
+        file.write(html)
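For reference, a small self-contained sketch of what the remapping above does to made-up scores (illustrative values only; in make.py the numbers come from the modified summa summarizer, each sentence object carrying a numeric score):

# Illustrative only: three fake raw TextRank scores (lowest, middle, highest).
raw = [0.02, 0.10, 0.35]
lo, hi = min(raw), max(raw)

# remap_score: rescale linearly to [0, 1], then invert.
remapped = [1 - ((v - lo) / (hi - lo)) for v in raw]      # -> [1.0, ~0.76, 0.0]

# compress_score: cube ("compress whites"), then snap values above 0.8 up to 1.
cubed = [r ** 3 for r in remapped]
final = [1 if c > 0.8 else c for c in cubed]              # -> [1, ~0.43, 0]
# Note that after the inversion the top-ranked sentence ends up with the lowest value.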
diff --git a/summa/__init__.py b/summa/__init__.py
new file mode 100644
index 0000000..e55963f
--- /dev/null
+++ b/summa/__init__.py
@@ -0,0 +1,2 @@
+from summa import commons, graph, keywords, pagerank_weighted, \
+    summarizer, syntactic_unit, textrank
diff --git a/summa/__pycache__/__init__.cpython-38.pyc b/summa/__pycache__/__init__.cpython-38.pyc
new file mode 100644
GIT binary patch literal 342 [binary data omitted]
diff --git a/summa/__pycache__/commons.cpython-38.pyc b/summa/__pycache__/commons.cpython-38.pyc
new file mode 100644
GIT binary patch literal 807 [binary data omitted]
diff --git a/summa/__pycache__/graph.cpython-38.pyc b/summa/__pycache__/graph.cpython-38.pyc
new file mode 100644
GIT binary patch literal 7422 [binary data omitted]
diff --git a/summa/__pycache__/keywords.cpython-38.pyc b/summa/__pycache__/keywords.cpython-38.pyc
new file mode 100644
GIT binary patch literal 6680 [binary data omitted]
diff --git a/summa/__pycache__/pagerank_weighted.cpython-38.pyc b/summa/__pycache__/pagerank_weighted.cpython-38.pyc
new file mode 100644
GIT binary patch literal 2782 [binary data omitted]
diff --git a/summa/__pycache__/summarizer.cpython-38.pyc b/summa/__pycache__/summarizer.cpython-38.pyc
new file mode 100644
GIT binary patch literal 4305 [binary data omitted]
diff --git a/summa/__pycache__/syntactic_unit.cpython-38.pyc b/summa/__pycache__/syntactic_unit.cpython-38.pyc
new file mode 100644
GIT binary patch literal 923 [binary data omitted]
diff --git a/summa/__pycache__/textrank.cpython-38.pyc b/summa/__pycache__/textrank.cpython-38.pyc
new file mode 100644
GIT binary patch literal 3471 [binary data omitted]
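The vendored summa sources start below. The diffs for summa/summarizer.py and summa/syntactic_unit.py fall outside this excerpt, so the actual change that exposes a score for every sentence is not visible here; judging from how make.py consumes the result, the assumed interface is roughly the following sketch (hypothetical, based on the upstream summa package rather than on the diff itself):

class SyntacticUnit:
    # Assumed shape of the objects that make.py iterates over.
    def __init__(self, text, token=None, score=0.0):
        self.text = text      # original sentence, printed and wrapped in HTML by make.py
        self.token = token    # preprocessed form used for ranking
        self.score = score    # numeric TextRank score, later remapped by make.py
        self.html = ''        # filled in by make.py's wiki_parse()

def summarize(text, split=True):
    """Assumed behaviour of the modified summarizer: return all ranked
    sentences as SyntacticUnit objects instead of a trimmed summary string."""
    raise NotImplementedError  # placeholder only; the real diff is not shown here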
zs~v|~W<2k|aXtNl=S7j?QwPM6o#2cuIdY6G1H zlZEyIQPLkk5L|-}F7>1icS8kOftwqzE8~7%sn^Gnw(5&X@13$i9;85KdGJNCD?>6>SA$Y4#DJ6o)~SL9;PX3bK|bv)C@n3tufQ+&xef zH~FLUTSEj={UC|XA8~(Trr^#}jVwh-8Qi&Bs}c zQmWT2zT$MHrfDR6LLn3+gC4*WTQ1yv2@PNbd{F{lAU}y^($QVI5A{Yy*lz@%=wD`V z4%oM!Dl6;h3V1^&LJqLDsd+!%OMQSdCs(l;kE^`Niyw~YaBXxCm^zMnB>GREzIkS9 zqUorU0P95nLlCRg&1ffU2HnW7*6aT(N#Pn{J{JS>7SgSSN$$$kDjXY{2?GKTcs@jI zr^uz!V!GG=|=5t&<(Ty!v}biyHf2#>O|I@k;2GgwW>uo zGc$(pXN2g=ygTl%jLe(;7T*5Ef*&H!+=7U$-arq+INoo`y>^VS-IcA5N(X&YrCDSL zn-eUI8#hX{qO@nc=TFyHEE1aOmDMMYKmXlkYyI)&Cr?eqW)ot}5N;lQP0@}iQ?NT< zfL!$}Z1@r0YzYnH^G?;NaOXe8QR`NTb}!qJ8Qa>C(^<>iCdTnYxn+XJ|3pVK%`FzQ zKR7pRrV;D_wHJ_>ZM$lL+5<;>2RvB=T~N!^Vx)+X2cS!;ul?h)aCGIw0jjd?Da%^| zuA_F_@l~#?$J4^4ER|F&?GcSLkiE8N)Zg``?b+iwqEEQ`R?ic4$iKdPfSe$TZ`sgW zp_{qSPYZ$$u=(SyKddf zxY<536kK_CRGCr;*z#1}0o5#>Vx93(Q6>m~EGEj#kMjT!{`qK1m56szi`d%LI4LDS z0D>vndMZ=+E5Q`SgDLa@DB7lFMK@RE+d%PO)1=o+g&b{3YBYT7qU;{Y_YiX#2}IH5 zanC98v%f0!9G@fZFk3{`R^>Mlh3XMFjNfX}9<5ewc69Ii7*+SE`3=oiCh1oFShri0 zu-Z{IT1f@6#Sn`bR;!cUsk&^n&Rm=-NM*}Q{{aLX#B!OEUq*?`M-_&jQ{`2cB=Ow& OlJEN^|Azm*&wl_&d!|$X literal 0 HcmV?d00001 diff --git a/summa/commons.py b/summa/commons.py new file mode 100644 index 0000000..ac7f939 --- /dev/null +++ b/summa/commons.py @@ -0,0 +1,15 @@ +from .graph import Graph + + +def build_graph(sequence): + graph = Graph() + for item in sequence: + if not graph.has_node(item): + graph.add_node(item) + return graph + + +def remove_unreachable_nodes(graph): + for node in graph.nodes(): + if sum(graph.edge_weight((node, other)) for other in graph.neighbors(node)) == 0: + graph.del_node(node) diff --git a/summa/exception/__init__.py b/summa/exception/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/summa/exception/textrank_runtime_error.py b/summa/exception/textrank_runtime_error.py new file mode 100644 index 0000000..eee2584 --- /dev/null +++ b/summa/exception/textrank_runtime_error.py @@ -0,0 +1,2 @@ +class TextrankRuntimeError(RuntimeError): + pass \ No newline at end of file diff --git a/summa/graph.py b/summa/graph.py new file mode 100644 index 0000000..48075ec --- /dev/null +++ b/summa/graph.py @@ -0,0 +1,244 @@ +from abc import ABCMeta, abstractmethod + + +class IGraph(metaclass=ABCMeta): + """ + Represents the interface or contract that the graph for TextRank should implement + """ + + @abstractmethod + def nodes(self): + """ + Return node list. + + @rtype: list + @return: Node list. + """ + pass + + + @abstractmethod + def edges(self): + """ + Return all edges in the graph. + + @rtype: list + @return: List of all edges in the graph. + """ + pass + + @abstractmethod + def neighbors(self, node): + """ + Return all nodes that are directly accessible from given node. + + @type node: node + @param node: Node identifier + + @rtype: list + @return: List of nodes directly accessible from given node. + """ + pass + + + @abstractmethod + def has_node(self, node): + """ + Return whether the requested node exists. + + @type node: node + @param node: Node identifier + + @rtype: boolean + @return: Truth-value for node existence. + """ + pass + + + @abstractmethod + def add_node(self, node, attrs=None): + """ + Add given node to the graph. + + @attention: While nodes can be of any type, it's strongly recommended to use only + numbers and single-line strings as node identifiers if you intend to use write(). + + @type node: node + @param node: Node identifier. + + @type attrs: list + @param attrs: List of node attributes specified as (attribute, value) tuples. 
+ """ + pass + + + @abstractmethod + def add_edge(self, edge, wt=1, label='', attrs=[]): + """ + Add an edge to the graph connecting two nodes. + + An edge, here, is a pair of nodes like C{(n, m)}. + + @type edge: tuple + @param edge: Edge. + + @type wt: number + @param wt: Edge weight. + + @type label: string + @param label: Edge label. + + @type attrs: list + @param attrs: List of node attributes specified as (attribute, value) tuples. + """ + pass + + + @abstractmethod + def has_edge(self, edge): + """ + Return whether an edge exists. + + @type edge: tuple + @param edge: Edge. + + @rtype: boolean + @return: Truth-value for edge existence. + """ + pass + + + @abstractmethod + def edge_weight(self, edge): + """ + Get the weight of an edge. + + @type edge: edge + @param edge: One edge. + + @rtype: number + @return: Edge weight. + """ + pass + + + @abstractmethod + def del_node(self, node): + """ + Remove a node from the graph. + + @type node: node + @param node: Node identifier. + """ + pass + + +class Graph(IGraph): + """ + Implementation of an undirected graph, based on Pygraph + """ + + WEIGHT_ATTRIBUTE_NAME = "weight" + DEFAULT_WEIGHT = 0 + + LABEL_ATTRIBUTE_NAME = "label" + DEFAULT_LABEL = "" + + def __init__(self): + # Metadata about edges + self.edge_properties = {} # Mapping: Edge -> Dict mapping, lablel-> str, wt->num + self.edge_attr = {} # Key value pairs: (Edge -> Attributes) + # Metadata about nodes + self.node_attr = {} # Pairing: Node -> Attributes + self.node_neighbors = {} # Pairing: Node -> Neighbors + + def has_edge(self, edge): + u,v = edge + return (u,v) in self.edge_properties and (v,u) in self.edge_properties + + def edge_weight(self, edge): + return self.get_edge_properties( edge ).setdefault( self.WEIGHT_ATTRIBUTE_NAME, self.DEFAULT_WEIGHT ) + + def neighbors(self, node): + return self.node_neighbors[node] + + def has_node(self, node): + return node in self.node_neighbors + + def add_edge(self, edge, wt=1, label='', attrs=[]): + u, v = edge + if (v not in self.node_neighbors[u] and u not in self.node_neighbors[v]): + self.node_neighbors[u].append(v) + if (u != v): + self.node_neighbors[v].append(u) + + self.add_edge_attributes((u,v), attrs) + self.set_edge_properties((u, v), label=label, weight=wt) + else: + raise ValueError("Edge (%s, %s) already in graph" % (u, v)) + + def add_node(self, node, attrs=None): + if attrs is None: + attrs = [] + if (not node in self.node_neighbors): + self.node_neighbors[node] = [] + self.node_attr[node] = attrs + else: + raise ValueError("Node %s already in graph" % node) + + def nodes(self): + return list(self.node_neighbors.keys()) + + def edges(self): + return [ a for a in list(self.edge_properties.keys()) ] + + def del_node(self, node): + for each in list(self.neighbors(node)): + if (each != node): + self.del_edge((each, node)) + del(self.node_neighbors[node]) + del(self.node_attr[node]) + + # Helper methods + def get_edge_properties(self, edge): + return self.edge_properties.setdefault( edge, {} ) + + def add_edge_attributes(self, edge, attrs): + for attr in attrs: + self.add_edge_attribute(edge, attr) + + def add_edge_attribute(self, edge, attr): + self.edge_attr[edge] = self.edge_attributes(edge) + [attr] + + if (edge[0] != edge[1]): + self.edge_attr[(edge[1],edge[0])] = self.edge_attributes((edge[1], edge[0])) + [attr] + + def edge_attributes(self, edge): + try: + return self.edge_attr[edge] + except KeyError: + return [] + + def set_edge_properties(self, edge, **properties ): + self.edge_properties.setdefault( edge, {} 
).update( properties ) + if (edge[0] != edge[1]): + self.edge_properties.setdefault((edge[1], edge[0]), {}).update( properties ) + + def del_edge(self, edge): + u, v = edge + self.node_neighbors[u].remove(v) + self.del_edge_labeling((u, v)) + if (u != v): + self.node_neighbors[v].remove(u) + self.del_edge_labeling((v, u)) # TODO: This is redundant + + def del_edge_labeling( self, edge ): + keys = [edge] + keys.append(edge[::-1]) + + for key in keys: + for mapping in [self.edge_properties, self.edge_attr ]: + try: + del ( mapping[key] ) + except KeyError: + pass \ No newline at end of file diff --git a/summa/keywords.py b/summa/keywords.py new file mode 100644 index 0000000..8505770 --- /dev/null +++ b/summa/keywords.py @@ -0,0 +1,227 @@ +from itertools import combinations as _combinations +from queue import Queue + +from .pagerank_weighted import pagerank_weighted_scipy as _pagerank +from .preprocessing.textcleaner import clean_text_by_word as _clean_text_by_word +from .preprocessing.textcleaner import tokenize_by_word as _tokenize_by_word +from .commons import build_graph as _build_graph +from .commons import remove_unreachable_nodes as _remove_unreachable_nodes + +WINDOW_SIZE = 2 + +"""Check tags in http://www.clips.ua.ac.be/pages/mbsp-tags and use only first two letters +Example: filter for nouns and adjectives: +INCLUDING_FILTER = ['NN', 'JJ']""" +INCLUDING_FILTER = ['NN', 'JJ'] +EXCLUDING_FILTER = [] + + +def _get_pos_filters(): + return frozenset(INCLUDING_FILTER), frozenset(EXCLUDING_FILTER) + + +def _get_words_for_graph(tokens): + include_filters, exclude_filters = _get_pos_filters() + if include_filters and exclude_filters: + raise ValueError("Can't use both include and exclude filters, should use only one") + + result = [] + for word, unit in tokens.items(): + if exclude_filters and unit.tag in exclude_filters: + continue + if (include_filters and unit.tag in include_filters) or not include_filters or not unit.tag: + result.append(unit.token) + return result + + +def _get_first_window(split_text): + return split_text[:WINDOW_SIZE] + + +def _set_graph_edge(graph, tokens, word_a, word_b): + if word_a in tokens and word_b in tokens: + lemma_a = tokens[word_a].token + lemma_b = tokens[word_b].token + edge = (lemma_a, lemma_b) + + if graph.has_node(lemma_a) and graph.has_node(lemma_b) and not graph.has_edge(edge): + graph.add_edge(edge) + + +def _process_first_window(graph, tokens, split_text): + first_window = _get_first_window(split_text) + for word_a, word_b in _combinations(first_window, 2): + _set_graph_edge(graph, tokens, word_a, word_b) + + +def _init_queue(split_text): + queue = Queue() + first_window = _get_first_window(split_text) + for word in first_window[1:]: + queue.put(word) + return queue + + +def _process_word(graph, tokens, queue, word): + for word_to_compare in _queue_iterator(queue): + _set_graph_edge(graph, tokens, word, word_to_compare) + + +def _update_queue(queue, word): + queue.get() + queue.put(word) + assert queue.qsize() == (WINDOW_SIZE - 1) + + +def _process_text(graph, tokens, split_text): + queue = _init_queue(split_text) + for i in range(WINDOW_SIZE, len(split_text)): + word = split_text[i] + _process_word(graph, tokens, queue, word) + _update_queue(queue, word) + + +def _queue_iterator(queue): + iterations = queue.qsize() + for i in range(iterations): + var = queue.get() + yield var + queue.put(var) + + +def _set_graph_edges(graph, tokens, split_text): + _process_first_window(graph, tokens, split_text) + _process_text(graph, tokens, split_text) + + 
+def _extract_tokens(lemmas, scores, ratio, words): + lemmas.sort(key=lambda s: scores[s], reverse=True) + + # If no "words" option is selected, the number of sentences is + # reduced by the provided ratio, else, the ratio is ignored. + length = len(lemmas) * ratio if words is None else words + return [(scores[lemmas[i]], lemmas[i],) for i in range(int(length))] + + +def _lemmas_to_words(tokens): + lemma_to_word = {} + for word, unit in tokens.items(): + lemma = unit.token + if lemma in lemma_to_word: + lemma_to_word[lemma].append(word) + else: + lemma_to_word[lemma] = [word] + return lemma_to_word + + +def _get_keywords_with_score(extracted_lemmas, lemma_to_word): + """ + :param extracted_lemmas:list of tuples + :param lemma_to_word: dict of {lemma:list of words} + :return: dict of {keyword:score} + """ + keywords = {} + for score, lemma in extracted_lemmas: + keyword_list = lemma_to_word[lemma] + for keyword in keyword_list: + keywords[keyword] = score + return keywords + + +def _strip_word(word): + stripped_word_list = list(_tokenize_by_word(word)) + return stripped_word_list[0] if stripped_word_list else "" + + +def _get_combined_keywords(_keywords, split_text): + """ + :param keywords:dict of keywords:scores + :param split_text: list of strings + :return: combined_keywords:list + """ + result = [] + _keywords = _keywords.copy() + len_text = len(split_text) + for i in range(len_text): + word = _strip_word(split_text[i]) + if word in _keywords: + combined_word = [word] + if i + 1 == len_text: + result.append(word) # appends last word if keyword and doesn't iterate + for j in range(i + 1, len_text): + other_word = _strip_word(split_text[j]) + if other_word in _keywords and other_word == split_text[j] \ + and other_word not in combined_word: + combined_word.append(other_word) + else: + for keyword in combined_word: + _keywords.pop(keyword) + result.append(" ".join(combined_word)) + break + return result + + +def _get_average_score(concept, _keywords): + word_list = concept.split() + word_counter = 0 + total = 0 + for word in word_list: + total += _keywords[word] + word_counter += 1 + return total / word_counter + + +def _format_results(_keywords, combined_keywords, split, scores): + """ + :param keywords:dict of keywords:scores + :param combined_keywords:list of word/s + """ + combined_keywords.sort(key=lambda w: _get_average_score(w, _keywords), reverse=True) + if scores: + return [(word, _get_average_score(word, _keywords)) for word in combined_keywords] + if split: + return combined_keywords + return "\n".join(combined_keywords) + + +def keywords(text, ratio=0.2, words=None, language="english", split=False, scores=False, deaccent=False, additional_stopwords=None): + if not isinstance(text, str): + raise ValueError("Text parameter must be a Unicode object (str)!") + + # Gets a dict of word -> lemma + tokens = _clean_text_by_word(text, language, deacc=deaccent, additional_stopwords=additional_stopwords) + split_text = list(_tokenize_by_word(text)) + + # Creates the graph and adds the edges + graph = _build_graph(_get_words_for_graph(tokens)) + _set_graph_edges(graph, tokens, split_text) + del split_text # It's no longer used + + _remove_unreachable_nodes(graph) + + # PageRank cannot be run in an empty graph. + if len(graph.nodes()) == 0: + return [] if split else "" + + # Ranks the tokens using the PageRank algorithm. 
Returns dict of lemma -> score + pagerank_scores = _pagerank(graph) + + extracted_lemmas = _extract_tokens(graph.nodes(), pagerank_scores, ratio, words) + + lemmas_to_word = _lemmas_to_words(tokens) + keywords = _get_keywords_with_score(extracted_lemmas, lemmas_to_word) + + # text.split() to keep numbers and punctuation marks, so separeted concepts are not combined + combined_keywords = _get_combined_keywords(keywords, text.split()) + + return _format_results(keywords, combined_keywords, split, scores) + + +def get_graph(text, language="english", deaccent=False): + tokens = _clean_text_by_word(text, language, deacc=deaccent) + split_text = list(_tokenize_by_word(text, deacc=deaccent)) + + graph = _build_graph(_get_words_for_graph(tokens)) + _set_graph_edges(graph, tokens, split_text) + + return graph diff --git a/summa/pagerank_weighted.py b/summa/pagerank_weighted.py new file mode 100644 index 0000000..e0bb90c --- /dev/null +++ b/summa/pagerank_weighted.py @@ -0,0 +1,86 @@ +from scipy.sparse import csr_matrix +from scipy.linalg import eig +from numpy import empty as empty_matrix + +CONVERGENCE_THRESHOLD = 0.0001 + + +def pagerank_weighted(graph, initial_value=None, damping=0.85): + """Calculates PageRank for an undirected graph""" + if initial_value == None: initial_value = 1.0 / len(graph.nodes()) + scores = dict.fromkeys(graph.nodes(), initial_value) + + iteration_quantity = 0 + for iteration_number in range(100): + iteration_quantity += 1 + convergence_achieved = 0 + for i in graph.nodes(): + rank = 1 - damping + for j in graph.neighbors(i): + neighbors_sum = sum(graph.edge_weight((j, k)) for k in graph.neighbors(j)) + rank += damping * scores[j] * graph.edge_weight((j, i)) / neighbors_sum + + if abs(scores[i] - rank) <= CONVERGENCE_THRESHOLD: + convergence_achieved += 1 + + scores[i] = rank + + if convergence_achieved == len(graph.nodes()): + break + + return scores + + +def pagerank_weighted_scipy(graph, damping=0.85): + adjacency_matrix = build_adjacency_matrix(graph) + probability_matrix = build_probability_matrix(graph) + + # Suppress deprecation warnings from numpy. 
+    # See https://github.com/summanlp/textrank/issues/57
+    import warnings
+    with warnings.catch_warnings():
+        from numpy import VisibleDeprecationWarning
+        warnings.filterwarnings("ignore", category=VisibleDeprecationWarning)
+        warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
+        pagerank_matrix = damping * adjacency_matrix.todense() + (1 - damping) * probability_matrix
+
+    vals, vecs = eig(pagerank_matrix, left=True, right=False)
+    return process_results(graph, vecs)
+
+
+def build_adjacency_matrix(graph):
+    row = []
+    col = []
+    data = []
+    nodes = graph.nodes()
+    length = len(nodes)
+
+    for i in range(length):
+        current_node = nodes[i]
+        neighbors_sum = sum(graph.edge_weight((current_node, neighbor)) for neighbor in graph.neighbors(current_node))
+        for j in range(length):
+            edge_weight = float(graph.edge_weight((current_node, nodes[j])))
+            if i != j and edge_weight != 0:
+                row.append(i)
+                col.append(j)
+                data.append(edge_weight / neighbors_sum)
+
+    return csr_matrix((data,(row,col)), shape=(length,length))
+
+
+def build_probability_matrix(graph):
+    dimension = len(graph.nodes())
+    matrix = empty_matrix((dimension,dimension))
+
+    probability = 1 / float(dimension)
+    matrix.fill(probability)
+
+    return matrix
+
+
+def process_results(graph, vecs):
+    scores = {}
+    for i, node in enumerate(graph.nodes()):
+        scores[node] = abs(vecs[i][0])
+
+    return scores
diff --git a/summa/preprocessing/__init__.py b/summa/preprocessing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/summa/preprocessing/__pycache__/__init__.cpython-38.pyc b/summa/preprocessing/__pycache__/__init__.cpython-38.pyc
new file mode 100644
GIT binary patch literal 196 [binary data omitted]
diff --git a/summa/preprocessing/__pycache__/porter.cpython-38.pyc b/summa/preprocessing/__pycache__/porter.cpython-38.pyc
new file mode 100644
GIT binary patch literal 13383 [binary data omitted]
z=}ZI_dtP))JMi6Tdl>Xx?+t>Q3)`sqrllsw>#@7!%uOsq&TnG&7y$Q=hkyoPK_2ss z<-wghOJzIR4L^rfw(-?ChR!uOWZbuyFZ28Jj|3sXHwfO(oF4+f7iy;{+;)+4EQ?1R|x)&;Hv;A;~(gLh~NM+Tn~2Ix9Isc!9Njvhd|V|zY7=Q zA7Q|w1pi9#7=bJp7n*;j`+EdGAov%8e)%h`9PZIou z;3)#~8lh9&%ZyJm;Ok7u>&i@q;D-$RcLJ`^t&TXoCB`}gqIlkG`smIQxCA+ZJORJk z=06Df=`9d&HH@ZNLibXD7JKT`?^6SkMOi=Vy8kq$XkYxBQyI35fU$#%&FOTnAUK0y zCBc~lWrDK^))1^ESV!=Df{O?)Cb)#)1q7E8i0qI{4)a2Kx!!1Tjlo^C-9+ci6uVvI zg&YUM4wYKme_+;qhvBlJh#Sz!=ZtNxE61}uJ0Y1P38P3A)aOW+C~<{zq)2f==SY#L zW$8UqhuI7?AP)_*4xM&ki3%II3tu!`XsYA&%5arBA44A0JyKQ!R1KXEoJoy)pjQzTkIjX71sjt0zhzU$t5NC6%-VxOb>1=)ajk4U5P7746g+Q zhjejA4#T z0FJF!M#Xhri$$~TUI}&;`f$;1Kc4+u`Qf?1JIPy!=YV%I&$zp3u3 zS8rlvg4V8GwVSH2%(Bh8aIGJ25A?t`Yet4=wusRgnC$41Z#MSHs!8r5ogdQv+JRS|y2Ukia-$ z6~3bj021m`ULn2|A+1)YVs|a&-j{MWstHBa+Kq{)miFvRdm8Y>c%hLNyK5=;zLdK$ zcC-{xJXVX{XwKM;){Na~%-D^#jNOft8!Z`!qaly>S^TVe>|txf9yVj_kyh-rd8*Z; zk+YUK_a)9oH7*NH6uZ$vu^SB(yU{)|!s=M;tR>EUiIZ)U>PqX@ew1OX1GJ=W4VQ6k z!`033upoF~+d~{^U3H*o)q!@kA!98Y5;@SE8p5_L5|1>bxTF=uB~2(UwjX5_c(Ygx z9a0`0QXU;ro`kR%35gt19z*yN6zPx`KwMHsammORmzF1WWqEW+d2~p5bVzv;!eS&O za!7d$k@D!!^61j?Bspp|RisPD2OJ2&|D(O)lvd8Ob*iv8BCTzo`|J}VMH-*OM7tw$ zaDYe%Mu+AFr&e*;=RW&{J@hmKi*hrOunt!1(AbqskSrA5F8d4%z=xa zWe5&DrJlgaC(GcIW$?)|B%T;0LFAA!BniuqG!_@jpr2((0z8Qt9mXg7i%)hJpX@F1 zqmRUIpX@67IT8zaO8e6x{#}mJfj!K79yYq$FP7SG>Njc#%lTK^{D*jEZIe zL@NNI5vZP8JxW?pWjxpdBvK!&Kl4zj7B!VT0x?cq;6#2SC+NUvhG>dJj;4#S?et8oMnx(tgp-3PQm^@KHIL$7_i8H zL(6;=tA5#{;;WAvA3zlpWp6y11w~U?(vt->$wn8c#>@`v83vsxHy|=6SUb4kC{&O~ z)=uGOfE%0Dsaz|1+|X)H!G_MrTC)L5|D!I$0TImWHmIHp0cds~LDjE_SdC$<5doyk zbg@Q$1Bh;@AYY58_NxBADVWGv3mThi&3|x9^yi?;)VSGxh;@>^`TYokajJ$~TwylC zcho`p|3vz!EU1K9Zyi;t8`;D^Ni@8*Lf8%nZywEp)}szu;Bf|zk{huaZ4XMY$dM;z z=A3#VEl05U*y>E-LhyZ~5DuwJ4IP@TAiax(=!>-inMvF*t5uyu4@^pTOgU?-s--M! zBT{UBOmLJyDks7b_JvspQ3gq^bnqF>$%xQf9uATII|)7sfZof4GQ8^2XTb=&vk^4c zV%P-DHkk>PZGc5(CJ&-;a%r1AP3Ytuz&~&pSKyL*2T}+a{=v%BX?dM#c_IDz%NB zta;Njqf==7GU$90-!@P6q~$${Bv5=qtefv9e3kUwgaiUI)Q&esr6cPhN1%K) z4}&k%6~2n;BCHq>kY`w}WnMe60XUJC?>o}+rquTL5{G;D7eTE%@dipZQh0elImRnE z@0%_H^L>)$B*6!FDh)NNVdx*y6q%Dmw|L>y7viltS+C_v+3w(6I@BK3Bl%6ej4!cP~P;`!CTBVjC%#as|kLY;57uVCD=;vN`h+%eu>~Zf^7uX z6TFIG7r}0V?F9dw;B^E$2yP%4BDj&DLa>uyn7|{b61;)njRfNa69k4JAowW39}xT@ z!T%r_A*d1TAs8j76O0k~1Py`@5`2i@!vr58coV^|5xkk;*9mSVcniT>34Vj%HiEYi z93=Qnf;$P`M{pOx`w2ck@cRUJ6C5G<3xdBS_zb~c5qy#0O9cF|n!h9X7{SL0{)pg@ z3GN~Ip9G&E_!EMA2|h{iDT4n+@TUa-o8UfzKO^{af*FGQ2|i8mS%SYN_#DCC5PY8C zZwVeC_yWO~3I3koD+FI92nqgy;30y4BzTzMYXn~>_y)l@3BE<}ZG!I*e3#p%DL zl;AOfeZdEnV4%m)6cM< z(90!^YN8+DkbfnCEQAIU`^;GT7%yBk)XuaX8XDebYIR&3Kh$gtiT)&oCT)yu@@IF0 z*BA^L4G+(om11r_w2ErZo|R)Z&leXvbQau&?~H48J5#lBdAr27acsjiv+{T4iH7IQ zO4p4&+oeUN71^9o(?YTs%Z$SQ;Sn=by=k}#a~_^{8cidGi&Ayv@zBYt7xqidBHu!Y z2jsF;Fsz4%aZEBiB~6h{X2htHXy?C$qji=rh-;m(~xMNAeS1Wr`S zXC4(l@c=+nBCRkXRuujF845c|?L$JOPHpRas?gz0(RKbByjGq)pNJ>4mtgoZa|rrP zJdcBC4i|m#2}4HzZ+l+?CFOOUSzYy4b#((xv+qzW0yQ9@n?(po2n0fV3j;!ONupKV zub_dZy7{YHxZ7Bk1VXZ`II^6KZQ1d(khKvjaV%NhWQ`|CIXRiwM?NQWPIt}ZoH(9w zoMgt6apEjW;^h18eg9TVQ__s*jOPqZ-FM&ezyJOB-S^&o+r2y+fOAt#oC9D+5XWe~ z2`7_q>j~$jl28<~jksx&^HWsZY5hqa76uS3@06^{xqg1A8?+;sTl{A znWqzn*5B8@mPQh46avAZ173s@ptbhGwYi3i zBGwot_-6m1pePXC>ZU|pg{ZI};Z<0L^@wm?vz86D6}=BL5|u_1!a2jjhh>A}fbNV& zWYtO|3c=PD6hCCP>GstDYzk$1^L-eXGAHtbz5TdhQN$&S2l|f(tcf!PC%o8xH5pRN zghE;W0(JUYxr26F)63c5MtIL)-|*6ApJ(Kv8r1VQ>=b2(rG^{I*D-#B9rGx`c>85y9Yb*3&rq)=h7}b4KYMgq4N%@>PwDz^=A)#tgCo!q^ITw`aShr_@rmX%8T?} zdUKL}n)YR0pC&yu`ZMg`OFB{M4F6=Rc7}@`d~soZU0fJn7k9dKXIOWpb!VB`5$jCs&(nlSa+#eCjRBtU4gr!j9+QpRpxT>w^?_!StI_n*1ZBM zOPPMXbvKxH@po8vquC_>&DPywt`z@P>t1EHiT`TrUSqBm|90zMXLg8xr**G4yTrfS 
zx;L0T;=j?lH<_Eoe~Wc*HMfcXcI$SUJH)@&x*st6#J}IV2h5$~KWN>%%-!Pupmnq6 z9`WC6-TTZT@poDGev=b_H(h&LuP3)0?BTbw15ou|#BCdBgBZ-Cn~d~hN1<(eYO8A z#u7;8w1?E6!Oeoa6cfGRU~f-9deeN}@V7U%n)RNudW8Sh?=e_ca9U1}K7fUjx5 zXQYqTps#DtXJv(3gTA3b&xjpf4f>`AeNI;IHRxL!^m+G)U&J>w=r?3}UsHWsgT5eZ z=Nj~;2L0Po!+-|-I}I2uHMkP+{f9K*S@%OApa$tTzH?sAD{9aqn(8?j&NS$Z27R&A za99J*YQXcQ_;IDl_aD`e7v4<_9x7_0a~kxby9@HC20f-hFO}j&``qIi@-i-uU_Vw< zeN4kHyc=J&N<*H|kl&QiRf9gRL0^(tvIae=L9e)LG0bYvuV~P3$vCV*pU|M+mO)yB ze)Z)B8I50#2eZ7gSyyRBm3ekQ2ivolio{xH>VO#@HscsVp?MbYpDk2vw+f*gIR1~0 z!W8g#9fgg+|G`n12LAVs!Y1H<=O}Cj{A-*DD4YTOzdH(N0{?H0!dbxo(or}Y_+K~*=K%k6MUchSPhB)nWJz%@E3h&|BIt=5%3>63Ks+aQ%B(v;6HE_W`O^Rqj34i zvaIKDt#w=+l7dy09lo*c=7BQ|LJ#rLh^k@<8#Q)Y|yOxPQZ}g@d zuvR3AGQu$8mvIc&xzOF50?GX7gTy{Su#eytf?El0Be6d%iESacl0eS3uOqgefcNTj$Dy5A2f;=H3I*;aVpN0WUL_dw+eS>* zbgw1Gk=fl&>^OQ2_c~%S0o*}sC&Bduy9jm@aKLqMAodYHv4_|w!)_#Y6T!^{`w0#Z zoM-Hv#JCFV9we3}xQAdV!vrh?#<8hl#qtG?O>ZLhmwIfPQKeU}_|#6^Q&>TYTt={x zU=_h4l4=RTVuB38QiA0K%LwF{`a)t02v!gT#Puoc-hlb+9Im;}N5;^GItLo%Z@@}x z6I8BaT{HdB*qZcJ=_@5&50XUwfn8!d@GDZ^O?@ZWO%qP3H=Aaz-<4YS4?x#PvDePN zbNkRLqysyE`DL{V zn7ia@a{m)Ma^8G0*LMU%#b7VS2$_S~WAP+JM>xY^sVhv?j!U1458l~b&pX2zR)1%3 zKjxA(`3qM0ZduqLlR-ex?H8l{wWv+0%2D*8X=t70s$bt|o5Jh(Fv%(ol|a%b5U0Kv z4&~_th)NUy;t*zd&q{X)FQVx4Ce{}c>+>-7*viA=iI+TPYk4g2rrEd zUwpOge)l21|6zhh07N^)QD`}3W}XKs3#?{WXz6?f`MYPCv=MoT{#gV(>IH}nS_GU! zKq++;i%}>JTq=r1HGnn6FRm(fc($AGa?f}1^Tj8uqyA@ z1cGV;pk4wgk0uaQ697d5R5Qv43bJ_O2tC15q$lGf2^2F>RSG`@id@wZeYh;)#g~Or z82VV)XAABt{0{4VMRlK`IEeUdFG30`Nl*+#eA$bT0)h?jVnV2c7)6w)`n4M+rGi#M z78iJbK@__J?FRe)CKG>&;I{xIl~uzkb@m!#zDn>q!Pg1CM({0wAuc(SuI{%zABpI` z>G?=8oNw&Tx!>S3-z0d0paW0J)8LGP`yB@TdjKz}sK~QW6UE zq8Wz`hLop%VKj*i$Ogpls(Cb~(JtgewX0mm#xg!_GGYdah5kq$wNhdmv(A*X^;ch> z$-eu^_!Dix;M3&(x({n5)T#qoqKI0DIfXawCHcc0D4x82DvmbwT+ zk~sRf4=Qm}qUOS>Yz26xj&>!rMP8bqwnQavq%ZKx-5{p-}NNy6|Tiwn(>y;N8SP}sAdJ~ zB<125%B6E-zigG&{#a^yrB~B7Jms#GR-@r;TXp1$J|lH<1IqX`$~XySJQpfs%H4u8 zu97mIuO~O8nP%cF7u1x1zV2%%(MtHJD26_As4aJ)V&o9}H>s0vqC{KRj;fC%2*2F4 zVPz7QH@R_(TBfSqHI>I4B62v@*F+IUt}+ERVLl_Fd*je9vgULM@@&7nn%Iu4fr# z4KrQ?yfmlFvekZ0Nb|05iE#&GmsG@}%FA_JxSNnvwwG$8k0`q+e;qRnEg;}zXVnzJ zbic++g4uqhM&K)1Ksj#cTXQU>2VA}S9$F4t#&ldUn?40n=W5ifD7(OkLRAI~MZ6BI zG{&G-!tQ<5>q}uXNY4$@&f{!6uRbdK^xelj!GD zD!!ObB^i7ce|%TeuH#}`mS#E0DCzhP{JeC0mxgB~3`#UA_X3k6GotlowQ5YNx^Vev zGVEsvK1=Wn!RH7*PatcSOIa5Ikg7R^*za=Lw-u3Cq~r;pgfj`hdC)=3*d9bg{;{p> z!0-L&Aa2=zDsMu|lGL#oEK6$Kds>#XkG$t)N#B0s|F;ME|DN_?%Vhp2i4j!BRUWJH z7hN+wxdK&_9EL6slff_Vh9Br1930rTetogff2upz*S8L;Tk(IWxUSzlvfj_6xW1?y zSZ#3-%}N&shcABd;`0|@_w7O+_YFg~U8Z&+U0f8sonu=k!5sixYcpJ)$srC)FfjK> zr=O0CTU0i|AQR*ZG&h47+|U)sRPJ8p!eCucAu8)9Gz`RaSRjM(DAm2m6L)G#@=g}u zAi@0%lM&IE1aOnnbpt~(RDKm&h2V}AjwId)#~mh+8ws>2>3*2tG{FIYwqz-Jc&M*$ z@Mq{>Vtp51hh+2+BgK^D!@we&?p#4(!d>_k`RNe-WZ@#bXyF@>JdBBW)PQ8%%~Grs zO_w~Y{UCfe^KFG=8hKQ_ej|@(#mHkLgyE3EMp$~OkuOeVzkc!wh|idG-?uwdzBj}? 
zWy<{#pTs1>H*WcnhQdt5K@XtZ@bpU9AQSlb$u~rxZLA{D+WzpDR-n~w&qq+xouV-g z0^CFJ1e^hJ@XKHZ_jdr=nnHFTA$IA5 zB+ueTbv6YYi+_SQ0>=WN;A!AE#VdFdaA@HYycu|lnJV}s;5f-E_+&F3Q@a_ZDSP|- z0=Z)3lfF=);dWl-;(*49fj+1-fJG48Rltg}$@CX81H(8gE?bZk*yUMF`(TNMd1m@@ zg(E|`BRF09Kn{2Nu}6~04Q|VDgeYQ&ID##_q3(4!wz>Ym&>(h5da*WMSU&)5TRH)d z{31OXDaGF|QtJ?a-=j5{n2AZxq)W1K+%P8=KkZ@Y-E5fSV=#bg!(ctfVvQEVeGYdW zUI7_?HJs99y)o3q>ruthH0v|iODYUec~xna4f1X#<=GB`l)Ri&)SwxD$|`{aeM2tv zRB`%@D@!)z5m-QGD04*9_U}iPbD`fj=7af)Gg0msE+2_5Itb48FFrCoQtNvE2jheP zkGM}~jy!`f0gU_n4Y<#L`RN#z8idx+aGVl9LP-1l;dFbE&avq@deDy$O?Mh;+ftqG zmyow8%G@3&Mt#nzBk>Hp+qq>Fob!C^vof#M;~HGUmji`wKO)XskK$Y*XMb=u^RcoH z<4rWY%SjWc!^5{NL`T5i$kHe!s*t;4a!ao$gwcNapd|@llXxj`ob?kHaa7Go(&Vojl{jPS18$Cl5eiymB7YWK}`3;1%la|x*Xbjw?_Zm2%BQ~<6DoulyQk$Z`hx|em zT}D9#R~Z_hWSewIMCe6kgqj10Aqi8RD=&eV4)Q|LsP}}0kz5R%46ZwzG4hMAEp475)H8%Jb5?xC+n2lty z_-xyxyrw11t2!$Ly#(9Dy-p|{7yzt=<#x*o=|~kf zSypJaRSX>xmKEfdqTkOk9jo3;H`!-}%KZKU>C!g8DM&{@LiD}DGA*5dEHualLUimekek2f)eCrC_tW+ooXg$Tg{a0G}DruZl-2unAR*VPiAMC z>Dk$4Ms|*wnVoB9W#^gM+4*Kpc7d6jU1;WI7n%9l#b!ZviCLJ2Tc_&FPbC`tcH-4@y zp|5<=br7f7Kkg*38Ag_);VUe*5 zUdTBw?zn_jnPs;>uGgDFq6R%F7r! zwF_%ww#;4F((arJQgPB7OSQl#Y)iw)qDTp#*&vcWLT?VuU~#2)HxSYry8w%Mqz*?e zxhRmLewWj6({Lui11=|5o`emoV;rgxZfhmUf!p_`;G=#?9&Lfv#ZXGtQLt*z$%3S9MB0>1 z>tJG(s-E^FCRI{rvUYV~kF1fjrE!WR@&&k<+MDk=UThb`B&9{=vDlqF+JnRUT^sxI z1)1{TdO%SYc?F^5UTQ{&$sayifRd@&IdmY0m+jiDR7VPp0A?M^>!aB#Q&)R0Yho@a z7n8Nw*$pZAbFi4A*vQ<-*PuT1bGq6qT8m#3q1JLRS`%2mjk{Y_E0XAD-6}PJT{6}N zYTyuBQxa*UWudN8LVwm$D2=5&CKfV^UyIU%{2bl24wAL%phH`V%qurD`4)gM#oUi0 zuiE75I7M>J^vMOYF8Fh3-UWX(N!3xCR6LDjJC>)Tq^e-rBvhtylR*}`arT=;A<3Q* zrsV>6pU<>Oo@qteBxmmxX}K)rF?i~D(#5%Oq?NVEa9VU`=#*Yg+ePv-B(p3j>g-Jx zfjNC`Yb-4-=W4Yl(naRwX39XjP`=bwo(_sSjtw2r9zOg`yS9V{8iv*p$19in#VktKL4FW~_<{DbWCadJUdur*23FuKJaEAjOUv6&MNYN3 zq}@++iCK*BG#EgoYG%Z?Rzs)F)N<+F3g46E^_565x|}g-^If{Xi%!&&StF zOP)G!(~?ilNBAU~FL~;Fep(0T(8D|xN^7Y6*lXTRKKH5ewNPo5``WOIEeb+Ku9nT3 zaz32T3JWvivT`O1GZvY#BNy}orTLYy@14UomDZNOv=(O%VB9XPuRFENt3HEOxpHPD zS9at+O}324LikbM*pnb_ewA&yA; z93ktXn#S@1I{bx^%9g2#ZsCzD+7i(282OGad%H8}K011Wy04q6J+G0fBCyCXNve6Y ztAlbqwJFDQr?!P}sM^B4-Z|Xsvo&wEy|bgcJklk@NM9WU=ne=_h5325w1--Y>LyP2 zcSxMcKFih3P~E>cvb2FL(rrj*0kSNw!56A(lzl=^NH{3jaYv*u8NU!cAz^0uZLyOU zsyIU6k>9UMndK1{g+ygXgz%HKN%Mcd#Nb|sMhc;zR$KIG-md*{U8A!+WTgVH%MNAa zRZjhR)h86lE2;|xLHFC~Re5{v9oBdqQbDcmHk4Iu7Pw@mL9u|tZ;eiats@vpC+IYM z0#_X|L$MzDFVSiEU)PcQVb!K_NwQ2d3SE*cm*`8~PV#N-E;RVwYF%h3_B~Uh`!C-o zH74jnw*({0>Uu^Nj42&|4b}HOqS@E68lGlp-Zk4khDy z?gF!QE=5wlzb5F5*&$K1uUPmC5dj0VPleK~NQ6HcRig`*%tIg3ky}N=1hPDI1+!DG zST>J5Q=0;}1r*p(n*yGo0$L@YpdF%1lqR6YO#wBeGvbopr6|%JqDaMxw^AsNRoObI z(!D-PmFYNTcqyt(&>0T~ROt;<#b2o2&9-G*?`}{72ZzfzgzJ6-{<@#HmY0zAptAH# zX{l$$K3&9XOH~T!A->s|lR->rxRou64XiZ3GKtS$o0zH^IUy;B7zR1B55JN$4VF@8 z3r!s?SqN<_1i{J-D%cLM*WH@W2Lyg9Okle_#bvX@==!*9c6~Xs>3TGX_8EVD0 z4Zw%a5;nyWc8QdYRMY5kDt3HONjw2q!N5VDwjyxuSlD?wsfp86ZyIHF*)4Usi*=bnj=C0#D-K$lxN6DT;~hq|sty-HJ%y28S@9e}n3orlbN;>ZS_M};I4CbcXW1s#Zxt153p zoC&k0ED+yCas+a%Fm#1DQ}I;AcT>dJg{M{z*^C$Niv9|>--EO}>X%Awt zo!}G(_R|mSK`$^79X3^^19wea**~e)+M^WNBosMBir}b}9dC7s2wmtzRM96RR3XzQ z@Dr-6MLlBoJJzL8hfcI=4xo0&j=0_3laNM%X@N}wn*~l1IQg{R*qFk--3D;lXp4_c zwRbYG2kgFq7+4b2+ZxBFVOcyjI^Fjtij1G3{=$6VGyOP6;)1kP9`h7Z%<@wtVfXGF zcsvd_gP7S~%;+2sJJyIe)a~e8KO}9RVho-hou@H{!@%eJshaS%V+e`n#G`wc&S{}7 z)9LW@!E>Vv{I}qniWk^Q@iw@l3%!`pMLxFJ!;Up0^*czl#0weCc-XN?i2GaQL@L1@ zU8*rqei1u1`Kfpjw8dNEE@`~XdunvKkFBs6lp>u@KmbjQobpdVWUxX9vucHmigtCF zRpYq{{)?z^FFOq3ADt)*M?1Sr_8PU-T*>xxBf(yR{RDRr+)L0!U8R}CBL^NT+kVC7lzTm2T`>(VPuG#FBTY{oX}T+y8>%di*38JC9k$mlke_CP zLI-K;Do}eJ1?sLzZU1E(D<`4$a>V20R_YiZFJK3H3DmLQh+JJ(}IIp 
zdCeC;4-yOi#nV&pHx<7b=|$c^}6eIcU7}~ z-eW)LvivvCz6dKoOOeCB1+^=4L5MbQy>s^U4X-}>=8FNJ{CyGCZ)PRH>g7i-uU>idO8x!%qJFV{q5fX|pnj=-xqfB!>R-P4=+#xR z`oY^5kBZel`tijNU#tGt%NG|#wcC|L^F~?k>g&2Kucl!vcl9uoi@IsoWz(+edR2DU zbzd&JaZ^_9@uoVKt9q=OU3u9}eRhAUd|^t?`_IBN_i>An~ruvxo_5+5n{ZmE=MY@I&=GS(+p(= z_bt?XyEvBXdMp{c11?)RTvlz{jRnG@5caHS?IW+pay9k#J#MCfJZv_5a9_67zApQE zUG*#94>qrM!QIS4vtrapb|{xYxRG#?Qrc9(5!@-0kkh6v>uR|vFQ+~@Sky~iCEL@X zYh?||4Gnyc-3|RR4fPE&a7vK=vYM1R<8Mozf_YW0t4{v5`?7{hDK08RPSsVp?qV3b zBdw)7lt+Y9(F-ye6)`W49jrc^`Jri-^3hFWseTnVn@Z7#idYEOhsiluH?W0DsAXff zXOdJ0o0o@OUBT3&x&DYo!0w_3t>7J=uB*Q3rlF`7YSyA=z>ILaF55ap zitOt>qp^8eE&C2tA7iKve?d2)kDw6W8{@I$Mbr+7Vgq%5(*!?MQPu@B9q+2$h%Tc0 zm<9Y$HZbYTYX_6BPtC{(8zc@xs1}n1lgMDztlDx{VR$#8$xKC#>$<6!A!HQ#y{QpK zfwanMT{SJ$w{?9e>v36XPD@p6H`=X2lg-flR>SC1A5zzr_Pm#;?xux$8O5`Xbp5{W zF{2x#h2bcz0WTmCNe&RI#t_GB;Fri9G7b`*bW%*L1kM}WVi>ffVHfT8*nRku7*{(M zdwq`7RE!!K^~}F5Xx0Ebj4-m@A%}&TIkkwft>3VN=o0#<&4f`bb!ubR$v0AfI)tYJ zBKz4RZJ3`PEx5rQOf62StoH||THZ9;a5wdCm>tl*8%FegRCzBOs-(Gv*t_sH@Ta?2 zOt=P`m4~Vijqi`C8fZ3hRV<~Fl@xXteh*V-=BQx*$LM)qVYZi(qqwPC%3&R%3!UeL zQQ+MUB$y-$TBS;b*=z4mW1X}dm918Uc}k$s1%@l2o6z{wGM$U2J~n&ohju~mzR0Gy zhn+G=Ez)xEMRzN#=Ayigr6r7u;t(VY zQg|fShwxT|Dtfbl9>#r{e4{$N>{n$&zQfJ6od&d5KzMBUd}CQ3P<02qhIANT*;g_W z8igY%STrwnqZF#9GFDxyEuEgD`Y(ULdXH?Uw(?Q)y8z7f0%aLH7s6`4Xb|n#Q=uDs zyu)ryqkwa!nscm{z6(k9CCW2&#}ZM*I@JyEgPLG|mJVYT05uAnZ%`LjfR3eHi-B%* zd+7Stv2YMx6Q@F3I1??Xf=vgY7Z~LQ9Mc(^g1XS;(7Es*rWzX(TDq$3S4_Y0s>Qw< z9__60B}w8KB*uaTWVJIHz}IGID{6~nwW^SCi%Ktd6JkH~7!Unp6ZDFM9qRRj?=sau zDlwg%6D;#{C5fL)A=sDKi0iuIk2l42MW@$@>AE^r<$5CKfd^M?xCBCbL<&qEKvH6w zL}5!*ht8L~&gBJ!$fmMlC1QD{;dDf*JBxwUz&d8sGC1pW`nw4yxnX>*Dlv4B1Ymd0 z#!xNbMo1-K0GTlZGa}S(hli1qe%%!dR)&~K1xiSUO;8`!lAlc2CxH}wsK7C({h>1+ zTM_;xQtIbaRW2J`A>s~%T*Xz9D3&p3Rd_GBc1cTIo$-AQd7t0G{+w!Yad`ad3XSV= zeCTt#Z1&jF07C|{@gxOS@YKg&Vy{mecHjR-YKk^`o zS)Va<{1{48tqGn%n}(;;QJC84e<%$nbnlCv_f)VISs-=*uvGx5ue!bn=b=9#OnDBa zi@|ZjG|B>*L#?B$4t@6qlfIf&e;#1yEgdALS}O7xM?q8k|0`#uS+{bI2z%EP3SMD) z@#u)B7UE@f#N?{BT4Bf(m5bEOvt6ZX;MEO+@frcDgeYAr|8lM2=kp+Z>+Rio&6YFa}!p3#{`W~Jf6!>*y?Pt(DuR_G{7x2m;} z0u*VaXVO=T#+vmKH@&KH1n90$=x^w46Qn|eXJ5IhSsWO6E)Bp#wPemrObmR+bcTq` zCc?s2jroQ(NB|EFJ`xzq)T+5;q1E}`r>uN6zmzpQ33s5|Vq=<2HdT>?;}whRl^(}+ zAQpjK3554c!ye1U%HUx3cOd1iS86Yli8>cNWT}>fv$jTE%)Se6@VxUPZ2`lNKU%32 zhPX9ul*h~*Fz9Dv<(Ebz!UskQmICLSRbfsZ0?mceGk(bIOyo==38*VpGKK*r0iA4S zvnB2`vMgxPPan%|6fs#dp2^Q1aW0(cIGs_FwIdTWT_SmLrl+9EMOkN8$$AD+6ZTBe zbd4GWolGT#OwVJ>Owh_?5R9bjLDC3`2en}ymlcLHE17w&_L;`BKHzjFM(WH)iqmq1 z$l+2%uav&gMEyW*QNMB6HMkfsxH4-r(5Q7Pr zb#o9=&=!GgX~;K+d%+611)zubp`To*>ac;1%kzG4|SIP$M!>Trm3q={7Jd z7LhQR7Qpf=_6Yto_P}UOHaE9e5$g{;E8%u5MadXRaU=xaHVkZiyu?){~ICKNDHivVpP0*^If(~d!A}f*YibRnmvW;5LV_v9HZklO& zzpKWqB7S+je7&Axo6hK|2h5c*5^mt#f&{FGB0{{{>}{2B_y?J1)p^CKIj0=@3Xhd& zMHh357g1yZ!8vxpB66sg3^cc#eKAzKYjSLS>dJ?y0;=R74>zGZ@6<(>02yaun`UV= z^dKL1;`W0x9iK=9AvpA)$DYDDi?SbGGr920f$xj$j&fOeo>J6j38eRiKL*S>$DS2Z ztSbhglbkCc9ijGZ3VIFtH#D}36P-AKai9MDW}Pfu&O3NTK^!ckIqsOp+{B;rt1vle z9436dz*hv@0+RMh3s~id75{aAcyvBQVqdG?8wR-DH`TMO!Y<*?fu)TS#>DUwwMAlf zo)*Wk1_<$IUQQ#0kY;>-mz&VJknU`+a3gU~-z?|8LN~RbnIscRZXFdlj*NEeP?abS zg6kxQJEtr;h-r=7Oa~90Z-WBsedJj7?aawcly74?&$N2hU1~Sy10ALnQ;35cYIjls zWYRAK=4K34jstwP$Low8bK!=a0!|32*>n7AMlkUw@|-XO!!vf!SrTK(BphVo{1QEh z5+;Af3eq|mam~s5TG{SQ(N)(P8FZ0?b#+T%O>|3OvgH5YJ*BOE^IwkV}`HmTh6uk0Ol#1=Unvp?3`;}U#=7LCfAi>#+;3pZ`9wI{lRiQhxstxQpc_+T?=6j+5Qy zU|Z0r^Z#nkWoN z42GSUw)d-gyd8@z-)Kt#t+!n(i=vUBW$9;wJ*)=ssEpDHi3v;&v=Le@idsLj3hW)P zVJNfY`!OL!84EZ&rxm`R7Xn=(4e-%C@nwb6dtBrWf+EO^gt9mAZ4nkgH1MP0#7?>o zU|z9Y^I7W}tGj0TxnWQRU+S1^KV8|D8j1XL$+-D&>4U%mMggY^*md1t 
z96Wpsi%$l8B){q6t3u~DnA@qirMFwY&Jn2h#q?+2M_Hk6IOY0`=5Ww}((g;^MeKC? z+{2Z!J$=NPC{ZRB;JQROXrM#Ps()2gc@d{D8pUPD85es&qMPt?&^!HtMiYwXnYsBj+YGCPkpgiClGtSY0wm=0Jzvm4Tn;}XpBGU}v~3(L#XZ$jRf38NCoUf@|BsWFcy z)N`+i_*f65ffy*y>TWtI^fE#=Y@$CU$C2NsJrG8jvNkqNayG3mv9KsKZ?W->YGo>4 zq)N?d6H1Uuqn&9HX9|&sb_}72it|`(8&eZRqDqV?vcZ>Q!hc}G!0eYCVYP9L+-0Y?vn7pKQ876I`v)!K)cH6Y6an1vDs=9BfvoMRUT zMY%zn@k;;;2v6#f&Q^$u>s3BxC~n5FAc)zr(m;b4=JYXL0WMEpn|RFXRmT;ufY<`( z;rL~WfIAi(ZcE`#_FV0Ple*n{OfEFPjiqGQtj*xmsLgP>Bg3gTMM#^6t5Y0K8jEoM z3Nxtp&=zuDlE~>c#r!8S??pF1F!Q(oiGUpVM76}$yJ9e_y10~A0k+9ffiN!7?Eo>< z%l`BjVn^Q}Im@8$K+!j1xI8_k{pm40Q-60Aho9LJNzo5*$LmN)ggm_zOqN%CK!|QF z(`$HxF%jjR*T$G{0c1p0=ZaUkK7FiO2ZH&)QQ9+mYk1N$J}ILwM4HX!PeE+=gf0eY))|?gLs4Y70(q9teExjFcb|TkfRd5fRM+>Q_eU` z0L2@e0s?Ujp`Oo?`2*Ppl~0d}89-lOA&WD%gDkOPyc*fX66&pdFnX{11SX8CJ-TS& z&z(t}fp$g9DJ{}(=^67&bS^+mEXWEScNN(olr2ps<3&f-s)_(jyFEQtu@Z$6MG^*h zfdD}9HWaKN=bNur*UXPD^0~Bi?_c~*tr(o!KfI@F3>QTS!U2r43*``!-V@yuj#8zs z$n#m9`RHTLhl&F@hCEbE9W#wea*vePxJ2h#1UZo44If35NYR*Lpoz@G{d~*?Dffng zO!}6I6;Obw@C1#kDFjYdd|xD8i{=uv{F(y!h7!8ZSZz! zHHwUE6Fnx2-@?bJhj1);?}K@3 z)~AoXd9RuBgp=a#U*W<>JPt(0i4?J^kSuW;T?L$1nn4tzRK*$6eH=+- zX#fc&gG6H|AS@c%IrK%1`@awC2%nAV=!Z3&&YvA4xl%ckJnZ|x?&?>&|gXux8)35?%m$uFC&u>qqzFZ?c*uIXH`=eC9qy|KE-T0XDcEYh)d z7p&5;QnN6^!j9;D*Q{6v9j?nT>lEGcupCydzPT_OQskP54`BK5oa=r)G{b8|qc6(^ zjFpNnbzHReGP|vT5O@9IhYiPAT3mlV3845%gE#`?cIg^(E4wY094ZPM)Kwp11jPbz2H3QN-ffxcubee}sh8~bW%GU(^SPaf$L3a`E zVJVX#?m`X2j#Pgx18evsoPH6y2=c}x7r*)8P2DR9knGehC1C?>8`>3OjtvAsA3?hD zyE@Ips>Ecd?UB;&hSs6q0HSIk!&QrYG74@BG&yDfRZjLuaAXh+iDW2sc}A(tb6r)L zL8#K6nTlyFRCP=^l2{Z4Fj#O0md=S@(LD}i2fv^@%Yu~PO=U>4w7i%v1JUvQu4No1tBmDw6Q-QWHA;~jo zM(0^Ly0Qv|i2Hhsl{>?986V_B6$sANGgwI7^Q$|K4)sMY5QGQv z)8`z*Vzp{5ql@p|(XS;$BB4nNTf_MU-ys-DX!8yMaovR%p;m~TiLfQCM}5g6pf=Tw z(wIz^PmQN3PoD*A5P-ED59Abw3eAPakP%%jA+TiGVWwyGj8~zDDLeB{u-zsxE-}Yt z(O^4kqR37Hb?7~3TBGDUQZ{x9m}&fM=w^zMGBrI0GGec)_iR0qzz<3yG@LA7tmgXO z?NAB%q<}*rk$s@uD2YUMHZ$qqQPHD0DQB7VN>LEjI<(at%rPpOsb|#CAm-T*i;K}P zLRzK1N-|6-JHocYwx!A-a1D4jR7EdA;EN_BGp9M)u!^H8e9=r=oVCq52ba{HmB68e z$4#vY!a*V&9Y0&u-J~@j_Y{A;R~P1G8>%0eti1O-rA=X>nU!?OXZar+utc+yoLF0Z-mEZ!K)mUwZhX4=x3Q z;A5%Xo`>t*t7Z4(=jHO2t6MBEVmMpg*82$viCg(W0qZTgOiw(#W#3F<3`~-%HMe!i z!Sa>`4*1)5M8gvB5m>kweT^9YK_aUIoHwu?K9Osv0*jW6c7_4yRhpVqbQ(U-Sn1HCv%+=8|oC<`F~ zqElcKV7ZAme8A3k@XYz+7Jh|u*==SdxIHsQlJiMG)Ktb-Xas}x6McNHhcVJ^1p|SQ zU?!e?nR|ZdAzzMSTpx+;0ex*Fs`xD-L%kv_mxuIqutuGzlnBrTTywBawpc*J>qPw ziHWeW^ANLJo5!2-(6B?O0ADaE`wk%jb`3vv2HU=K9)xq@lW#3OeZ?8;T2?FKwf>w2X>}p3!d3R+)DEA^$JBE!Q&Gjdn_nSg(U=e@ofn)pVoG^DS3DN!-Aw zHx{_b$PZ}*4c8OAis_?s0Pl6C{K%VO1DpDW*4)pG6rCP$Zlt}rNkS@tC1l>F$b_42 z+W!H+fp@w+Cu~mWsC6v+4#g;XeqwFI{`8dz8sflwSR0UA-ZsJCv3usn zyy20!0qDZ9@O7s{91dK0S_C?M1rBEvM~t2@VjcjNU;xtxVitj_1$Qdg8u7;5-_T3lYcgwrasPJj|^u2ub`0m&DHJ-kI z_vzgiPd_Lj^1;)0qk@EJcdmUd`+z>~Xd84&`?~wePCt11GjnfQ{w}GzPwes|X_R;0 zBu^#%Jhu38=93Y-d{4&Sww`SmhcwnR|!-CH(Zgvbg(FHt&ZQnWsOS>D+xo zga1qeX})(qD4W7-7Agqj5i9PuvPib=knWMS{V@kQ zT<9}hA-(rgKJO))QKSIp$#fV@j9rtQHG<@?mCvtb&<;Z$GW4F3dn<<)gFQ=+jvWls z@-DOWPUe`2zX@h@puvB#`!^+1N#&o#>$@)*L1ST7H1LA;NHMg*|#B2iH+~HTsl50B*auQ-=Bx} z>!cMmF`GL26Uyh9FpH=cBckOj`QMzC@Lp^o0O`*| z#qc3Z?@WyHO3k1G9nMw4ML05zAH)2T1_%{Cqc>WHHww)UUKK1VrAx7=|HfSZttu9l z!BC&Vxwf7{S2$2auIkCJLYb-aR%opfiV2w$>RL=N3JDn-$SV#@1j~0ZmtXkqOX&WF zVtmZ|_NASsGR%Eg^w!-kWllfvg>@YjZ9ju&G$yT6;>jUq6azzhWW?&~{>-C{A^VxX zx9u$W(SXoex|k)346H~$AK}pp+CwU9e0ukZq6qu2551|3!NbQPsSiUnUe2JuTp;?e|#;)kz(_Yz-#nql+Z%XP~k z2#J?CK<2B=cQ5mk-*`jZ*pCxG+ZbvJ2~VPW!xGQQ+IKG<_@r(#&M_9va`^)P+SmC% z(_9k2_!FMtp922;V!YVC)c^iS>JKk|%s&anKk4=2kG{M3;S1lr`XB!IM}PX0KmOBy 
z_hT}zc=SGTQvLZw{~8Vd@6|)TM>N$#?se1a!+*;U1RnmUo``EX4iAZgo8gZ*R$Xo% z@~?<+)41klp+A;ck>#TVyRnBOpXE7Z+CCgOQm!8I%M^Y>-xKf&^P;y}uLOF+ znv4Dw!K;EF2y7W6dk*|#%D#K$U-1EA+SUKsro+DgJosl1{^f(>!S6lz=MR4W!9RKM S2M>Pm;tPN8|NrySYyS^Q92n{V literal 0 HcmV?d00001 diff --git a/summa/preprocessing/__pycache__/textcleaner.cpython-38.pyc b/summa/preprocessing/__pycache__/textcleaner.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ec345bd17aa32d1e7d04e2ec50bc2758c610ed8 GIT binary patch literal 6821 zcmbVQTXWmS6~=-91yWbbvSr1I1K-71rhH49iy~XrrE$ilT~SIJNGc0)A(|El&<+*QpX}_}|DUx#9X(?f`yVyN@_FTR_XV-^^ zvkHEHi@q%HPAbYjsL}m1(71;u_&ZY+u5jimR`O;RlWo;f9rYl^)k9`!jdSQrxq2;a zrJ3Sr2LoI?WK7|@Wqhr87c|Am>}JI_Q|M2APs{HEmhp|^sXtF>XRRzZc*Y&94Ov5& zZSbr+TpO`QP*?b%J6apF#!w%!PB__vaXyT-Ps-W|Y7??HirOhz8$<21tetR9Oe%bQ zQbEt8>^a%hbA~f}3eP)yf}i?UvCi_-d=l?>`58Wi_j~*ueirX@{9XPY-tY5s{C&Jl zZu0YZpXV3&MZ7Oa%9l`Hbd+b84%unN$v(S$s9IO}Wqt)CuF4T8W$!ef<~j7{_y_zN z-XHK8ejV>?JkMwGp5Zt6O}wx3Tl_ZOc{%$|YxXR^$3NsBeaoyHe1ZRje~j8qexHBJ zKSS*nU*vQAbJTA0B|gvZqIQR~qLN$wp7dO3oDKGBH9KDyPF?tACkU!uC0}+O+jB&Y z#i`J)H0H0??J#tNch#)h#fvrz%cZ3PQjB(ic1^ZSR*{ z+pFw>SBxJn?t7tK4y)yLuNwZq?lrDVZx-{{?r!AI-`&bh-~D_uxE5W?MM*DpY-G34 z(U_XvSh#L&ZP?cvTfe$FbE~jfz_eItAW|!>Ho_!+xXHGc`W^K=|2KjAAly_&tr_M`Q z%2R(YfQ@l9VJS`!&W6sbP}tt?tTf2k9)px0TSl3$@5d8?RJPQ8N6%MDKqM(e&9XC0 zjG|Unsonae@TT2LNq&q z2oW#h2_{fHg#xyQnk-^Mi`XC4=UOq5lN8n)HCFRCV)jzDl41n2;>;ZORrYK3`P)Q^ z_L5dq@Z6Y5HvAUr&d<4i*>;2Zd}ne_6S^d~Bm9QrVQQ=gb+;PEnc~{xCtp8aT`J~M zVu;2}*qm3xs_)tEk@v*9bfSLtrhc3WVwcVeMKzd)KQV=RkMP?vlEOcYuZi%P60%5% z*lU%ry{Nolg`6r?CrO}AYzmxBT>=zRS|_UTazhtv>z{y1OpJSlh18Kf-QR=PKo)>g!yE7xF#;00~mF(?p;k>BYq|qQAmYCRD zGr%~Fq!njcNbm4|K1TbN`>f536zD~?dZa?oM3yE6I)(}fG-RO~rYh>|6l|WPoYUgd zZT?8Cy-4=ETe907-pDe5{tUX?hGYj`i#pS{SRI{iqEUrE-eDmlH6X!vqk5BPF z`tIQgqft7 zYL`9GZiUKEmD`1t+A#a=?6-binK$usXd>F02hSY>gG*6 zw_sAKU~d~(O|pnH9r;;l;Z&Sgfi>7=Y%w~6JvX#;`LUm8+V2;50vY(RAvmkSMwpaC zj|kfiTc*M1P=v|>T=fthvNb}`Mi}`O!no&Ia@<7Ksd#X8ne@1}Tv%L=jh(6ocyUCm zSA@S;ha<=>=rubmKkaAD#3D~{ekeQU-if>2cJrTU&%4)>(ne2t-PyP`v)k>0p*{gbW# zVfma|h-vvOmM^U1zX9y)Aea{9*ZoXq<|>ZoysC@& zZK6hIt^uwL0G4oS{tM^mEa^h5_8eG2hKQb`;yM)+LL`Mm_OX56`+C=ykL?L$NVo$?!H2kJr;WeDFUEAjN{Bz9vCmyA7F4;s$hCba2N(LTVn)YxKj|5UW z_u+ZN#aX4cxSV z`bq2%Sf_i0UJ8BuW08dV1DYcq!kwMZSYpU|Q9;pPnxpd=B*2HgBB7kX^%iuV{R1=YC%|xtkR;n)?&m_!4nkk1{ zslv*V_@ti~(k$^pLJCY2fU7e{ZpoG1R2OKGXX7T~k%#Cp9^CV)WuH47Nl0vz?a-8P69rbTsLmV^W`Fk3=w z=_;5p{d$roU@Y~{N4{He;*<;LCDV&_ zgiFq=I1LAPJf6!~BS@z!RZ7mFjxexB5}$T>(ndgUSAtlVA7fUPQWF&33GyEa-~G}$ zg+QZ%1G;@9l2H0Rh6i*gC=lBi!a^09oB^vGVrg|q6-w%PLDK6lC&Qw%}c0I zN))M-(6y{F1<9mfm0AdiL>i%Fjy0Z}YlK=*8_cbR3zVLiRWJ0-4%*F_679>BnRj zHs&Jthvhcf#+R7asuS)BkLdMW_?QHwc@tJeIia{n1%aR36)ci$?I_Q*#ydjqaAA#i z1mEfF7wn`=LuEor*H4{shoGJ$$EhqtYN(=wo-+Az`$rZ@1|Vg~QIRrCwPlFJMXfN{ z6(ksHk;ZXk^VD}FQynLyj(0jti^N^qH&}b4iXt{E)~KL>wfBH;9|1H*UaM5; z5Rlg@&tPMJI!e05G={VQKGw(^`jwNNBaY|) z$KyvSYU$zf+S>AJQGAIHg1|=FRy_E{La_vZ!?!}>jilwo7dz-Z z7BF^jk^*arawv(Gwy46ZRahdTweJYqkaN%>z9xQ&vXxkiGU`$$tz$7!wL`sq%#O7| zoP38i_E!u#$^ zk%FeF&kXM1YTGhiHB)MHY_@GwDWoL5xsS3w3g4RuQ>z=f8!z{1Ztcm3I zykp%GOeL0!%S3O0EUxN0;wD<+3KeusnaJfD)hNu8X+@H4(WRxl`Ralyy6q4of0?8U z6SX+)1ZBJKSc6@eYdo-Cc(C|*X<2+ogJm@NDb-|f*@-JJ5dl5` literal 0 HcmV?d00001 diff --git a/summa/preprocessing/__pycache__/util.cpython-38.pyc b/summa/preprocessing/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5979ffe72bc7e097d59bc1b4329cc139897049da GIT binary patch literal 701 zcmb7?y-ve05XYUQsG+D80|K#m?NaX*LP#(mh6;gLSdbe#ZD`^|XGd*OHsD!MpMf{Y 
z%3CmTPEyng7&ys}@5876{o=cBw?ok0i>n}X3Hh+jreTbn;nD}V2_l>qa!-USJp4MM zbxZm?frqKJ*>elmIWB#OTY+~BDV%3#kG#6aq;QW2-g2KVI~46kW-$+>re-Q>8VjoP zFpL&74eJO*lSuKHY6Fp)(D9OTs^q*Wt6Gd!PU*!`t-R7toBGnjUZsbXD*|nl@hMp% zMZ}rF60X<qw5q{%^T#YW)Z5MZA`#k}5pbjx0?b&X{oV>K2Z?E4$_ a-3m|TY}~bf4yp$JNs@{@mZ#rD{q`5g|FX6K literal 0 HcmV?d00001 diff --git a/summa/preprocessing/porter.py b/summa/preprocessing/porter.py new file mode 100644 index 0000000..9e06b59 --- /dev/null +++ b/summa/preprocessing/porter.py @@ -0,0 +1,635 @@ +# Adapted from the NLTK package v3.0.1: +# https://github.com/nltk/nltk/blob/3.0.1/nltk/stem/porter.py + +# Copyright (c) 2002 Vivake Gupta (vivakeATomniscia.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# +# This software is maintained by Vivake (vivakeATomniscia.org) and is available at: +# http://www.omniscia.org/~vivake/python/PorterStemmer.py +# +# Additional modifications were made to incorporate this module into +# NLTK. All such modifications are marked with "--NLTK--". The NLTK +# version of this module is maintained by NLTK developers, +# and is available via http://nltk.org/ +# +# GNU Linking Exception: +# Using this module statically or dynamically with other modules is +# making a combined work based on this module. Thus, the terms and +# conditions of the GNU General Public License cover the whole combination. +# As a special exception, the copyright holders of this module give +# you permission to combine this module with independent modules to +# produce an executable program, regardless of the license terms of these +# independent modules, and to copy and distribute the resulting +# program under terms of your choice, provided that you also meet, +# for each linked independent module, the terms and conditions of +# the license of that module. An independent module is a module which +# is not derived from or based on this module. If you modify this module, +# you may extend this exception to your version of the module, but you +# are not obliged to do so. If you do not wish to do so, delete this +# exception statement from your version. + +""" +Porter Stemmer + +This is the Porter stemming algorithm, ported to Python from the +version coded up in ANSI C by the author. It follows the algorithm +presented in + +Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137. + +only differing from it at the points marked --DEPARTURE-- and --NEW-- +below. + +For a more faithful version of the Porter algorithm, see + + http://www.tartarus.org/~martin/PorterStemmer/ + +Later additions: + + June 2000 + + The 'l' of the 'logi' -> 'log' rule is put with the stem, so that + short stems like 'geo' 'theo' etc work like 'archaeo' 'philo' etc. + + This follows a suggestion of Barry Wilkins, research student at + Birmingham. 
+ + + February 2000 + + the cvc test for not dropping final -e now looks after vc at the + beginning of a word, so are, eve, ice, ore, use keep final -e. In this + test c is any consonant, including w, x and y. This extension was + suggested by Chris Emerson. + + -fully -> -ful treated like -fulness -> -ful, and + -tionally -> -tion treated like -tional -> -tion + + both in Step 2. These were suggested by Hiranmay Ghosh, of New Delhi. + + Invariants proceed, succeed, exceed. Also suggested by Hiranmay Ghosh. + +Additional modifications were made to incorperate this module into +nltk. All such modifications are marked with \"--NLTK--\". The nltk +version of this module is maintained by the NLTK developers, and is +available from +""" + + +## --NLTK-- +## Declare this module's documentation format. + +class PorterStemmer(): + + ## --NLTK-- + ## Add a module docstring + """ + A word stemmer based on the Porter stemming algorithm. + + Porter, M. \"An algorithm for suffix stripping.\" + Program 14.3 (1980): 130-137. + + A few minor modifications have been made to Porter's basic + algorithm. See the source code of this module for more + information. + + The Porter Stemmer requires that all tokens have string types. + """ + + # The main part of the stemming algorithm starts here. + # Note that only lower case sequences are stemmed. Forcing to lower case + # should be done before stem(...) is called. + + def __init__(self): + + ## --NEW-- + ## This is a table of irregular forms. It is quite short, but still + ## reflects the errors actually drawn to Martin Porter's attention over + ## a 20 year period! + ## + ## Extend it as necessary. + ## + ## The form of the table is: + ## { + ## "p1" : ["s11","s12","s13", ... ], + ## "p2" : ["s21","s22","s23", ... ], + ## ... + ## "pn" : ["sn1","sn2","sn3", ... ] + ## } + ## + ## String sij is mapped to paradigm form pi, and the main stemming + ## process is then bypassed. + + irregular_forms = { + "sky" : ["sky", "skies"], + "die" : ["dying"], + "lie" : ["lying"], + "tie" : ["tying"], + "news" : ["news"], + "inning" : ["innings", "inning"], + "outing" : ["outings", "outing"], + "canning" : ["cannings", "canning"], + "howe" : ["howe"], + + # --NEW-- + "proceed" : ["proceed"], + "exceed" : ["exceed"], + "succeed" : ["succeed"], # Hiranmay Ghosh + } + + self.pool = {} + for key in irregular_forms: + for val in irregular_forms[key]: + self.pool[val] = key + + self.vowels = frozenset(['a', 'e', 'i', 'o', 'u']) + + def _cons(self, word, i): + """cons(i) is TRUE <=> b[i] is a consonant.""" + if word[i] in self.vowels: + return False + if word[i] == 'y': + if i == 0: + return True + else: + return (not self._cons(word, i - 1)) + return True + + def _m(self, word, j): + """m() measures the number of consonant sequences between k0 and j. + if c is a consonant sequence and v a vowel sequence, and <..> + indicates arbitrary presence, + + gives 0 + vc gives 1 + vcvc gives 2 + vcvcvc gives 3 + .... 
+ """ + n = 0 + i = 0 + while True: + if i > j: + return n + if not self._cons(word, i): + break + i = i + 1 + i = i + 1 + + while True: + while True: + if i > j: + return n + if self._cons(word, i): + break + i = i + 1 + i = i + 1 + n = n + 1 + + while True: + if i > j: + return n + if not self._cons(word, i): + break + i = i + 1 + i = i + 1 + + def _vowelinstem(self, stem): + """vowelinstem(stem) is TRUE <=> stem contains a vowel""" + for i in range(len(stem)): + if not self._cons(stem, i): + return True + return False + + def _doublec(self, word): + """doublec(word) is TRUE <=> word ends with a double consonant""" + if len(word) < 2: + return False + if (word[-1] != word[-2]): + return False + return self._cons(word, len(word)-1) + + def _cvc(self, word, i): + """cvc(i) is TRUE <=> + + a) ( --NEW--) i == 1, and word[0] word[1] is vowel consonant, or + + b) word[i - 2], word[i - 1], word[i] has the form consonant - + vowel - consonant and also if the second c is not w, x or y. this + is used when trying to restore an e at the end of a short word. + e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. + """ + if i == 0: return False # i == 0 never happens perhaps + if i == 1: return (not self._cons(word, 0) and self._cons(word, 1)) + if not self._cons(word, i) or self._cons(word, i-1) or not self._cons(word, i-2): return False + + ch = word[i] + if ch == 'w' or ch == 'x' or ch == 'y': + return False + + return True + + def _step1ab(self, word): + """step1ab() gets rid of plurals and -ed or -ing. e.g. + + caresses -> caress + ponies -> poni + sties -> sti + tie -> tie (--NEW--: see below) + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + """ + if word[-1] == 's': + if word.endswith("sses"): + word = word[:-2] + elif word.endswith("ies"): + if len(word) == 4: + word = word[:-1] + # this line extends the original algorithm, so that + # 'flies'->'fli' but 'dies'->'die' etc + else: + word = word[:-2] + elif word[-2] != 's': + word = word[:-1] + + ed_or_ing_trimmed = False + if word.endswith("ied"): + if len(word) == 4: + word = word[:-1] + else: + word = word[:-2] + # this line extends the original algorithm, so that + # 'spied'->'spi' but 'died'->'die' etc + + elif word.endswith("eed"): + if self._m(word, len(word)-4) > 0: + word = word[:-1] + + + elif word.endswith("ed") and self._vowelinstem(word[:-2]): + word = word[:-2] + ed_or_ing_trimmed = True + elif word.endswith("ing") and self._vowelinstem(word[:-3]): + word = word[:-3] + ed_or_ing_trimmed = True + + if ed_or_ing_trimmed: + if word.endswith("at") or word.endswith("bl") or word.endswith("iz"): + word += 'e' + elif self._doublec(word): + if word[-1] not in ['l', 's', 'z']: + word = word[:-1] + elif (self._m(word, len(word)-1) == 1 and self._cvc(word, len(word)-1)): + word += 'e' + + return word + + def _step1c(self, word): + """step1c() turns terminal y to i when there is another vowel in the stem. + --NEW--: This has been modified from the original Porter algorithm so that y->i + is only done when y is preceded by a consonant, but not if the stem + is only a single consonant, i.e. + + (*c and not c) Y -> I + + So 'happy' -> 'happi', but + 'enjoy' -> 'enjoy' etc + + This is a much better rule. Formerly 'enjoy'->'enjoi' and 'enjoyment'-> + 'enjoy'. Step 1c is perhaps done too soon; but with this modification that + no longer really matters. 
+ + Also, the removal of the vowelinstem(z) condition means that 'spy', 'fly', + 'try' ... stem to 'spi', 'fli', 'tri' and conflate with 'spied', 'tried', + 'flies' ... + """ + if word[-1] == 'y' and len(word) > 2 and self._cons(word, len(word) - 2): + return word[:-1] + 'i' + else: + return word + + def _step2(self, word): + """step2() maps double suffices to single ones. + so -ization ( = -ize plus -ation) maps to -ize etc. note that the + string before the suffix must give m() > 0. + """ + if len(word) <= 1: # Only possible at this stage given unusual inputs to stem_word like 'oed' + return word + + ch = word[-2] + + if ch == 'a': + if word.endswith("ational"): + return word[:-7] + "ate" if self._m(word, len(word)-8) > 0 else word + elif word.endswith("tional"): + return word[:-2] if self._m(word, len(word)-7) > 0 else word + else: + return word + elif ch == 'c': + if word.endswith("enci"): + return word[:-4] + "ence" if self._m(word, len(word)-5) > 0 else word + elif word.endswith("anci"): + return word[:-4] + "ance" if self._m(word, len(word)-5) > 0 else word + else: + return word + elif ch == 'e': + if word.endswith("izer"): + return word[:-1] if self._m(word, len(word)-5) > 0 else word + else: + return word + elif ch == 'l': + if word.endswith("bli"): + return word[:-3] + "ble" if self._m(word, len(word)-4) > 0 else word # --DEPARTURE-- + # To match the published algorithm, replace "bli" with "abli" and "ble" with "able" + elif word.endswith("alli"): + # --NEW-- + if self._m(word, len(word)-5) > 0: + word = word[:-2] + return self._step2(word) + else: + return word + elif word.endswith("fulli"): + return word[:-2] if self._m(word, len(word)-6) else word # --NEW-- + elif word.endswith("entli"): + return word[:-2] if self._m(word, len(word)-6) else word + elif word.endswith("eli"): + return word[:-2] if self._m(word, len(word)-4) else word + elif word.endswith("ousli"): + return word[:-2] if self._m(word, len(word)-6) else word + else: + return word + elif ch == 'o': + if word.endswith("ization"): + return word[:-7] + "ize" if self._m(word, len(word)-8) else word + elif word.endswith("ation"): + return word[:-5] + "ate" if self._m(word, len(word)-6) else word + elif word.endswith("ator"): + return word[:-4] + "ate" if self._m(word, len(word)-5) else word + else: + return word + elif ch == 's': + if word.endswith("alism"): + return word[:-3] if self._m(word, len(word)-6) else word + elif word.endswith("ness"): + if word.endswith("iveness"): + return word[:-4] if self._m(word, len(word)-8) else word + elif word.endswith("fulness"): + return word[:-4] if self._m(word, len(word)-8) else word + elif word.endswith("ousness"): + return word[:-4] if self._m(word, len(word)-8) else word + else: + return word + else: + return word + elif ch == 't': + if word.endswith("aliti"): + return word[:-3] if self._m(word, len(word)-6) else word + elif word.endswith("iviti"): + return word[:-5] + "ive" if self._m(word, len(word)-6) else word + elif word.endswith("biliti"): + return word[:-6] + "ble" if self._m(word, len(word)-7) else word + else: + return word + elif ch == 'g': # --DEPARTURE-- + if word.endswith("logi"): + return word[:-1] if self._m(word, len(word) - 4) else word # --NEW-- (Barry Wilkins) + # To match the published algorithm, pass len(word)-5 to _m instead of len(word)-4 + else: + return word + + else: + return word + + def _step3(self, word): + """step3() deals with -ic-, -full, -ness etc. 
similar strategy to step2.""" + + ch = word[-1] + + if ch == 'e': + if word.endswith("icate"): + return word[:-3] if self._m(word, len(word)-6) else word + elif word.endswith("ative"): + return word[:-5] if self._m(word, len(word)-6) else word + elif word.endswith("alize"): + return word[:-3] if self._m(word, len(word)-6) else word + else: + return word + elif ch == 'i': + if word.endswith("iciti"): + return word[:-3] if self._m(word, len(word)-6) else word + else: + return word + elif ch == 'l': + if word.endswith("ical"): + return word[:-2] if self._m(word, len(word)-5) else word + elif word.endswith("ful"): + return word[:-3] if self._m(word, len(word)-4) else word + else: + return word + elif ch == 's': + if word.endswith("ness"): + return word[:-4] if self._m(word, len(word)-5) else word + else: + return word + + else: + return word + + def _step4(self, word): + """step4() takes off -ant, -ence etc., in context vcvc.""" + + if len(word) <= 1: # Only possible at this stage given unusual inputs to stem_word like 'oed' + return word + + ch = word[-2] + + if ch == 'a': + if word.endswith("al"): + return word[:-2] if self._m(word, len(word)-3) > 1 else word + else: + return word + elif ch == 'c': + if word.endswith("ance"): + return word[:-4] if self._m(word, len(word)-5) > 1 else word + elif word.endswith("ence"): + return word[:-4] if self._m(word, len(word)-5) > 1 else word + else: + return word + elif ch == 'e': + if word.endswith("er"): + return word[:-2] if self._m(word, len(word)-3) > 1 else word + else: + return word + elif ch == 'i': + if word.endswith("ic"): + return word[:-2] if self._m(word, len(word)-3) > 1 else word + else: + return word + elif ch == 'l': + if word.endswith("able"): + return word[:-4] if self._m(word, len(word)-5) > 1 else word + elif word.endswith("ible"): + return word[:-4] if self._m(word, len(word)-5) > 1 else word + else: + return word + elif ch == 'n': + if word.endswith("ant"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + elif word.endswith("ement"): + return word[:-5] if self._m(word, len(word)-6) > 1 else word + elif word.endswith("ment"): + return word[:-4] if self._m(word, len(word)-5) > 1 else word + elif word.endswith("ent"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + elif ch == 'o': + if word.endswith("sion") or word.endswith("tion"): # slightly different logic to all the other cases + return word[:-3] if self._m(word, len(word)-4) > 1 else word + elif word.endswith("ou"): + return word[:-2] if self._m(word, len(word)-3) > 1 else word + else: + return word + elif ch == 's': + if word.endswith("ism"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + elif ch == 't': + if word.endswith("ate"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + elif word.endswith("iti"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + elif ch == 'u': + if word.endswith("ous"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + elif ch == 'v': + if word.endswith("ive"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + elif ch == 'z': + if word.endswith("ize"): + return word[:-3] if self._m(word, len(word)-4) > 1 else word + else: + return word + else: + return word + + def _step5(self, word): + """step5() removes a final -e if m() > 1, and changes -ll to -l if + m() > 1. 
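        For a sense of how steps 2-4 compose before this final step, one
        hand-traced example (matching GENERALIZATION -> GENER in Porter, 1980):
        'generalization' -> 'generalize' (step 2, -ization) -> 'general'
        (step 3, -alize) -> 'gener' (step 4, -al); step 5 then leaves it alone.

        >>> PorterStemmer().stem("generalization")
        'gener'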
+ """ + if word[-1] == 'e': + a = self._m(word, len(word)-1) + if a > 1 or (a == 1 and not self._cvc(word, len(word)-2)): + word = word[:-1] + if word.endswith('ll') and self._m(word, len(word)-1) > 1: + word = word[:-1] + + return word + + def stem_word(self, p, i=0, j=None): + """ + Returns the stem of p, or, if i and j are given, the stem of p[i:j+1]. + """ + ## --NLTK-- + if j is None and i == 0: + word = p + else: + if j is None: + j = len(p) - 1 + word = p[i:j+1] + + if word in self.pool: + return self.pool[word] + + if len(word) <= 2: + return word # --DEPARTURE-- + # With this line, strings of length 1 or 2 don't go through the + # stemming process, although no mention is made of this in the + # published algorithm. Remove the line to match the published + # algorithm. + + word = self._step1ab(word) + word = self._step1c(word) + word = self._step2(word) + word = self._step3(word) + word = self._step4(word) + word = self._step5(word) + return word + + def _adjust_case(self, word, stem): + lower = word.lower() + + ret = "" + for x in range(len(stem)): + if lower[x] == stem[x]: + ret += word[x] + else: + ret += stem[x] + + return ret + + ## --NLTK-- + ## Don't use this procedure; we want to work with individual + ## tokens, instead. (commented out the following procedure) + #def stem(self, text): + # parts = re.split("(\W+)", text) + # numWords = (len(parts) + 1)/2 + # + # ret = "" + # for i in xrange(numWords): + # word = parts[2 * i] + # separator = "" + # if ((2 * i) + 1) < len(parts): + # separator = parts[(2 * i) + 1] + # + # stem = self.stem_word(string.lower(word), 0, len(word) - 1) + # ret = ret + self.adjust_case(word, stem) + # ret = ret + separator + # return ret + + ## --NLTK-- + ## Define a stem() method that implements the StemmerI interface. + def stem(self, word): + stem = self.stem_word(word.lower(), 0, len(word) - 1) + return self._adjust_case(word, stem) + + ## --NLTK-- + ## Add a string representation function + def __repr__(self): + return '' diff --git a/summa/preprocessing/snowball.py b/summa/preprocessing/snowball.py new file mode 100644 index 0000000..64a166f --- /dev/null +++ b/summa/preprocessing/snowball.py @@ -0,0 +1,4291 @@ +# Adapted from the NLTK package v3.0.1: +# https://github.com/nltk/nltk/blob/3.0.1/nltk/stem/snowball.py + +# +# Natural Language Toolkit: Snowball Stemmer +# +# Copyright (C) 2001-2014 NLTK Project +# Author: Peter Michael Stahl +# Peter Ljunglof (revisions) +# Algorithms: Dr Martin Porter +# URL: +# For license information, see LICENSE.TXT + +""" +Snowball stemmers + +This module provides a port of the Snowball stemmers +developed by Martin Porter. + +""" + +import re + +from .porter import PorterStemmer +from .util import prefix_replace, suffix_replace + + +class SnowballStemmer(): + + """ + Snowball Stemmer + + The following languages are supported: + Danish, Dutch, English, Finnish, French, German, + Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, + Spanish and Swedish. + + The algorithm for English is documented here: + + Porter, M. \"An algorithm for suffix stripping.\" + Program 14.3 (1980): 130-137. + + The algorithms have been developed by Martin Porter. + These stemmers are called Snowball, because Porter created + a programming language with this name for creating + new stemming algorithms. 
There is more information available + at http://snowball.tartarus.org/ + + The stemmer is invoked as shown below: + + >>> from summa.preprocessing.snowball import SnowballStemmer + >>> print(" ".join(SnowballStemmer.languages)) # See which languages are supported + ... + >>> stemmer = SnowballStemmer("german") # Choose a language + >>> stemmer.stem("Autobahnen") # Stem a word + 'autobahn' + + Invoking the stemmers that way is useful if you do not know the + language to be stemmed at runtime. Alternatively, if you already know + the language, then you can invoke the language specific stemmer directly: + + >>> from summa.preprocessing.snowball import GermanStemmer + >>> stemmer = GermanStemmer() + >>> stemmer.stem("Autobahnen") + 'autobahn' + + :param language: The language whose subclass is instantiated. + :type language: str or unicode + :raise ValueError: If there is no stemmer for the specified + language, a ValueError is raised. + """ + + languages = ( + "arabic", + "danish", + "dutch", + "english", + "finnish", + "french", + "german", + "hungarian", + "italian", + "norwegian", + "polish", + "portuguese", + "romanian", + "russian", + "spanish", + "swedish", + ) + + def __init__(self, language): + if language not in self.languages: + raise ValueError("The language '%s' is not supported." % language) + stemmerclass = globals()[language.capitalize() + "Stemmer"] + self.stemmer = stemmerclass() + self.stem = self.stemmer.stem + + +class _LanguageSpecificStemmer(): + + """ + This helper subclass offers the possibility + to invoke a specific stemmer directly. + This is useful if you already know the language to be stemmed at runtime. + + Create an instance of the Snowball stemmer. + """ + + def __init__(self): + # The language is the name of the class, minus the final "Stemmer". + language = type(self).__name__.lower() + if language.endswith("stemmer"): + language = language[:-7] + + def __repr__(self): + """ + Print out the string representation of the respective class. + + """ + return "<%s>" % type(self).__name__ + + +class PorterStemmer(_LanguageSpecificStemmer, PorterStemmer): + """ + A word stemmer based on the original Porter stemming algorithm. + + Porter, M. \"An algorithm for suffix stripping.\" + Program 14.3 (1980): 130-137. + + A few minor modifications have been made to Porter's basic + algorithm. See the source code of the module + nltk.stem.porter for more information. + + """ + def __init__(self): + _LanguageSpecificStemmer.__init__(self) + PorterStemmer.__init__(self) + + +class _ScandinavianStemmer(_LanguageSpecificStemmer): + + """ + This subclass encapsulates a method for defining the string region R1. + It is used by the Danish, Norwegian, and Swedish stemmer. + + """ + + def _r1_scandinavian(self, word, vowels): + """ + Return the region R1 that is used by the Scandinavian stemmers. + + R1 is the region after the first non-vowel following a vowel, + or is the null region at the end of the word if there is no + such non-vowel. But then R1 is adjusted so that the region + before it contains at least three letters. + + :param word: The word whose region R1 is determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the region R1. + :type vowels: unicode + :return: the region R1 for the respective word. + :rtype: unicode + :note: This helper method is invoked by the respective stem method of + the subclasses DanishStemmer, NorwegianStemmer, and + SwedishStemmer. It is not to be invoked directly! 
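        A rough standalone sketch of the basic R1 definition (before the
        three-letter adjustment described above), using the examples from the
        Snowball documentation at http://snowball.tartarus.org/texts/r1r2.html;
        the helper name r1 here is purely illustrative:

        >>> def r1(word, vowels="aeiouy"):
        ...     for i in range(1, len(word)):
        ...         if word[i] not in vowels and word[i - 1] in vowels:
        ...             return word[i + 1:]
        ...     return ""
        >>> r1("beautiful"), r1("beauty"), r1("beau")
        ('iful', 'y', '')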
+ + """ + r1 = "" + for i in range(1, len(word)): + if word[i] not in vowels and word[i-1] in vowels: + if len(word[:i+1]) < 3 and len(word[:i+1]) > 0: + r1 = word[3:] + elif len(word[:i+1]) >= 3: + r1 = word[i+1:] + else: + return word + break + + return r1 + + + +class _StandardStemmer(_LanguageSpecificStemmer): + + """ + This subclass encapsulates two methods for defining the standard versions + of the string regions R1, R2, and RV. + + """ + + def _r1r2_standard(self, word, vowels): + """ + Return the standard interpretations of the string regions R1 and R2. + + R1 is the region after the first non-vowel following a vowel, + or is the null region at the end of the word if there is no + such non-vowel. + + R2 is the region after the first non-vowel following a vowel + in R1, or is the null region at the end of the word if there + is no such non-vowel. + + :param word: The word whose regions R1 and R2 are determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the regions R1 and R2. + :type vowels: unicode + :return: (r1,r2), the regions R1 and R2 for the respective word. + :rtype: tuple + :note: This helper method is invoked by the respective stem method of + the subclasses DutchStemmer, FinnishStemmer, + FrenchStemmer, GermanStemmer, ItalianStemmer, + PortugueseStemmer, RomanianStemmer, and SpanishStemmer. + It is not to be invoked directly! + :note: A detailed description of how to define R1 and R2 + can be found at http://snowball.tartarus.org/texts/r1r2.html + + """ + r1 = "" + r2 = "" + for i in range(1, len(word)): + if word[i] not in vowels and word[i-1] in vowels: + r1 = word[i+1:] + break + + for i in range(1, len(r1)): + if r1[i] not in vowels and r1[i-1] in vowels: + r2 = r1[i+1:] + break + + return (r1, r2) + + + + def _rv_standard(self, word, vowels): + """ + Return the standard interpretation of the string region RV. + + If the second letter is a consonant, RV is the region after the + next following vowel. If the first two letters are vowels, RV is + the region after the next following consonant. Otherwise, RV is + the region after the third letter. + + :param word: The word whose region RV is determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the region RV. + :type vowels: unicode + :return: the region RV for the respective word. + :rtype: unicode + :note: This helper method is invoked by the respective stem method of + the subclasses ItalianStemmer, PortugueseStemmer, + RomanianStemmer, and SpanishStemmer. It is not to be + invoked directly! + + """ + rv = "" + if len(word) >= 2: + if word[1] not in vowels: + for i in range(2, len(word)): + if word[i] in vowels: + rv = word[i+1:] + break + + elif word[:2] in vowels: + for i in range(2, len(word)): + if word[i] not in vowels: + rv = word[i+1:] + break + else: + rv = word[3:] + + return rv + + + +class DanishStemmer(_ScandinavianStemmer): + + """ + The Danish Snowball stemmer. + + :cvar __vowels: The Danish vowels. + :type __vowels: unicode + :cvar __consonants: The Danish consonants. + :type __consonants: unicode + :cvar __double_consonants: The Danish double consonants. + :type __double_consonants: tuple + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. 
+ :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Danish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/danish/stemmer.html + + """ + + # The language's vowels and other important characters are defined. + __vowels = "aeiouy\xE6\xE5\xF8" + __consonants = "bcdfghjklmnpqrstvwxz" + __double_consonants = ("bb", "cc", "dd", "ff", "gg", "hh", "jj", + "kk", "ll", "mm", "nn", "pp", "qq", "rr", + "ss", "tt", "vv", "ww", "xx", "zz") + __s_ending = "abcdfghjklmnoprtvyz\xE5" + + # The different suffixes, divided into the algorithm's steps + # and organized by length, are listed in tuples. + __step1_suffixes = ("erendes", "erende", "hedens", "ethed", + "erede", "heden", "heder", "endes", + "ernes", "erens", "erets", "ered", + "ende", "erne", "eren", "erer", "heds", + "enes", "eres", "eret", "hed", "ene", "ere", + "ens", "ers", "ets", "en", "er", "es", "et", + "e", "s") + __step2_suffixes = ("gd", "dt", "gt", "kt") + __step3_suffixes = ("elig", "l\xF8st", "lig", "els", "ig") + + def stem(self, word): + """ + Stem a Danish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + # Every word is put into lower case for normalization. + word = word.lower() + + # After this, the required regions are generated + # by the respective helper method. + r1 = self._r1_scandinavian(word, self.__vowels) + + # Then the actual stemming process starts. + # Every new step is explicitly indicated + # according to the descriptions on the Snowball website. + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + if r1.endswith("igst"): + word = word[:-2] + r1 = r1[:-2] + + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix == "l\xF8st": + word = word[:-1] + r1 = r1[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + + if r1.endswith(self.__step2_suffixes): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 4: Undouble + for double_cons in self.__double_consonants: + if word.endswith(double_cons) and len(word) > 3: + word = word[:-1] + break + + + return word + + +class DutchStemmer(_StandardStemmer): + + """ + The Dutch Snowball stemmer. + + :cvar __vowels: The Dutch vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step3b_suffixes: Suffixes to be deleted in step 3b of the algorithm. + :type __step3b_suffixes: tuple + :note: A detailed description of the Dutch + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/dutch/stemmer.html + + """ + + __vowels = "aeiouy\xE8" + __step1_suffixes = ("heden", "ene", "en", "se", "s") + __step3b_suffixes = ("baar", "lijk", "bar", "end", "ing", "ig") + + def stem(self, word): + """ + Stem a Dutch word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + step2_success = False + + # Vowel accents are removed. + word = (word.replace("\xE4", "a").replace("\xE1", "a") + .replace("\xEB", "e").replace("\xE9", "e") + .replace("\xED", "i").replace("\xEF", "i") + .replace("\xF6", "o").replace("\xF3", "o") + .replace("\xFC", "u").replace("\xFA", "u")) + + # An initial 'y', a 'y' after a vowel, + # and an 'i' between self.__vowels is put into upper case. + # As from now these are treated as consonants. + if word.startswith("y"): + word = "".join(("Y", word[1:])) + + for i in range(1, len(word)): + if word[i-1] in self.__vowels and word[i] == "y": + word = "".join((word[:i], "Y", word[i+1:])) + + for i in range(1, len(word)-1): + if (word[i-1] in self.__vowels and word[i] == "i" and + word[i+1] in self.__vowels): + word = "".join((word[:i], "I", word[i+1:])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # R1 is adjusted so that the region before it + # contains at least 3 letters. + for i in range(1, len(word)): + if word[i] not in self.__vowels and word[i-1] in self.__vowels: + if len(word[:i+1]) < 3 and len(word[:i+1]) > 0: + r1 = word[3:] + elif len(word[:i+1]) == 0: + return word + break + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "heden": + word = "".join((word[:-5], "heid")) + r1 = "".join((r1[:-5], "heid")) + if r2.endswith("heden"): + r2 = "".join((r2[:-5], "heid")) + + elif (suffix in ("ene", "en") and + not word.endswith("heden") and + word[-len(suffix)-1] not in self.__vowels and + word[-len(suffix)-3:-len(suffix)] != "gem"): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif (suffix in ("se", "s") and + word[-len(suffix)-1] not in self.__vowels and + word[-len(suffix)-1] != "j"): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 2 + if r1.endswith("e") and word[-2] not in self.__vowels: + step2_success = True + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + # STEP 3a + if r2.endswith("heid") and word[-5] != "c": + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + + if (r1.endswith("en") and word[-3] not in self.__vowels and + word[-5:-2] != "gem"): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + # STEP 3b: Derivational suffixes + for suffix in self.__step3b_suffixes: + if r2.endswith(suffix): + if suffix in ("end", "ing"): + word = word[:-3] + r2 = r2[:-3] + + if r2.endswith("ig") and word[-3] != "e": + word = word[:-2] + else: + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + + elif suffix == "ig" and word[-3] != "e": + word = word[:-2] + + elif suffix == "lijk": + word = word[:-4] + r1 = r1[:-4] + + if r1.endswith("e") and word[-2] not in self.__vowels: + word = word[:-1] + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + + elif suffix == "baar": + word = word[:-4] + + elif suffix == "bar" and step2_success: + word = word[:-3] + break + + # STEP 4: Undouble vowel + if len(word) >= 4: + if word[-1] not in self.__vowels and word[-1] != "I": + if word[-3:-1] in ("aa", "ee", "oo", "uu"): + if word[-4] not in self.__vowels: + word = "".join((word[:-3], word[-3], word[-1])) + + # All occurrences of 'I' and 'Y' are put back into lower case. 
+ word = word.replace("I", "i").replace("Y", "y") + + + return word + + + +class EnglishStemmer(_StandardStemmer): + + """ + The English Snowball stemmer. + + :cvar __vowels: The English vowels. + :type __vowels: unicode + :cvar __double_consonants: The English double consonants. + :type __double_consonants: tuple + :cvar __li_ending: Letters that may directly appear before a word final 'li'. + :type __li_ending: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1a_suffixes: Suffixes to be deleted in step 1a of the algorithm. + :type __step1a_suffixes: tuple + :cvar __step1b_suffixes: Suffixes to be deleted in step 1b of the algorithm. + :type __step1b_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm. + :type __step5_suffixes: tuple + :cvar __special_words: A dictionary containing words + which have to be stemmed specially. + :type __special_words: dict + :note: A detailed description of the English + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/english/stemmer.html + """ + + __vowels = "aeiouy" + __double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn", + "pp", "rr", "tt") + __li_ending = "cdeghkmnrt" + __step0_suffixes = ("'s'", "'s", "'") + __step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s") + __step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed") + __step2_suffixes = ('ization', 'ational', 'fulness', 'ousness', + 'iveness', 'tional', 'biliti', 'lessli', + 'entli', 'ation', 'alism', 'aliti', 'ousli', + 'iviti', 'fulli', 'enci', 'anci', 'abli', + 'izer', 'ator', 'alli', 'bli', 'ogi', 'li') + __step3_suffixes = ('ational', 'tional', 'alize', 'icate', 'iciti', + 'ative', 'ical', 'ness', 'ful') + __step4_suffixes = ('ement', 'ance', 'ence', 'able', 'ible', 'ment', + 'ant', 'ent', 'ism', 'ate', 'iti', 'ous', + 'ive', 'ize', 'ion', 'al', 'er', 'ic') + __step5_suffixes = ("e", "l") + __special_words = {"skis" : "ski", + "skies" : "sky", + "dying" : "die", + "lying" : "lie", + "tying" : "tie", + "idly" : "idl", + "gently" : "gentl", + "ugly" : "ugli", + "early" : "earli", + "only" : "onli", + "singly" : "singl", + "sky" : "sky", + "news" : "news", + "howe" : "howe", + "atlas" : "atlas", + "cosmos" : "cosmos", + "bias" : "bias", + "andes" : "andes", + "inning" : "inning", + "innings" : "inning", + "outing" : "outing", + "outings" : "outing", + "canning" : "canning", + "cannings" : "canning", + "herring" : "herring", + "herrings" : "herring", + "earring" : "earring", + "earrings" : "earring", + "proceed" : "proceed", + "proceeds" : "proceed", + "proceeded" : "proceed", + "proceeding" : "proceed", + "exceed" : "exceed", + "exceeds" : "exceed", + "exceeded" : "exceed", + "exceeding" : "exceed", + "succeed" : "succeed", + "succeeds" : "succeed", + "succeeded" : "succeed", + "succeeding" : "succeed"} + + def stem(self, word): + + """ + Stem an English word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if len(word) <= 2: + return word + + elif word in self.__special_words: + return self.__special_words[word] + + # Map the different apostrophe characters to a single consistent one + word = (word.replace("\u2019", "\x27") + .replace("\u2018", "\x27") + .replace("\u201B", "\x27")) + + if word.startswith("\x27"): + word = word[1:] + + if word.startswith("y"): + word = "".join(("Y", word[1:])) + + for i in range(1, len(word)): + if word[i-1] in self.__vowels and word[i] == "y": + word = "".join((word[:i], "Y", word[i+1:])) + + step1a_vowel_found = False + step1b_vowel_found = False + + r1 = "" + r2 = "" + + if word.startswith(("gener", "commun", "arsen")): + if word.startswith(("gener", "arsen")): + r1 = word[5:] + else: + r1 = word[6:] + + for i in range(1, len(r1)): + if r1[i] not in self.__vowels and r1[i-1] in self.__vowels: + r2 = r1[i+1:] + break + else: + r1, r2 = self._r1r2_standard(word, self.__vowels) + + + # STEP 0 + for suffix in self.__step0_suffixes: + if word.endswith(suffix): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 1a + for suffix in self.__step1a_suffixes: + if word.endswith(suffix): + + if suffix == "sses": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("ied", "ies"): + if len(word[:-len(suffix)]) > 1: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif suffix == "s": + for letter in word[:-2]: + if letter in self.__vowels: + step1a_vowel_found = True + break + + if step1a_vowel_found: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + break + + # STEP 1b + for suffix in self.__step1b_suffixes: + if word.endswith(suffix): + if suffix in ("eed", "eedly"): + + if r1.endswith(suffix): + word = "".join((word[:-len(suffix)], "ee")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ee")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ee")) + else: + r2 = "" + else: + for letter in word[:-len(suffix)]: + if letter in self.__vowels: + step1b_vowel_found = True + break + + if step1b_vowel_found: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + + if word.endswith(("at", "bl", "iz")): + word = "".join((word, "e")) + r1 = "".join((r1, "e")) + + if len(word) > 5 or len(r1) >=3: + r2 = "".join((r2, "e")) + + elif word.endswith(self.__double_consonants): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif ((r1 == "" and len(word) >= 3 and + word[-1] not in self.__vowels and + word[-1] not in "wxY" and + word[-2] in self.__vowels and + word[-3] not in self.__vowels) + or + (r1 == "" and len(word) == 2 and + word[0] in self.__vowels and + word[1] not in self.__vowels)): + + word = "".join((word, "e")) + + if len(r1) > 0: + r1 = "".join((r1, "e")) + + if len(r2) > 0: + r2 = "".join((r2, "e")) + break + + # STEP 1c + if len(word) > 2 and word[-1] in "yY" and word[-2] not in self.__vowels: + word = "".join((word[:-1], "i")) + if len(r1) >= 1: + r1 = "".join((r1[:-1], "i")) + else: + r1 = "" + + if len(r2) >= 1: + r2 = "".join((r2[:-1], "i")) + else: + r2 = "" + + # STEP 2 + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix == "tional": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("enci", "anci", "abli"): + word = "".join((word[:-1], "e")) + + if len(r1) >= 1: + r1 = "".join((r1[:-1], "e")) + else: + r1 = "" + + if len(r2) >= 1: + r2 = 
"".join((r2[:-1], "e")) + else: + r2 = "" + + elif suffix == "entli": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("izer", "ization"): + word = "".join((word[:-len(suffix)], "ize")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ize")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ize")) + else: + r2 = "" + + elif suffix in ("ational", "ation", "ator"): + word = "".join((word[:-len(suffix)], "ate")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ate")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ate")) + else: + r2 = "e" + + elif suffix in ("alism", "aliti", "alli"): + word = "".join((word[:-len(suffix)], "al")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "al")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "al")) + else: + r2 = "" + + elif suffix == "fulness": + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + + elif suffix in ("ousli", "ousness"): + word = "".join((word[:-len(suffix)], "ous")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ous")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ous")) + else: + r2 = "" + + elif suffix in ("iveness", "iviti"): + word = "".join((word[:-len(suffix)], "ive")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ive")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ive")) + else: + r2 = "e" + + elif suffix in ("biliti", "bli"): + word = "".join((word[:-len(suffix)], "ble")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ble")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ble")) + else: + r2 = "" + + elif suffix == "ogi" and word[-4] == "l": + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif suffix in ("fulli", "lessli"): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "li" and word[-3] in self.__li_ending: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix == "tional": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "ational": + word = "".join((word[:-len(suffix)], "ate")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ate")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ate")) + else: + r2 = "" + + elif suffix == "alize": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + + elif suffix in ("icate", "iciti", "ical"): + word = "".join((word[:-len(suffix)], "ic")) + + if len(r1) >= len(suffix): + r1 = "".join((r1[:-len(suffix)], "ic")) + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = "".join((r2[:-len(suffix)], "ic")) + else: + r2 = "" + + elif suffix in ("ful", "ness"): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + + elif suffix == "ative" and r2.endswith(suffix): + word = word[:-5] + r1 = r1[:-5] + r2 = r2[:-5] + break + + # STEP 4 + for suffix in self.__step4_suffixes: + if word.endswith(suffix): + if r2.endswith(suffix): + if suffix == "ion": + if word[-4] in "st": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 5 + if r2.endswith("l") and word[-2] == "l": + word = word[:-1] + elif r2.endswith("e"): + 
word = word[:-1] + elif r1.endswith("e"): + if len(word) >= 4 and (word[-2] in self.__vowels or + word[-2] in "wxY" or + word[-3] not in self.__vowels or + word[-4] in self.__vowels): + word = word[:-1] + + + word = word.replace("Y", "y") + + + return word + + + +class FinnishStemmer(_StandardStemmer): + + """ + The Finnish Snowball stemmer. + + :cvar __vowels: The Finnish vowels. + :type __vowels: unicode + :cvar __restricted_vowels: A subset of the Finnish vowels. + :type __restricted_vowels: unicode + :cvar __long_vowels: The Finnish vowels in their long forms. + :type __long_vowels: tuple + :cvar __consonants: The Finnish consonants. + :type __consonants: unicode + :cvar __double_consonants: The Finnish double consonants. + :type __double_consonants: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :note: A detailed description of the Finnish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/finnish/stemmer.html + """ + + __vowels = "aeiouy\xE4\xF6" + __restricted_vowels = "aeiou\xE4\xF6" + __long_vowels = ("aa", "ee", "ii", "oo", "uu", "\xE4\xE4", + "\xF6\xF6") + __consonants = "bcdfghjklmnpqrstvwxz" + __double_consonants = ("bb", "cc", "dd", "ff", "gg", "hh", "jj", + "kk", "ll", "mm", "nn", "pp", "qq", "rr", + "ss", "tt", "vv", "ww", "xx", "zz") + __step1_suffixes = ('kaan', 'k\xE4\xE4n', 'sti', 'kin', 'han', + 'h\xE4n', 'ko', 'k\xF6', 'pa', 'p\xE4') + __step2_suffixes = ('nsa', 'ns\xE4', 'mme', 'nne', 'si', 'ni', + 'an', '\xE4n', 'en') + __step3_suffixes = ('siin', 'tten', 'seen', 'han', 'hen', 'hin', + 'hon', 'h\xE4n', 'h\xF6n', 'den', 'tta', + 'tt\xE4', 'ssa', 'ss\xE4', 'sta', + 'st\xE4', 'lla', 'll\xE4', 'lta', + 'lt\xE4', 'lle', 'ksi', 'ine', 'ta', + 't\xE4', 'na', 'n\xE4', 'a', '\xE4', + 'n') + __step4_suffixes = ('impi', 'impa', 'imp\xE4', 'immi', 'imma', + 'imm\xE4', 'mpi', 'mpa', 'mp\xE4', 'mmi', + 'mma', 'mm\xE4', 'eja', 'ej\xE4') + + def stem(self, word): + """ + Stem a Finnish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + step3_success = False + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # STEP 1: Particles etc. 
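+        # Illustrative example (hypothetical input): for a form such as
+        # "kirjakin" ("kirja" + enclitic "kin"), R1 is "jakin"; "kin" lies in
+        # R1 and is preceded by "a", one of the letters "ntaeiouy\xE4\xF6",
+        # so the particle is removed and the word becomes "kirja".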
+ for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "sti": + if suffix in r2: + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + if word[-len(suffix)-1] in "ntaeiouy\xE4\xF6": + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 2: Possessives + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + if suffix == "si": + if word[-3] != "k": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "ni": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + if word.endswith("kse"): + word = "".join((word[:-3], "ksi")) + + if r1.endswith("kse"): + r1 = "".join((r1[:-3], "ksi")) + + if r2.endswith("kse"): + r2 = "".join((r2[:-3], "ksi")) + + elif suffix == "an": + if (word[-4:-2] in ("ta", "na") or + word[-5:-2] in ("ssa", "sta", "lla", "lta")): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "\xE4n": + if (word[-4:-2] in ("t\xE4", "n\xE4") or + word[-5:-2] in ("ss\xE4", "st\xE4", + "ll\xE4", "lt\xE4")): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "en": + if word[-5:-2] in ("lle", "ine"): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + break + + # STEP 3: Cases + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix in ("han", "hen", "hin", "hon", "h\xE4n", + "h\xF6n"): + if ((suffix == "han" and word[-4] == "a") or + (suffix == "hen" and word[-4] == "e") or + (suffix == "hin" and word[-4] == "i") or + (suffix == "hon" and word[-4] == "o") or + (suffix == "h\xE4n" and word[-4] == "\xE4") or + (suffix == "h\xF6n" and word[-4] == "\xF6")): + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + step3_success = True + + elif suffix in ("siin", "den", "tten"): + if (word[-len(suffix)-1] == "i" and + word[-len(suffix)-2] in self.__restricted_vowels): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + step3_success = True + else: + continue + + elif suffix == "seen": + if word[-6:-4] in self.__long_vowels: + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + step3_success = True + else: + continue + + elif suffix in ("a", "\xE4"): + if word[-2] in self.__vowels and word[-3] in self.__consonants: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + step3_success = True + + elif suffix in ("tta", "tt\xE4"): + if word[-4] == "e": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + step3_success = True + + elif suffix == "n": + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + step3_success = True + + if word[-2:] == "ie" or word[-2:] in self.__long_vowels: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + step3_success = True + break + + # STEP 4: Other endings + for suffix in self.__step4_suffixes: + if r2.endswith(suffix): + if suffix in ("mpi", "mpa", "mp\xE4", "mmi", "mma", + "mm\xE4"): + if word[-5:-3] != "po": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 5: Plurals + if step3_success and len(r1) >= 1 and r1[-1] in "ij": + word = word[:-1] + r1 = r1[:-1] + + elif (not step3_success and len(r1) >= 2 and + r1[-1] == "t" and r1[-2] in self.__vowels): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + if r2.endswith("imma"): + word = word[:-4] + r1 = r1[:-4] + elif r2.endswith("mma") and r2[-5:-3] != "po": + word = word[:-3] + r1 = r1[:-3] + + # STEP 6: Tidying up + if 
r1[-2:] in self.__long_vowels: + word = word[:-1] + r1 = r1[:-1] + + if (len(r1) >= 2 and r1[-2] in self.__consonants and + r1[-1] in "a\xE4ei"): + word = word[:-1] + r1 = r1[:-1] + + if r1.endswith(("oj", "uj")): + word = word[:-1] + r1 = r1[:-1] + + if r1.endswith("jo"): + word = word[:-1] + r1 = r1[:-1] + + # If the word ends with a double consonant + # followed by zero or more vowels, the last consonant is removed. + for i in range(1, len(word)): + if word[-i] in self.__vowels: + continue + else: + if i == 1: + if word[-i-1:] in self.__double_consonants: + word = word[:-1] + else: + if word[-i-1:-i+1] in self.__double_consonants: + word = "".join((word[:-i], word[-i+1:])) + break + + + return word + + + +class FrenchStemmer(_StandardStemmer): + + """ + The French Snowball stemmer. + + :cvar __vowels: The French vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm. + :type __step2a_suffixes: tuple + :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm. + :type __step2b_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :note: A detailed description of the French + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/french/stemmer.html + """ + + __vowels = "aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9" + __step1_suffixes = ('issements', 'issement', 'atrices', 'atrice', + 'ateurs', 'ations', 'logies', 'usions', + 'utions', 'ements', 'amment', 'emment', + 'ances', 'iqUes', 'ismes', 'ables', 'istes', + 'ateur', 'ation', 'logie', 'usion', 'ution', + 'ences', 'ement', 'euses', 'ments', 'ance', + 'iqUe', 'isme', 'able', 'iste', 'ence', + 'it\xE9s', 'ives', 'eaux', 'euse', 'ment', + 'eux', 'it\xE9', 'ive', 'ifs', 'aux', 'if') + __step2a_suffixes = ('issaIent', 'issantes', 'iraIent', 'issante', + 'issants', 'issions', 'irions', 'issais', + 'issait', 'issant', 'issent', 'issiez', 'issons', + 'irais', 'irait', 'irent', 'iriez', 'irons', + 'iront', 'isses', 'issez', '\xEEmes', + '\xEEtes', 'irai', 'iras', 'irez', 'isse', + 'ies', 'ira', '\xEEt', 'ie', 'ir', 'is', + 'it', 'i') + __step2b_suffixes = ('eraIent', 'assions', 'erions', 'assent', + 'assiez', '\xE8rent', 'erais', 'erait', + 'eriez', 'erons', 'eront', 'aIent', 'antes', + 'asses', 'ions', 'erai', 'eras', 'erez', + '\xE2mes', '\xE2tes', 'ante', 'ants', + 'asse', '\xE9es', 'era', 'iez', 'ais', + 'ait', 'ant', '\xE9e', '\xE9s', 'er', + 'ez', '\xE2t', 'ai', 'as', '\xE9', 'a') + __step4_suffixes = ('i\xE8re', 'I\xE8re', 'ion', 'ier', 'Ier', + 'e', '\xEB') + + def stem(self, word): + """ + Stem a French word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + step1_success = False + rv_ending_found = False + step2a_success = False + step2b_success = False + + # Every occurrence of 'u' after 'q' is put into upper case. + for i in range(1, len(word)): + if word[i-1] == "q" and word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + # Every occurrence of 'u' and 'i' + # between vowels is put into upper case. + # Every occurrence of 'y' preceded or + # followed by a vowel is also put into upper case. 
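+        # For example (illustrative): "jouer" becomes "joUer" and "payer"
+        # becomes "paYer"; the upper-case letters are then handled as
+        # consonants by the suffix steps below.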
+ for i in range(1, len(word)-1): + if word[i-1] in self.__vowels and word[i+1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + elif word[i] == "i": + word = "".join((word[:i], "I", word[i+1:])) + + if word[i-1] in self.__vowels or word[i+1] in self.__vowels: + if word[i] == "y": + word = "".join((word[:i], "Y", word[i+1:])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self.__rv_french(word, self.__vowels) + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "eaux": + word = word[:-1] + step1_success = True + + elif suffix in ("euse", "euses"): + if suffix in r2: + word = word[:-len(suffix)] + step1_success = True + + elif suffix in r1: + word = "".join((word[:-len(suffix)], "eux")) + step1_success = True + + elif suffix in ("ement", "ements") and suffix in rv: + word = word[:-len(suffix)] + step1_success = True + + if word[-2:] == "iv" and "iv" in r2: + word = word[:-2] + + if word[-2:] == "at" and "at" in r2: + word = word[:-2] + + elif word[-3:] == "eus": + if "eus" in r2: + word = word[:-3] + elif "eus" in r1: + word = "".join((word[:-1], "x")) + + elif word[-3:] in ("abl", "iqU"): + if "abl" in r2 or "iqU" in r2: + word = word[:-3] + + elif word[-3:] in ("i\xE8r", "I\xE8r"): + if "i\xE8r" in rv or "I\xE8r" in rv: + word = "".join((word[:-3], "i")) + + elif suffix == "amment" and suffix in rv: + word = "".join((word[:-6], "ant")) + rv = "".join((rv[:-6], "ant")) + rv_ending_found = True + + elif suffix == "emment" and suffix in rv: + word = "".join((word[:-6], "ent")) + rv_ending_found = True + + elif (suffix in ("ment", "ments") and suffix in rv and + not rv.startswith(suffix) and + rv[rv.rindex(suffix)-1] in self.__vowels): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + rv_ending_found = True + + elif suffix == "aux" and suffix in r1: + word = "".join((word[:-2], "l")) + step1_success = True + + elif (suffix in ("issement", "issements") and suffix in r1 + and word[-len(suffix)-1] not in self.__vowels): + word = word[:-len(suffix)] + step1_success = True + + elif suffix in ("ance", "iqUe", "isme", "able", "iste", + "eux", "ances", "iqUes", "ismes", + "ables", "istes") and suffix in r2: + word = word[:-len(suffix)] + step1_success = True + + elif suffix in ("atrice", "ateur", "ation", "atrices", + "ateurs", "ations") and suffix in r2: + word = word[:-len(suffix)] + step1_success = True + + if word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + + elif suffix in ("logie", "logies") and suffix in r2: + word = "".join((word[:-len(suffix)], "log")) + step1_success = True + + elif (suffix in ("usion", "ution", "usions", "utions") and + suffix in r2): + word = "".join((word[:-len(suffix)], "u")) + step1_success = True + + elif suffix in ("ence", "ences") and suffix in r2: + word = "".join((word[:-len(suffix)], "ent")) + step1_success = True + + elif suffix in ("it\xE9", "it\xE9s") and suffix in r2: + word = word[:-len(suffix)] + step1_success = True + + if word[-4:] == "abil": + if "abil" in r2: + word = word[:-4] + else: + word = "".join((word[:-2], "l")) + + elif word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + + elif word[-2:] == "iv": + if "iv" in r2: + word = word[:-2] + + elif (suffix in ("if", "ive", "ifs", "ives") and + suffix in r2): + word = word[:-len(suffix)] + step1_success = True + + if word[-2:] == "at" and "at" in r2: + word = word[:-2] + + if 
word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + break + + # STEP 2a: Verb suffixes beginning 'i' + if not step1_success or rv_ending_found: + for suffix in self.__step2a_suffixes: + if word.endswith(suffix): + if (suffix in rv and len(rv) > len(suffix) and + rv[rv.rindex(suffix)-1] not in self.__vowels): + word = word[:-len(suffix)] + step2a_success = True + break + + # STEP 2b: Other verb suffixes + if not step2a_success: + for suffix in self.__step2b_suffixes: + if rv.endswith(suffix): + if suffix == "ions" and "ions" in r2: + word = word[:-4] + step2b_success = True + + elif suffix in ('eraIent', 'erions', '\xE8rent', + 'erais', 'erait', 'eriez', + 'erons', 'eront', 'erai', 'eras', + 'erez', '\xE9es', 'era', 'iez', + '\xE9e', '\xE9s', 'er', 'ez', + '\xE9'): + word = word[:-len(suffix)] + step2b_success = True + + elif suffix in ('assions', 'assent', 'assiez', + 'aIent', 'antes', 'asses', + '\xE2mes', '\xE2tes', 'ante', + 'ants', 'asse', 'ais', 'ait', + 'ant', '\xE2t', 'ai', 'as', + 'a'): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + step2b_success = True + if rv.endswith("e"): + word = word[:-1] + break + + # STEP 3 + if step1_success or step2a_success or step2b_success: + if word[-1] == "Y": + word = "".join((word[:-1], "i")) + elif word[-1] == "\xE7": + word = "".join((word[:-1], "c")) + + # STEP 4: Residual suffixes + else: + if (len(word) >= 2 and word[-1] == "s" and + word[-2] not in "aiou\xE8s"): + word = word[:-1] + + for suffix in self.__step4_suffixes: + if word.endswith(suffix): + if suffix in rv: + if (suffix == "ion" and suffix in r2 and + rv[-4] in "st"): + word = word[:-3] + + elif suffix in ("ier", "i\xE8re", "Ier", + "I\xE8re"): + word = "".join((word[:-len(suffix)], "i")) + + elif suffix == "e": + word = word[:-1] + + elif suffix == "\xEB" and word[-3:-1] == "gu": + word = word[:-1] + break + + # STEP 5: Undouble + if word.endswith(("enn", "onn", "ett", "ell", "eill")): + word = word[:-1] + + # STEP 6: Un-accent + for i in range(1, len(word)): + if word[-i] not in self.__vowels: + i += 1 + else: + if i != 1 and word[-i] in ("\xE9", "\xE8"): + word = "".join((word[:-i], "e", word[-i+1:])) + break + + word = (word.replace("I", "i") + .replace("U", "u") + .replace("Y", "y")) + + + return word + + + + def __rv_french(self, word, vowels): + """ + Return the region RV that is used by the French stemmer. + + If the word begins with two vowels, RV is the region after + the third letter. Otherwise, it is the region after the first + vowel not at the beginning of the word, or the end of the word + if these positions cannot be found. (Exceptionally, u'par', + u'col' or u'tap' at the beginning of a word is also taken to + define RV as the region to their right.) + + :param word: The French word whose region RV is determined. + :type word: str or unicode + :param vowels: The French vowels that are used to determine + the region RV. + :type vowels: unicode + :return: the region RV for the respective French word. + :rtype: unicode + :note: This helper method is invoked by the stem method of + the subclass FrenchStemmer. It is not to be invoked directly! + + """ + rv = "" + if len(word) >= 2: + if (word.startswith(("par", "col", "tap")) or + (word[0] in vowels and word[1] in vowels)): + rv = word[3:] + else: + for i in range(1, len(word)): + if word[i] in vowels: + rv = word[i+1:] + break + + return rv + + + +class GermanStemmer(_StandardStemmer): + + """ + The German Snowball stemmer. 
+ + :cvar __vowels: The German vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __st_ending: Letter that may directly appear before a word final 'st'. + :type __st_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the German + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/german/stemmer.html + + """ + + __vowels = "aeiouy\xE4\xF6\xFC" + __s_ending = "bdfghklmnrt" + __st_ending = "bdfghklmnt" + + __step1_suffixes = ("ern", "em", "er", "en", "es", "e", "s") + __step2_suffixes = ("est", "en", "er", "st") + __step3_suffixes = ("isch", "lich", "heit", "keit", + "end", "ung", "ig", "ik") + + def stem(self, word): + """ + Stem a German word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + word = word.replace("\xDF", "ss") + + # Every occurrence of 'u' and 'y' + # between vowels is put into upper case. + for i in range(1, len(word)-1): + if word[i-1] in self.__vowels and word[i+1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + elif word[i] == "y": + word = "".join((word[:i], "Y", word[i+1:])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # R1 is adjusted so that the region before it + # contains at least 3 letters. + for i in range(1, len(word)): + if word[i] not in self.__vowels and word[i-1] in self.__vowels: + if len(word[:i+1]) < 3 and len(word[:i+1]) > 0: + r1 = word[3:] + elif len(word[:i+1]) == 0: + return word + break + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if (suffix in ("en", "es", "e") and + word[-len(suffix)-4:-len(suffix)] == "niss"): + word = word[:-len(suffix)-1] + r1 = r1[:-len(suffix)-1] + r2 = r2[:-len(suffix)-1] + + elif suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + if suffix == "st": + if word[-3] in self.__st_ending and len(word[:-3]) >= 3: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + break + + # STEP 3: Derivational suffixes + for suffix in self.__step3_suffixes: + if r2.endswith(suffix): + if suffix in ("end", "ung"): + if ("ig" in r2[-len(suffix)-2:-len(suffix)] and + "e" not in r2[-len(suffix)-3:-len(suffix)-2]): + word = word[:-len(suffix)-2] + else: + word = word[:-len(suffix)] + + elif (suffix in ("ig", "ik", "isch") and + "e" not in r2[-len(suffix)-1:-len(suffix)]): + word = word[:-len(suffix)] + + elif suffix in ("lich", "heit"): + if ("er" in r1[-len(suffix)-2:-len(suffix)] or + "en" in r1[-len(suffix)-2:-len(suffix)]): + word = word[:-len(suffix)-2] + else: + word = word[:-len(suffix)] + + elif suffix == "keit": + if "lich" in r2[-len(suffix)-4:-len(suffix)]: + word = word[:-len(suffix)-4] + + elif "ig" in r2[-len(suffix)-2:-len(suffix)]: + word = word[:-len(suffix)-2] + else: 
+ word = word[:-len(suffix)] + break + + # Umlaut accents are removed and + # 'u' and 'y' are put back into lower case. + word = (word.replace("\xE4", "a").replace("\xF6", "o") + .replace("\xFC", "u").replace("U", "u") + .replace("Y", "y")) + + + return word + + + +class HungarianStemmer(_LanguageSpecificStemmer): + + """ + The Hungarian Snowball stemmer. + + :cvar __vowels: The Hungarian vowels. + :type __vowels: unicode + :cvar __digraphs: The Hungarian digraphs. + :type __digraphs: tuple + :cvar __double_consonants: The Hungarian double consonants. + :type __double_consonants: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm. + :type __step5_suffixes: tuple + :cvar __step6_suffixes: Suffixes to be deleted in step 6 of the algorithm. + :type __step6_suffixes: tuple + :cvar __step7_suffixes: Suffixes to be deleted in step 7 of the algorithm. + :type __step7_suffixes: tuple + :cvar __step8_suffixes: Suffixes to be deleted in step 8 of the algorithm. + :type __step8_suffixes: tuple + :cvar __step9_suffixes: Suffixes to be deleted in step 9 of the algorithm. + :type __step9_suffixes: tuple + :note: A detailed description of the Hungarian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/hungarian/stemmer.html + + """ + + __vowels = "aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB" + __digraphs = ("cs", "dz", "dzs", "gy", "ly", "ny", "ty", "zs") + __double_consonants = ("bb", "cc", "ccs", "dd", "ff", "gg", + "ggy", "jj", "kk", "ll", "lly", "mm", + "nn", "nny", "pp", "rr", "ss", "ssz", + "tt", "tty", "vv", "zz", "zzs") + + __step1_suffixes = ("al", "el") + __step2_suffixes = ('k\xE9ppen', 'onk\xE9nt', 'enk\xE9nt', + 'ank\xE9nt', 'k\xE9pp', 'k\xE9nt', 'ban', + 'ben', 'nak', 'nek', 'val', 'vel', 't\xF3l', + 't\xF5l', 'r\xF3l', 'r\xF5l', 'b\xF3l', + 'b\xF5l', 'hoz', 'hez', 'h\xF6z', + 'n\xE1l', 'n\xE9l', '\xE9rt', 'kor', + 'ba', 'be', 'ra', 're', 'ig', 'at', 'et', + 'ot', '\xF6t', 'ul', '\xFCl', 'v\xE1', + 'v\xE9', 'en', 'on', 'an', '\xF6n', + 'n', 't') + __step3_suffixes = ("\xE1nk\xE9nt", "\xE1n", "\xE9n") + __step4_suffixes = ('astul', 'est\xFCl', '\xE1stul', + '\xE9st\xFCl', 'stul', 'st\xFCl') + __step5_suffixes = ("\xE1", "\xE9") + __step6_suffixes = ('ok\xE9', '\xF6k\xE9', 'ak\xE9', + 'ek\xE9', '\xE1k\xE9', '\xE1\xE9i', + '\xE9k\xE9', '\xE9\xE9i', 'k\xE9', + '\xE9i', '\xE9\xE9', '\xE9') + __step7_suffixes = ('\xE1juk', '\xE9j\xFCk', '\xFCnk', + 'unk', 'juk', 'j\xFCk', '\xE1nk', + '\xE9nk', 'nk', 'uk', '\xFCk', 'em', + 'om', 'am', 'od', 'ed', 'ad', '\xF6d', + 'ja', 'je', '\xE1m', '\xE1d', '\xE9m', + '\xE9d', 'm', 'd', 'a', 'e', 'o', + '\xE1', '\xE9') + __step8_suffixes = ('jaitok', 'jeitek', 'jaink', 'jeink', 'aitok', + 'eitek', '\xE1itok', '\xE9itek', 'jaim', + 'jeim', 'jaid', 'jeid', 'eink', 'aink', + 'itek', 'jeik', 'jaik', '\xE1ink', + '\xE9ink', 'aim', 'eim', 'aid', 'eid', + 'jai', 'jei', 'ink', 'aik', 'eik', + '\xE1im', '\xE1id', '\xE1ik', '\xE9im', + '\xE9id', '\xE9ik', 'im', 'id', 'ai', + 'ei', 'ik', '\xE1i', '\xE9i', 'i') + __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok", + "ek", "ak", "k") + 
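+    # Note (illustrative summary): every step in stem() below removes a suffix
+    # only when it lies inside the region R1 returned by __r1_hungarian(),
+    # which is defined after stem(); e.g. for the vowel-initial word "alma",
+    # R1 is "ma".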
+ def stem(self, word): + """ + Stem an Hungarian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + r1 = self.__r1_hungarian(word, self.__vowels, self.__digraphs) + + # STEP 1: Remove instrumental case + if r1.endswith(self.__step1_suffixes): + for double_cons in self.__double_consonants: + if word[-2-len(double_cons):-2] == double_cons: + word = "".join((word[:-4], word[-3])) + + if r1[-2-len(double_cons):-2] == double_cons: + r1 = "".join((r1[:-4], r1[-3])) + break + + # STEP 2: Remove frequent cases + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + + if r1.endswith("\xE1"): + word = "".join((word[:-1], "a")) + r1 = "".join((r1[:-1], "a")) + + elif r1.endswith("\xE9"): + word = "".join((word[:-1], "e")) + r1 = "".join((r1[:-1], "e")) + break + + # STEP 3: Remove special cases + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix == "\xE9n": + word = "".join((word[:-2], "e")) + r1 = "".join((r1[:-2], "e")) + else: + word = "".join((word[:-len(suffix)], "a")) + r1 = "".join((r1[:-len(suffix)], "a")) + break + + # STEP 4: Remove other cases + for suffix in self.__step4_suffixes: + if r1.endswith(suffix): + if suffix == "\xE1stul": + word = "".join((word[:-5], "a")) + r1 = "".join((r1[:-5], "a")) + + elif suffix == "\xE9st\xFCl": + word = "".join((word[:-5], "e")) + r1 = "".join((r1[:-5], "e")) + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 5: Remove factive case + for suffix in self.__step5_suffixes: + if r1.endswith(suffix): + for double_cons in self.__double_consonants: + if word[-1-len(double_cons):-1] == double_cons: + word = "".join((word[:-3], word[-2])) + + if r1[-1-len(double_cons):-1] == double_cons: + r1 = "".join((r1[:-3], r1[-2])) + break + + # STEP 6: Remove owned + for suffix in self.__step6_suffixes: + if r1.endswith(suffix): + if suffix in ("\xE1k\xE9", "\xE1\xE9i"): + word = "".join((word[:-3], "a")) + r1 = "".join((r1[:-3], "a")) + + elif suffix in ("\xE9k\xE9", "\xE9\xE9i", + "\xE9\xE9"): + word = "".join((word[:-len(suffix)], "e")) + r1 = "".join((r1[:-len(suffix)], "e")) + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 7: Remove singular owner suffixes + for suffix in self.__step7_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix in ("\xE1nk", "\xE1juk", "\xE1m", + "\xE1d", "\xE1"): + word = "".join((word[:-len(suffix)], "a")) + r1 = "".join((r1[:-len(suffix)], "a")) + + elif suffix in ("\xE9nk", "\xE9j\xFCk", + "\xE9m", "\xE9d", "\xE9"): + word = "".join((word[:-len(suffix)], "e")) + r1 = "".join((r1[:-len(suffix)], "e")) + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 8: Remove plural owner suffixes + for suffix in self.__step8_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix in ("\xE1im", "\xE1id", "\xE1i", + "\xE1ink", "\xE1itok", "\xE1ik"): + word = "".join((word[:-len(suffix)], "a")) + r1 = "".join((r1[:-len(suffix)], "a")) + + elif suffix in ("\xE9im", "\xE9id", "\xE9i", + "\xE9ink", "\xE9itek", "\xE9ik"): + word = "".join((word[:-len(suffix)], "e")) + r1 = "".join((r1[:-len(suffix)], "e")) + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 9: Remove plural suffixes + for suffix in self.__step9_suffixes: + if 
word.endswith(suffix): + if r1.endswith(suffix): + if suffix == "\xE1k": + word = "".join((word[:-2], "a")) + elif suffix == "\xE9k": + word = "".join((word[:-2], "e")) + else: + word = word[:-len(suffix)] + break + + + return word + + + + def __r1_hungarian(self, word, vowels, digraphs): + """ + Return the region R1 that is used by the Hungarian stemmer. + + If the word begins with a vowel, R1 is defined as the region + after the first consonant or digraph (= two letters stand for + one phoneme) in the word. If the word begins with a consonant, + it is defined as the region after the first vowel in the word. + If the word does not contain both a vowel and consonant, R1 + is the null region at the end of the word. + + :param word: The Hungarian word whose region R1 is determined. + :type word: str or unicode + :param vowels: The Hungarian vowels that are used to determine + the region R1. + :type vowels: unicode + :param digraphs: The digraphs that are used to determine the + region R1. + :type digraphs: tuple + :return: the region R1 for the respective word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + HungarianStemmer. It is not to be invoked directly! + + """ + r1 = "" + if word[0] in vowels: + for digraph in digraphs: + if digraph in word[1:]: + r1 = word[word.index(digraph[-1])+1:] + return r1 + + for i in range(1, len(word)): + if word[i] not in vowels: + r1 = word[i+1:] + break + else: + for i in range(1, len(word)): + if word[i] in vowels: + r1 = word[i+1:] + break + + return r1 + + + +class ItalianStemmer(_StandardStemmer): + + """ + The Italian Snowball stemmer. + + :cvar __vowels: The Italian vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. 
+ :type __step2_suffixes: tuple + :note: A detailed description of the Italian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/italian/stemmer.html + + """ + + __vowels = "aeiou\xE0\xE8\xEC\xF2\xF9" + __step0_suffixes = ('gliela', 'gliele', 'glieli', 'glielo', + 'gliene', 'sene', 'mela', 'mele', 'meli', + 'melo', 'mene', 'tela', 'tele', 'teli', + 'telo', 'tene', 'cela', 'cele', 'celi', + 'celo', 'cene', 'vela', 'vele', 'veli', + 'velo', 'vene', 'gli', 'ci', 'la', 'le', + 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi') + __step1_suffixes = ('atrice', 'atrici', 'azione', 'azioni', + 'uzione', 'uzioni', 'usione', 'usioni', + 'amento', 'amenti', 'imento', 'imenti', + 'amente', 'abile', 'abili', 'ibile', 'ibili', + 'mente', 'atore', 'atori', 'logia', 'logie', + 'anza', 'anze', 'iche', 'ichi', 'ismo', + 'ismi', 'ista', 'iste', 'isti', 'ist\xE0', + 'ist\xE8', 'ist\xEC', 'ante', 'anti', + 'enza', 'enze', 'ico', 'ici', 'ica', 'ice', + 'oso', 'osi', 'osa', 'ose', 'it\xE0', + 'ivo', 'ivi', 'iva', 'ive') + __step2_suffixes = ('erebbero', 'irebbero', 'assero', 'assimo', + 'eranno', 'erebbe', 'eremmo', 'ereste', + 'eresti', 'essero', 'iranno', 'irebbe', + 'iremmo', 'ireste', 'iresti', 'iscano', + 'iscono', 'issero', 'arono', 'avamo', 'avano', + 'avate', 'eremo', 'erete', 'erono', 'evamo', + 'evano', 'evate', 'iremo', 'irete', 'irono', + 'ivamo', 'ivano', 'ivate', 'ammo', 'ando', + 'asse', 'assi', 'emmo', 'enda', 'ende', + 'endi', 'endo', 'erai', 'erei', 'Yamo', + 'iamo', 'immo', 'irai', 'irei', 'isca', + 'isce', 'isci', 'isco', 'ano', 'are', 'ata', + 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', + 'er\xE0', 'ere', 'er\xF2', 'ete', 'eva', + 'evi', 'evo', 'ir\xE0', 'ire', 'ir\xF2', + 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', + 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', + 'ar', 'ir') + + def stem(self, word): + """ + Stem an Italian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + step1_success = False + + # All acute accents are replaced by grave accents. + word = (word.replace("\xE1", "\xE0") + .replace("\xE9", "\xE8") + .replace("\xED", "\xEC") + .replace("\xF3", "\xF2") + .replace("\xFA", "\xF9")) + + # Every occurrence of 'u' after 'q' + # is put into upper case. + for i in range(1, len(word)): + if word[i-1] == "q" and word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + # Every occurrence of 'u' and 'i' + # between vowels is put into upper case. 
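+        # For example (illustrative): "gioia" becomes "gioIa", so the 'i'
+        # between vowels is no longer counted as a vowel in the later steps.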
+ for i in range(1, len(word)-1): + if word[i-1] in self.__vowels and word[i+1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + elif word [i] == "i": + word = "".join((word[:i], "I", word[i+1:])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Attached pronoun + for suffix in self.__step0_suffixes: + if rv.endswith(suffix): + if rv[-len(suffix)-4:-len(suffix)] in ("ando", "endo"): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + elif (rv[-len(suffix)-2:-len(suffix)] in + ("ar", "er", "ir")): + word = "".join((word[:-len(suffix)], "e")) + r1 = "".join((r1[:-len(suffix)], "e")) + r2 = "".join((r2[:-len(suffix)], "e")) + rv = "".join((rv[:-len(suffix)], "e")) + break + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic")): + word = word[:-2] + rv = rv[:-2] + + elif r2 .endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif (suffix in ("amento", "amenti", + "imento", "imenti") and + rv.endswith(suffix)): + step1_success = True + word = word[:-6] + rv = rv[:-6] + + elif r2.endswith(suffix): + step1_success = True + if suffix in ("azione", "azioni", "atore", "atori"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + + elif suffix in ("logia", "logie"): + word = word[:-2] + rv = word[:-2] + + elif suffix in ("uzione", "uzioni", + "usione", "usioni"): + word = word[:-5] + rv = rv[:-5] + + elif suffix in ("enza", "enze"): + word = "".join((word[:-2], "te")) + rv = "".join((rv[:-2], "te")) + + elif suffix == "it\xE0": + word = word[:-3] + r2 = r2[:-3] + rv = rv[:-3] + + if r2.endswith(("ic", "iv")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("ivo", "ivi", "iva", "ive"): + word = word[:-3] + r2 = r2[:-3] + rv = rv[:-3] + + if r2.endswith("at"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 2: Verb suffixes + if not step1_success: + for suffix in self.__step2_suffixes: + if rv.endswith(suffix): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 3a + if rv.endswith(("a", "e", "i", "o", "\xE0", "\xE8", + "\xEC", "\xF2")): + word = word[:-1] + rv = rv[:-1] + + if rv.endswith("i"): + word = word[:-1] + rv = rv[:-1] + + # STEP 3b + if rv.endswith(("ch", "gh")): + word = word[:-1] + + word = word.replace("I", "i").replace("U", "u") + + + return word + + + +class NorwegianStemmer(_ScandinavianStemmer): + + """ + The Norwegian Snowball stemmer. + + :cvar __vowels: The Norwegian vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. 
+ :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Norwegian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/norwegian/stemmer.html + + """ + + __vowels = "aeiouy\xE6\xE5\xF8" + __s_ending = "bcdfghjlmnoprtvyz" + __step1_suffixes = ("hetenes", "hetene", "hetens", "heter", + "heten", "endes", "ande", "ende", "edes", + "enes", "erte", "ede", "ane", "ene", "ens", + "ers", "ets", "het", "ast", "ert", "en", + "ar", "er", "as", "es", "et", "a", "e", "s") + + __step2_suffixes = ("dt", "vt") + + __step3_suffixes = ("hetslov", "eleg", "elig", "elov", "slov", + "leg", "eig", "lig", "els", "lov", "ig") + + def stem(self, word): + """ + Stem a Norwegian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + r1 = self._r1_scandinavian(word, self.__vowels) + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix in ("erte", "ert"): + word = "".join((word[:-len(suffix)], "er")) + r1 = "".join((r1[:-len(suffix)], "er")) + + elif suffix == "s": + if (word[-2] in self.__s_ending or + (word[-2] == "k" and word[-3] not in self.__vowels)): + word = word[:-1] + r1 = r1[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + word = word[:-len(suffix)] + break + + + return word + + + +class PortugueseStemmer(_StandardStemmer): + + """ + The Portuguese Snowball stemmer. + + :cvar __vowels: The Portuguese vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. 
+ :type __step4_suffixes: tuple + :note: A detailed description of the Portuguese + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/portuguese/stemmer.html + + """ + + __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4" + __step1_suffixes = ('amentos', 'imentos', 'uciones', 'amento', + 'imento', 'adoras', 'adores', 'a\xE7o~es', + 'log\xEDas', '\xEAncias', 'amente', + 'idades', 'ismos', 'istas', 'adora', + 'a\xE7a~o', 'antes', '\xE2ncia', + 'log\xEDa', 'uci\xF3n', '\xEAncia', + 'mente', 'idade', 'ezas', 'icos', 'icas', + 'ismo', '\xE1vel', '\xEDvel', 'ista', + 'osos', 'osas', 'ador', 'ante', 'ivas', + 'ivos', 'iras', 'eza', 'ico', 'ica', + 'oso', 'osa', 'iva', 'ivo', 'ira') + __step2_suffixes = ('ar\xEDamos', 'er\xEDamos', 'ir\xEDamos', + '\xE1ssemos', '\xEAssemos', '\xEDssemos', + 'ar\xEDeis', 'er\xEDeis', 'ir\xEDeis', + '\xE1sseis', '\xE9sseis', '\xEDsseis', + '\xE1ramos', '\xE9ramos', '\xEDramos', + '\xE1vamos', 'aremos', 'eremos', 'iremos', + 'ariam', 'eriam', 'iriam', 'assem', 'essem', + 'issem', 'ara~o', 'era~o', 'ira~o', 'arias', + 'erias', 'irias', 'ardes', 'erdes', 'irdes', + 'asses', 'esses', 'isses', 'astes', 'estes', + 'istes', '\xE1reis', 'areis', '\xE9reis', + 'ereis', '\xEDreis', 'ireis', '\xE1veis', + '\xEDamos', 'armos', 'ermos', 'irmos', + 'aria', 'eria', 'iria', 'asse', 'esse', + 'isse', 'aste', 'este', 'iste', 'arei', + 'erei', 'irei', 'aram', 'eram', 'iram', + 'avam', 'arem', 'erem', 'irem', + 'ando', 'endo', 'indo', 'adas', 'idas', + 'ar\xE1s', 'aras', 'er\xE1s', 'eras', + 'ir\xE1s', 'avas', 'ares', 'eres', 'ires', + '\xEDeis', 'ados', 'idos', '\xE1mos', + 'amos', 'emos', 'imos', 'iras', 'ada', 'ida', + 'ar\xE1', 'ara', 'er\xE1', 'era', + 'ir\xE1', 'ava', 'iam', 'ado', 'ido', + 'ias', 'ais', 'eis', 'ira', 'ia', 'ei', 'am', + 'em', 'ar', 'er', 'ir', 'as', + 'es', 'is', 'eu', 'iu', 'ou') + __step4_suffixes = ("os", "a", "i", "o", "\xE1", + "\xED", "\xF3") + + def stem(self, word): + """ + Stem a Portuguese word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + step1_success = False + step2_success = False + + word = (word.replace("\xE3", "a~") + .replace("\xF5", "o~")) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic", "ad")): + word = word[:-2] + rv = rv[:-2] + + elif (suffix in ("ira", "iras") and rv.endswith(suffix) and + word[-len(suffix)-1:-len(suffix)] == "e"): + step1_success = True + + word = "".join((word[:-len(suffix)], "ir")) + rv = "".join((rv[:-len(suffix)], "ir")) + + elif r2.endswith(suffix): + step1_success = True + + if suffix in ("log\xEDa", "log\xEDas"): + word = word[:-2] + rv = rv[:-2] + + elif suffix in ("uci\xF3n", "uciones"): + word = "".join((word[:-len(suffix)], "u")) + rv = "".join((rv[:-len(suffix)], "u")) + + elif suffix in ("\xEAncia", "\xEAncias"): + word = "".join((word[:-len(suffix)], "ente")) + rv = "".join((rv[:-len(suffix)], "ente")) + + elif suffix == "mente": + word = word[:-5] + r2 = r2[:-5] + rv = rv[:-5] + + if r2.endswith(("ante", "avel", "\xEDvel")): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("idade", "idades"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + if r2.endswith(("ic", "iv")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("iva", "ivo", "ivas", "ivos"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 2: Verb suffixes + if not step1_success: + for suffix in self.__step2_suffixes: + if rv.endswith(suffix): + step2_success = True + + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 3 + if step1_success or step2_success: + if rv.endswith("i") and word[-2] == "c": + word = word[:-1] + rv = rv[:-1] + + ### STEP 4: Residual suffix + if not step1_success and not step2_success: + for suffix in self.__step4_suffixes: + if rv.endswith(suffix): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 5 + if rv.endswith(("e", "\xE9", "\xEA")): + word = word[:-1] + rv = rv[:-1] + + if ((word.endswith("gu") and rv.endswith("u")) or + (word.endswith("ci") and rv.endswith("i"))): + word = word[:-1] + + elif word.endswith("\xE7"): + word = "".join((word[:-1], "c")) + + word = word.replace("a~", "\xE3").replace("o~", "\xF5") + + + return word + + + +class RomanianStemmer(_StandardStemmer): + + """ + The Romanian Snowball stemmer. + + :cvar __vowels: The Romanian vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. 
+ :type __step3_suffixes: tuple + :note: A detailed description of the Romanian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/romanian/stemmer.html + + """ + + __vowels = "aeiou\u0103\xE2\xEE" + __step0_suffixes = ('iilor', 'ului', 'elor', 'iile', 'ilor', + 'atei', 'a\u0163ie', 'a\u0163ia', 'aua', + 'ele', 'iua', 'iei', 'ile', 'ul', 'ea', + 'ii') + __step1_suffixes = ('abilitate', 'abilitati', 'abilit\u0103\u0163i', + 'ibilitate', 'abilit\u0103i', 'ivitate', + 'ivitati', 'ivit\u0103\u0163i', 'icitate', + 'icitati', 'icit\u0103\u0163i', 'icatori', + 'ivit\u0103i', 'icit\u0103i', 'icator', + 'a\u0163iune', 'atoare', '\u0103toare', + 'i\u0163iune', 'itoare', 'iciva', 'icive', + 'icivi', 'iciv\u0103', 'icala', 'icale', + 'icali', 'ical\u0103', 'ativa', 'ative', + 'ativi', 'ativ\u0103', 'atori', '\u0103tori', + 'itiva', 'itive', 'itivi', 'itiv\u0103', + 'itori', 'iciv', 'ical', 'ativ', 'ator', + '\u0103tor', 'itiv', 'itor') + __step2_suffixes = ('abila', 'abile', 'abili', 'abil\u0103', + 'ibila', 'ibile', 'ibili', 'ibil\u0103', + 'atori', 'itate', 'itati', 'it\u0103\u0163i', + 'abil', 'ibil', 'oasa', 'oas\u0103', 'oase', + 'anta', 'ante', 'anti', 'ant\u0103', 'ator', + 'it\u0103i', 'iune', 'iuni', 'isme', 'ista', + 'iste', 'isti', 'ist\u0103', 'i\u015Fti', + 'ata', 'at\u0103', 'ati', 'ate', 'uta', + 'ut\u0103', 'uti', 'ute', 'ita', 'it\u0103', + 'iti', 'ite', 'ica', 'ice', 'ici', 'ic\u0103', + 'osi', 'o\u015Fi', 'ant', 'iva', 'ive', 'ivi', + 'iv\u0103', 'ism', 'ist', 'at', 'ut', 'it', + 'ic', 'os', 'iv') + __step3_suffixes = ('seser\u0103\u0163i', 'aser\u0103\u0163i', + 'iser\u0103\u0163i', '\xE2ser\u0103\u0163i', + 'user\u0103\u0163i', 'seser\u0103m', + 'aser\u0103m', 'iser\u0103m', '\xE2ser\u0103m', + 'user\u0103m', 'ser\u0103\u0163i', 'sese\u015Fi', + 'seser\u0103', 'easc\u0103', 'ar\u0103\u0163i', + 'ur\u0103\u0163i', 'ir\u0103\u0163i', + '\xE2r\u0103\u0163i', 'ase\u015Fi', + 'aser\u0103', 'ise\u015Fi', 'iser\u0103', + '\xe2se\u015Fi', '\xE2ser\u0103', + 'use\u015Fi', 'user\u0103', 'ser\u0103m', + 'sesem', 'indu', '\xE2ndu', 'eaz\u0103', + 'e\u015Fti', 'e\u015Fte', '\u0103\u015Fti', + '\u0103\u015Fte', 'ea\u0163i', 'ia\u0163i', + 'ar\u0103m', 'ur\u0103m', 'ir\u0103m', + '\xE2r\u0103m', 'asem', 'isem', + '\xE2sem', 'usem', 'se\u015Fi', 'ser\u0103', + 'sese', 'are', 'ere', 'ire', '\xE2re', + 'ind', '\xE2nd', 'eze', 'ezi', 'esc', + '\u0103sc', 'eam', 'eai', 'eau', 'iam', + 'iai', 'iau', 'a\u015Fi', 'ar\u0103', + 'u\u015Fi', 'ur\u0103', 'i\u015Fi', 'ir\u0103', + '\xE2\u015Fi', '\xe2r\u0103', 'ase', + 'ise', '\xE2se', 'use', 'a\u0163i', + 'e\u0163i', 'i\u0163i', '\xe2\u0163i', 'sei', + 'ez', 'am', 'ai', 'au', 'ea', 'ia', 'ui', + '\xE2i', '\u0103m', 'em', 'im', '\xE2m', + 'se') + + def stem(self, word): + """ + Stem a Romanian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + step1_success = False + step2_success = False + + for i in range(1, len(word)-1): + if word[i-1] in self.__vowels and word[i+1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i+1:])) + + elif word[i] == "i": + word = "".join((word[:i], "I", word[i+1:])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Removal of plurals and other simplifications + for suffix in self.__step0_suffixes: + if word.endswith(suffix): + if suffix in r1: + if suffix in ("ul", "ului"): + word = word[:-len(suffix)] + + if suffix in rv: + rv = rv[:-len(suffix)] + else: + rv = "" + + elif (suffix == "aua" or suffix == "atei" or + (suffix == "ile" and word[-5:-3] != "ab")): + word = word[:-2] + + elif suffix in ("ea", "ele", "elor"): + word = "".join((word[:-len(suffix)], "e")) + + if suffix in rv: + rv = "".join((rv[:-len(suffix)], "e")) + else: + rv = "" + + elif suffix in ("ii", "iua", "iei", + "iile", "iilor", "ilor"): + word = "".join((word[:-len(suffix)], "i")) + + if suffix in rv: + rv = "".join((rv[:-len(suffix)], "i")) + else: + rv = "" + + elif suffix in ("a\u0163ie", "a\u0163ia"): + word = word[:-1] + break + + # STEP 1: Reduction of combining suffixes + while True: + + replacement_done = False + + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix in r1: + step1_success = True + replacement_done = True + + if suffix in ("abilitate", "abilitati", + "abilit\u0103i", + "abilit\u0103\u0163i"): + word = "".join((word[:-len(suffix)], "abil")) + + elif suffix == "ibilitate": + word = word[:-5] + + elif suffix in ("ivitate", "ivitati", + "ivit\u0103i", + "ivit\u0103\u0163i"): + word = "".join((word[:-len(suffix)], "iv")) + + elif suffix in ("icitate", "icitati", "icit\u0103i", + "icit\u0103\u0163i", "icator", + "icatori", "iciv", "iciva", + "icive", "icivi", "iciv\u0103", + "ical", "icala", "icale", "icali", + "ical\u0103"): + word = "".join((word[:-len(suffix)], "ic")) + + elif suffix in ("ativ", "ativa", "ative", "ativi", + "ativ\u0103", "a\u0163iune", + "atoare", "ator", "atori", + "\u0103toare", + "\u0103tor", "\u0103tori"): + word = "".join((word[:-len(suffix)], "at")) + + if suffix in r2: + r2 = "".join((r2[:-len(suffix)], "at")) + + elif suffix in ("itiv", "itiva", "itive", "itivi", + "itiv\u0103", "i\u0163iune", + "itoare", "itor", "itori"): + word = "".join((word[:-len(suffix)], "it")) + + if suffix in r2: + r2 = "".join((r2[:-len(suffix)], "it")) + else: + step1_success = False + break + + if not replacement_done: + break + + # STEP 2: Removal of standard suffixes + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if suffix in r2: + step2_success = True + + if suffix in ("iune", "iuni"): + if word[-5] == "\u0163": + word = "".join((word[:-5], "t")) + + elif suffix in ("ism", "isme", "ist", "ista", "iste", + "isti", "ist\u0103", "i\u015Fti"): + word = "".join((word[:-len(suffix)], "ist")) + + else: + word = word[:-len(suffix)] + break + + # STEP 3: Removal of verb suffixes + if not step1_success and not step2_success: + for suffix in self.__step3_suffixes: + if word.endswith(suffix): + if suffix in rv: + if suffix in ('seser\u0103\u0163i', 'seser\u0103m', + 'ser\u0103\u0163i', 'sese\u015Fi', + 'seser\u0103', 'ser\u0103m', 'sesem', + 'se\u015Fi', 'ser\u0103', 'sese', + 'a\u0163i', 'e\u0163i', 'i\u0163i', + '\xE2\u0163i', 'sei', '\u0103m', + 'em', 'im', '\xE2m', 'se'): + word = word[:-len(suffix)] + rv = 
rv[:-len(suffix)] + else: + if (not rv.startswith(suffix) and + rv[rv.index(suffix)-1] not in + "aeio\u0103\xE2\xEE"): + word = word[:-len(suffix)] + break + + # STEP 4: Removal of final vowel + for suffix in ("ie", "a", "e", "i", "\u0103"): + if word.endswith(suffix): + if suffix in rv: + word = word[:-len(suffix)] + break + + word = word.replace("I", "i").replace("U", "u") + + + return word + + + +class RussianStemmer(_LanguageSpecificStemmer): + + """ + The Russian Snowball stemmer. + + :cvar __perfective_gerund_suffixes: Suffixes to be deleted. + :type __perfective_gerund_suffixes: tuple + :cvar __adjectival_suffixes: Suffixes to be deleted. + :type __adjectival_suffixes: tuple + :cvar __reflexive_suffixes: Suffixes to be deleted. + :type __reflexive_suffixes: tuple + :cvar __verb_suffixes: Suffixes to be deleted. + :type __verb_suffixes: tuple + :cvar __noun_suffixes: Suffixes to be deleted. + :type __noun_suffixes: tuple + :cvar __superlative_suffixes: Suffixes to be deleted. + :type __superlative_suffixes: tuple + :cvar __derivational_suffixes: Suffixes to be deleted. + :type __derivational_suffixes: tuple + :note: A detailed description of the Russian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/russian/stemmer.html + + """ + + __perfective_gerund_suffixes = ("ivshis'", "yvshis'", "vshis'", + "ivshi", "yvshi", "vshi", "iv", + "yv", "v") + __adjectival_suffixes = ('ui^ushchi^ui^u', 'ui^ushchi^ai^a', + 'ui^ushchimi', 'ui^ushchymi', 'ui^ushchego', + 'ui^ushchogo', 'ui^ushchemu', 'ui^ushchomu', + 'ui^ushchikh', 'ui^ushchykh', + 'ui^ushchui^u', 'ui^ushchaia', + 'ui^ushchoi^u', 'ui^ushchei^u', + 'i^ushchi^ui^u', 'i^ushchi^ai^a', + 'ui^ushchee', 'ui^ushchie', + 'ui^ushchye', 'ui^ushchoe', 'ui^ushchei`', + 'ui^ushchii`', 'ui^ushchyi`', + 'ui^ushchoi`', 'ui^ushchem', 'ui^ushchim', + 'ui^ushchym', 'ui^ushchom', 'i^ushchimi', + 'i^ushchymi', 'i^ushchego', 'i^ushchogo', + 'i^ushchemu', 'i^ushchomu', 'i^ushchikh', + 'i^ushchykh', 'i^ushchui^u', 'i^ushchai^a', + 'i^ushchoi^u', 'i^ushchei^u', 'i^ushchee', + 'i^ushchie', 'i^ushchye', 'i^ushchoe', + 'i^ushchei`', 'i^ushchii`', + 'i^ushchyi`', 'i^ushchoi`', 'i^ushchem', + 'i^ushchim', 'i^ushchym', 'i^ushchom', + 'shchi^ui^u', 'shchi^ai^a', 'ivshi^ui^u', + 'ivshi^ai^a', 'yvshi^ui^u', 'yvshi^ai^a', + 'shchimi', 'shchymi', 'shchego', 'shchogo', + 'shchemu', 'shchomu', 'shchikh', 'shchykh', + 'shchui^u', 'shchai^a', 'shchoi^u', + 'shchei^u', 'ivshimi', 'ivshymi', + 'ivshego', 'ivshogo', 'ivshemu', 'ivshomu', + 'ivshikh', 'ivshykh', 'ivshui^u', + 'ivshai^a', 'ivshoi^u', 'ivshei^u', + 'yvshimi', 'yvshymi', 'yvshego', 'yvshogo', + 'yvshemu', 'yvshomu', 'yvshikh', 'yvshykh', + 'yvshui^u', 'yvshai^a', 'yvshoi^u', + 'yvshei^u', 'vshi^ui^u', 'vshi^ai^a', + 'shchee', 'shchie', 'shchye', 'shchoe', + 'shchei`', 'shchii`', 'shchyi`', 'shchoi`', + 'shchem', 'shchim', 'shchym', 'shchom', + 'ivshee', 'ivshie', 'ivshye', 'ivshoe', + 'ivshei`', 'ivshii`', 'ivshyi`', + 'ivshoi`', 'ivshem', 'ivshim', 'ivshym', + 'ivshom', 'yvshee', 'yvshie', 'yvshye', + 'yvshoe', 'yvshei`', 'yvshii`', + 'yvshyi`', 'yvshoi`', 'yvshem', + 'yvshim', 'yvshym', 'yvshom', 'vshimi', + 'vshymi', 'vshego', 'vshogo', 'vshemu', + 'vshomu', 'vshikh', 'vshykh', 'vshui^u', + 'vshai^a', 'vshoi^u', 'vshei^u', + 'emi^ui^u', 'emi^ai^a', 'nni^ui^u', + 'nni^ai^a', 'vshee', + 'vshie', 'vshye', 'vshoe', 'vshei`', + 'vshii`', 'vshyi`', 'vshoi`', + 'vshem', 'vshim', 'vshym', 'vshom', + 'emimi', 'emymi', 'emego', 'emogo', + 'ememu', 'emomu', 'emikh', 'emykh', + 'emui^u', 
'emai^a', 'emoi^u', 'emei^u', + 'nnimi', 'nnymi', 'nnego', 'nnogo', + 'nnemu', 'nnomu', 'nnikh', 'nnykh', + 'nnui^u', 'nnai^a', 'nnoi^u', 'nnei^u', + 'emee', 'emie', 'emye', 'emoe', + 'emei`', 'emii`', 'emyi`', + 'emoi`', 'emem', 'emim', 'emym', + 'emom', 'nnee', 'nnie', 'nnye', 'nnoe', + 'nnei`', 'nnii`', 'nnyi`', + 'nnoi`', 'nnem', 'nnim', 'nnym', + 'nnom', 'i^ui^u', 'i^ai^a', 'imi', 'ymi', + 'ego', 'ogo', 'emu', 'omu', 'ikh', + 'ykh', 'ui^u', 'ai^a', 'oi^u', 'ei^u', + 'ee', 'ie', 'ye', 'oe', 'ei`', + 'ii`', 'yi`', 'oi`', 'em', + 'im', 'ym', 'om') + __reflexive_suffixes = ("si^a", "s'") + __verb_suffixes = ("esh'", 'ei`te', 'ui`te', 'ui^ut', + "ish'", 'ete', 'i`te', 'i^ut', 'nno', + 'ila', 'yla', 'ena', 'ite', 'ili', 'yli', + 'ilo', 'ylo', 'eno', 'i^at', 'uet', 'eny', + "it'", "yt'", 'ui^u', 'la', 'na', 'li', + 'em', 'lo', 'no', 'et', 'ny', "t'", + 'ei`', 'ui`', 'il', 'yl', 'im', + 'ym', 'en', 'it', 'yt', 'i^u', 'i`', + 'l', 'n') + __noun_suffixes = ('ii^ami', 'ii^akh', 'i^ami', 'ii^am', 'i^akh', + 'ami', 'iei`', 'i^am', 'iem', 'akh', + 'ii^u', "'i^u", 'ii^a', "'i^a", 'ev', 'ov', + 'ie', "'e", 'ei', 'ii', 'ei`', + 'oi`', 'ii`', 'em', 'am', 'om', + 'i^u', 'i^a', 'a', 'e', 'i', 'i`', + 'o', 'u', 'y', "'") + __superlative_suffixes = ("ei`she", "ei`sh") + __derivational_suffixes = ("ost'", "ost") + + def stem(self, word): + """ + Stem a Russian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + chr_exceeded = False + for i in range(len(word)): + if ord(word[i]) > 255: + chr_exceeded = True + break + + if chr_exceeded: + word = self.__cyrillic_to_roman(word) + + step1_success = False + adjectival_removed = False + verb_removed = False + undouble_success = False + superlative_removed = False + + rv, r2 = self.__regions_russian(word) + + # Step 1 + for suffix in self.__perfective_gerund_suffixes: + if rv.endswith(suffix): + if suffix in ("v", "vshi", "vshis'"): + if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or + rv[-len(suffix)-1:-len(suffix)] == "a"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + step1_success = True + break + else: + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + step1_success = True + break + + if not step1_success: + for suffix in self.__reflexive_suffixes: + if rv.endswith(suffix): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + for suffix in self.__adjectival_suffixes: + if rv.endswith(suffix): + if suffix in ('i^ushchi^ui^u', 'i^ushchi^ai^a', + 'i^ushchui^u', 'i^ushchai^a', 'i^ushchoi^u', + 'i^ushchei^u', 'i^ushchimi', 'i^ushchymi', + 'i^ushchego', 'i^ushchogo', 'i^ushchemu', + 'i^ushchomu', 'i^ushchikh', 'i^ushchykh', + 'shchi^ui^u', 'shchi^ai^a', 'i^ushchee', + 'i^ushchie', 'i^ushchye', 'i^ushchoe', + 'i^ushchei`', 'i^ushchii`', 'i^ushchyi`', + 'i^ushchoi`', 'i^ushchem', 'i^ushchim', + 'i^ushchym', 'i^ushchom', 'vshi^ui^u', + 'vshi^ai^a', 'shchui^u', 'shchai^a', + 'shchoi^u', 'shchei^u', 'emi^ui^u', + 'emi^ai^a', 'nni^ui^u', 'nni^ai^a', + 'shchimi', 'shchymi', 'shchego', 'shchogo', + 'shchemu', 'shchomu', 'shchikh', 'shchykh', + 'vshui^u', 'vshai^a', 'vshoi^u', 'vshei^u', + 'shchee', 'shchie', 'shchye', 'shchoe', + 'shchei`', 'shchii`', 'shchyi`', 'shchoi`', + 'shchem', 'shchim', 'shchym', 'shchom', + 'vshimi', 'vshymi', 'vshego', 'vshogo', + 'vshemu', 'vshomu', 'vshikh', 'vshykh', + 'emui^u', 'emai^a', 'emoi^u', 'emei^u', + 'nnui^u', 'nnai^a', 
'nnoi^u', 'nnei^u', + 'vshee', 'vshie', 'vshye', 'vshoe', + 'vshei`', 'vshii`', 'vshyi`', 'vshoi`', + 'vshem', 'vshim', 'vshym', 'vshom', + 'emimi', 'emymi', 'emego', 'emogo', + 'ememu', 'emomu', 'emikh', 'emykh', + 'nnimi', 'nnymi', 'nnego', 'nnogo', + 'nnemu', 'nnomu', 'nnikh', 'nnykh', + 'emee', 'emie', 'emye', 'emoe', 'emei`', + 'emii`', 'emyi`', 'emoi`', 'emem', 'emim', + 'emym', 'emom', 'nnee', 'nnie', 'nnye', + 'nnoe', 'nnei`', 'nnii`', 'nnyi`', 'nnoi`', + 'nnem', 'nnim', 'nnym', 'nnom'): + if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or + rv[-len(suffix)-1:-len(suffix)] == "a"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + adjectival_removed = True + break + else: + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + adjectival_removed = True + break + + if not adjectival_removed: + for suffix in self.__verb_suffixes: + if rv.endswith(suffix): + if suffix in ("la", "na", "ete", "i`te", "li", + "i`", "l", "em", "n", "lo", "no", + "et", "i^ut", "ny", "t'", "esh'", + "nno"): + if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or + rv[-len(suffix)-1:-len(suffix)] == "a"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + verb_removed = True + break + else: + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + verb_removed = True + break + + if not adjectival_removed and not verb_removed: + for suffix in self.__noun_suffixes: + if rv.endswith(suffix): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # Step 2 + if rv.endswith("i"): + word = word[:-1] + r2 = r2[:-1] + + # Step 3 + for suffix in self.__derivational_suffixes: + if r2.endswith(suffix): + word = word[:-len(suffix)] + break + + # Step 4 + if word.endswith("nn"): + word = word[:-1] + undouble_success = True + + if not undouble_success: + for suffix in self.__superlative_suffixes: + if word.endswith(suffix): + word = word[:-len(suffix)] + superlative_removed = True + break + if word.endswith("nn"): + word = word[:-1] + + if not undouble_success and not superlative_removed: + if word.endswith("'"): + word = word[:-1] + + if chr_exceeded: + word = self.__roman_to_cyrillic(word) + + + return word + + + + def __regions_russian(self, word): + """ + Return the regions RV and R2 which are used by the Russian stemmer. + + In any word, RV is the region after the first vowel, + or the end of the word if it contains no vowel. + + R2 is the region after the first non-vowel following + a vowel in R1, or the end of the word if there is no such non-vowel. + + R1 is the region after the first non-vowel following a vowel, + or the end of the word if there is no such non-vowel. + + :param word: The Russian word whose regions RV and R2 are determined. + :type word: str or unicode + :return: the regions RV and R2 for the respective Russian word. + :rtype: tuple + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! 
+ + """ + r1 = "" + r2 = "" + rv = "" + + vowels = ("A", "U", "E", "a", "e", "i", "o", "u", "y") + word = (word.replace("i^a", "A") + .replace("i^u", "U") + .replace("e`", "E")) + + for i in range(1, len(word)): + if word[i] not in vowels and word[i-1] in vowels: + r1 = word[i+1:] + break + + for i in range(1, len(r1)): + if r1[i] not in vowels and r1[i-1] in vowels: + r2 = r1[i+1:] + break + + for i in range(len(word)): + if word[i] in vowels: + rv = word[i+1:] + break + + r2 = (r2.replace("A", "i^a") + .replace("U", "i^u") + .replace("E", "e`")) + rv = (rv.replace("A", "i^a") + .replace("U", "i^u") + .replace("E", "e`")) + + + return (rv, r2) + + + + def __cyrillic_to_roman(self, word): + """ + Transliterate a Russian word into the Roman alphabet. + + A Russian word whose letters consist of the Cyrillic + alphabet are transliterated into the Roman alphabet + in order to ease the forthcoming stemming process. + + :param word: The word that is transliterated. + :type word: unicode + :return: the transliterated word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! + + """ + word = (word.replace("\u0410", "a").replace("\u0430", "a") + .replace("\u0411", "b").replace("\u0431", "b") + .replace("\u0412", "v").replace("\u0432", "v") + .replace("\u0413", "g").replace("\u0433", "g") + .replace("\u0414", "d").replace("\u0434", "d") + .replace("\u0415", "e").replace("\u0435", "e") + .replace("\u0401", "e").replace("\u0451", "e") + .replace("\u0416", "zh").replace("\u0436", "zh") + .replace("\u0417", "z").replace("\u0437", "z") + .replace("\u0418", "i").replace("\u0438", "i") + .replace("\u0419", "i`").replace("\u0439", "i`") + .replace("\u041A", "k").replace("\u043A", "k") + .replace("\u041B", "l").replace("\u043B", "l") + .replace("\u041C", "m").replace("\u043C", "m") + .replace("\u041D", "n").replace("\u043D", "n") + .replace("\u041E", "o").replace("\u043E", "o") + .replace("\u041F", "p").replace("\u043F", "p") + .replace("\u0420", "r").replace("\u0440", "r") + .replace("\u0421", "s").replace("\u0441", "s") + .replace("\u0422", "t").replace("\u0442", "t") + .replace("\u0423", "u").replace("\u0443", "u") + .replace("\u0424", "f").replace("\u0444", "f") + .replace("\u0425", "kh").replace("\u0445", "kh") + .replace("\u0426", "t^s").replace("\u0446", "t^s") + .replace("\u0427", "ch").replace("\u0447", "ch") + .replace("\u0428", "sh").replace("\u0448", "sh") + .replace("\u0429", "shch").replace("\u0449", "shch") + .replace("\u042A", "''").replace("\u044A", "''") + .replace("\u042B", "y").replace("\u044B", "y") + .replace("\u042C", "'").replace("\u044C", "'") + .replace("\u042D", "e`").replace("\u044D", "e`") + .replace("\u042E", "i^u").replace("\u044E", "i^u") + .replace("\u042F", "i^a").replace("\u044F", "i^a")) + + + return word + + + + def __roman_to_cyrillic(self, word): + """ + Transliterate a Russian word back into the Cyrillic alphabet. + + A Russian word formerly transliterated into the Roman alphabet + in order to ease the stemming process, is transliterated back + into the Cyrillic alphabet, its original form. + + :param word: The word that is transliterated. + :type word: str or unicode + :return: word, the transliterated word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! 
+ + """ + word = (word.replace("i^u", "\u044E").replace("i^a", "\u044F") + .replace("shch", "\u0449").replace("kh", "\u0445") + .replace("t^s", "\u0446").replace("ch", "\u0447") + .replace("e`", "\u044D").replace("i`", "\u0439") + .replace("sh", "\u0448").replace("k", "\u043A") + .replace("e", "\u0435").replace("zh", "\u0436") + .replace("a", "\u0430").replace("b", "\u0431") + .replace("v", "\u0432").replace("g", "\u0433") + .replace("d", "\u0434").replace("e", "\u0435") + .replace("z", "\u0437").replace("i", "\u0438") + .replace("l", "\u043B").replace("m", "\u043C") + .replace("n", "\u043D").replace("o", "\u043E") + .replace("p", "\u043F").replace("r", "\u0440") + .replace("s", "\u0441").replace("t", "\u0442") + .replace("u", "\u0443").replace("f", "\u0444") + .replace("''", "\u044A").replace("y", "\u044B") + .replace("'", "\u044C")) + + + return word + + + +class SpanishStemmer(_StandardStemmer): + + """ + The Spanish Snowball stemmer. + + :cvar __vowels: The Spanish vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm. + :type __step2a_suffixes: tuple + :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm. + :type __step2b_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Spanish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/spanish/stemmer.html + + """ + + __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xFC" + __step0_suffixes = ("selas", "selos", "sela", "selo", "las", + "les", "los", "nos", "me", "se", "la", "le", + "lo") + __step1_suffixes = ('amientos', 'imientos', 'amiento', 'imiento', + 'aciones', 'uciones', 'adoras', 'adores', + 'ancias', 'log\xEDas', 'encias', 'amente', + 'idades', 'anzas', 'ismos', 'ables', 'ibles', + 'istas', 'adora', 'aci\xF3n', 'antes', + 'ancia', 'log\xEDa', 'uci\xf3n', 'encia', + 'mente', 'anza', 'icos', 'icas', 'ismo', + 'able', 'ible', 'ista', 'osos', 'osas', + 'ador', 'ante', 'idad', 'ivas', 'ivos', + 'ico', + 'ica', 'oso', 'osa', 'iva', 'ivo') + __step2a_suffixes = ('yeron', 'yendo', 'yamos', 'yais', 'yan', + 'yen', 'yas', 'yes', 'ya', 'ye', 'yo', + 'y\xF3') + __step2b_suffixes = ('ar\xEDamos', 'er\xEDamos', 'ir\xEDamos', + 'i\xE9ramos', 'i\xE9semos', 'ar\xEDais', + 'aremos', 'er\xEDais', 'eremos', + 'ir\xEDais', 'iremos', 'ierais', 'ieseis', + 'asteis', 'isteis', '\xE1bamos', + '\xE1ramos', '\xE1semos', 'ar\xEDan', + 'ar\xEDas', 'ar\xE9is', 'er\xEDan', + 'er\xEDas', 'er\xE9is', 'ir\xEDan', + 'ir\xEDas', 'ir\xE9is', + 'ieran', 'iesen', 'ieron', 'iendo', 'ieras', + 'ieses', 'abais', 'arais', 'aseis', + '\xE9amos', 'ar\xE1n', 'ar\xE1s', + 'ar\xEDa', 'er\xE1n', 'er\xE1s', + 'er\xEDa', 'ir\xE1n', 'ir\xE1s', + 'ir\xEDa', 'iera', 'iese', 'aste', 'iste', + 'aban', 'aran', 'asen', 'aron', 'ando', + 'abas', 'adas', 'idas', 'aras', 'ases', + '\xEDais', 'ados', 'idos', 'amos', 'imos', + 'emos', 'ar\xE1', 'ar\xE9', 'er\xE1', + 'er\xE9', 'ir\xE1', 'ir\xE9', 'aba', + 'ada', 'ida', 'ara', 'ase', '\xEDan', + 'ado', 'ido', '\xEDas', '\xE1is', + '\xE9is', '\xEDa', 'ad', 'ed', 'id', + 'an', 'i\xF3', 'ar', 'er', 'ir', 'as', + '\xEDs', 'en', 'es') + __step3_suffixes = ("os", "a", "e", "o", "\xE1", + "\xE9", "\xED", "\xF3") + + def 
stem(self, word): + """ + Stem a Spanish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + step1_success = False + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Attached pronoun + for suffix in self.__step0_suffixes: + if word.endswith(suffix): + if rv.endswith(suffix): + if rv[:-len(suffix)].endswith(("i\xE9ndo", + "\xE1ndo", + "\xE1r", "\xE9r", + "\xEDr")): + word = (word[:-len(suffix)].replace("\xE1", "a") + .replace("\xE9", "e") + .replace("\xED", "i")) + r1 = (r1[:-len(suffix)].replace("\xE1", "a") + .replace("\xE9", "e") + .replace("\xED", "i")) + r2 = (r2[:-len(suffix)].replace("\xE1", "a") + .replace("\xE9", "e") + .replace("\xED", "i")) + rv = (rv[:-len(suffix)].replace("\xE1", "a") + .replace("\xE9", "e") + .replace("\xED", "i")) + + elif rv[:-len(suffix)].endswith(("ando", "iendo", + "ar", "er", "ir")): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + elif (rv[:-len(suffix)].endswith("yendo") and + word[:-len(suffix)].endswith("uyendo")): + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic", "ad")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(suffix): + step1_success = True + if suffix in ("adora", "ador", "aci\xF3n", "adoras", + "adores", "aciones", "ante", "antes", + "ancia", "ancias"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + + elif suffix in ("log\xEDa", "log\xEDas"): + word = word.replace(suffix, "log") + rv = rv.replace(suffix, "log") + + elif suffix in ("uci\xF3n", "uciones"): + word = word.replace(suffix, "u") + rv = rv.replace(suffix, "u") + + elif suffix in ("encia", "encias"): + word = word.replace(suffix, "ente") + rv = rv.replace(suffix, "ente") + + elif suffix == "mente": + word = word[:-5] + r2 = r2[:-5] + rv = rv[:-5] + + if r2.endswith(("ante", "able", "ible")): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("idad", "idades"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + + for pre_suff in ("abil", "ic", "iv"): + if r2.endswith(pre_suff): + word = word[:-len(pre_suff)] + rv = rv[:-len(pre_suff)] + + elif suffix in ("ivo", "iva", "ivos", "ivas"): + word = word[:-len(suffix)] + r2 = r2[:-len(suffix)] + rv = rv[:-len(suffix)] + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 2a: Verb suffixes beginning 'y' + if not step1_success: + for suffix in self.__step2a_suffixes: + if (rv.endswith(suffix) and + word[-len(suffix)-1:-len(suffix)] == "u"): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 2b: Other verb suffixes + for suffix in self.__step2b_suffixes: + if rv.endswith(suffix): + if suffix in ("en", "es", "\xE9is", "emos"): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + + if 
word.endswith("gu"): + word = word[:-1] + + if rv.endswith("gu"): + rv = rv[:-1] + else: + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + break + + # STEP 3: Residual suffix + for suffix in self.__step3_suffixes: + if rv.endswith(suffix): + if suffix in ("e", "\xE9"): + word = word[:-len(suffix)] + rv = rv[:-len(suffix)] + + if word[-2:] == "gu" and rv[-1] == "u": + word = word[:-1] + else: + word = word[:-len(suffix)] + break + + word = (word.replace("\xE1", "a").replace("\xE9", "e") + .replace("\xED", "i").replace("\xF3", "o") + .replace("\xFA", "u")) + + + return word + + + +class SwedishStemmer(_ScandinavianStemmer): + + """ + The Swedish Snowball stemmer. + + :cvar __vowels: The Swedish vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Swedish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/swedish/stemmer.html + + """ + + __vowels = "aeiouy\xE4\xE5\xF6" + __s_ending = "bcdfghjklmnoprtvy" + __step1_suffixes = ("heterna", "hetens", "heter", "heten", + "anden", "arnas", "ernas", "ornas", "andes", + "andet", "arens", "arna", "erna", "orna", + "ande", "arne", "aste", "aren", "ades", + "erns", "ade", "are", "ern", "ens", "het", + "ast", "ad", "en", "ar", "er", "or", "as", + "es", "at", "a", "e", "s") + __step2_suffixes = ("dd", "gd", "nn", "dt", "gt", "kt", "tt") + __step3_suffixes = ("fullt", "l\xF6st", "els", "lig", "ig") + + def stem(self, word): + """ + Stem a Swedish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + r1 = self._r1_scandinavian(word, self.__vowels) + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + else: + word = word[:-len(suffix)] + r1 = r1[:-len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix in ("els", "lig", "ig"): + word = word[:-len(suffix)] + elif suffix in ("fullt", "l\xF6st"): + word = word[:-1] + break + + + return word + + +class PolishStemmer(_LanguageSpecificStemmer): + """ + The Polish stemmer, implemented based on python stemmer + for Polish language available at: https://github.com/Tutanchamon/pl_stemmer + """ + + def stem(self, word): + word = word.lower() + + stem = word[:] + stem = self.remove_nouns(stem) + stem = self.remove_diminutive(stem) + stem = self.remove_adjective_ends(stem) + stem = self.remove_verbs_ends(stem) + stem = self.remove_adverbs_ends(stem) + stem = self.remove_plural_forms(stem) + stem = self.remove_general_ends(stem) + + return stem + + @staticmethod + def remove_general_ends(word): + # print "DEBUG: END", word[-1:] + if len(word) > 4 and word[-2:] in {"ia", "ie"}: + return word[:-2] + if len(word) > 4 and word[-1:] in {"u", u"ą", "i", "a", u"ę", "y", u"ę", u"ł"}: + return word[:-1] + return word + + @staticmethod + def remove_diminutive(word): + if len(word) > 6: + if word[-5:] in {"eczek", "iczek", "iszek", "aszek", "uszek"}: + return word[:-5] + if word[-4:] in {"enek", "ejek", "erek"}: + return word[:-2] + if len(word) > 4: + if word[-2:] in {"ek", "ak"}: + return word[:-2] + return word + + @staticmethod + def remove_verbs_ends(word): + if len(word) > 5 and word.endswith("bym"): + return word[:-3] + if len(word) > 5 and word[-3:] in {"esz", "asz", "cie", u"eść", u"aść", u"łem", "amy", "emy"}: + return word[:-3] + if len(word) > 3 and word[-3:] in {"esz", "asz", u"eść", u"aść", u"eć", u"ać"}: + return word[:-2] + if len(word) > 3 and word[-3:] in {"aj"}: + return word[:-1] + if len(word) > 3 and word[-2:] in {u"ać", "em", "am", u"ał", u"ił", u"ić", u"ąc"}: + return word[:-2] + return word + + @staticmethod + def remove_nouns(word): + if len(word) > 7 and word[-5:] in {"zacja", u"zacją", "zacji"}: + return word[:-4] + if len(word) > 6 and word[-4:] in {"acja", "acji", u"acją", "tach", "anie", "enie", + "eniu", "aniu"}: + return word[:-4] + if len(word) > 6 and word.endswith("tyka"): + return word[:-2] + if len(word) > 5 and word[-3:] in {"ach", "ami", "nia", "niu", "cia", "ciu"}: + return word[:-3] + if len(word) > 5 and word[-3:] in {"cji", "cja", u"cją"}: + return word[:-2] + if len(word) > 5 and word[-2:] in {"ce", "ta"}: + return word[:-2] + return word + + @staticmethod + def remove_adjective_ends(word): + if len(word) > 7 and word.startswith("naj") and (word.endswith("sze") + or word.endswith("szy")): + return word[3:-3] + if len(word) > 7 and word.startswith("naj") and word.endswith("szych"): + return word[3:-5] + if len(word) > 6 and word.endswith("czny"): + return word[:-4] + if len(word) > 5 and word[-3:] in {"owy", "owa", "owe", "ych", "ego"}: + return word[:-3] + if len(word) > 5 and word[-2:] in {"ej"}: + return word[:-2] + return word + + @staticmethod + def remove_adverbs_ends(word): + if len(word) > 4 and word[:-3] in {"nie", "wie"}: + return word[:-2] + if len(word) > 4 and 
word.endswith("rze"): + return word[:-2] + return word + + @staticmethod + def remove_plural_forms(word): + if len(word) > 4 and (word.endswith(u"ów") or word.endswith("om")): + return word[:-2] + if len(word) > 4 and word.endswith("ami"): + return word[:-3] + return word + +class ArabicStemmer(_StandardStemmer, _LanguageSpecificStemmer): + + # Normalize_pre stes + __vocalization = re.compile( + r'[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]' + ) + + __kasheeda = re.compile(r'[\u0640]') # ـ tatweel/kasheeda + + __arabic_punctuation_marks = re.compile(r'[\u060C-\u061B-\u061F]') # ؛ ، ؟ + + # Normalize_post + __last_hamzat = ('\u0623', '\u0625', '\u0622', '\u0624', '\u0626') # أ، إ، آ، ؤ، ئ + + # normalize other hamza's + __initial_hamzat = re.compile(r'^[\u0622\u0623\u0625]') # أ، إ، آ + + __waw_hamza = re.compile(r'[\u0624]') # ؤ + + __yeh_hamza = re.compile(r'[\u0626]') # ئ + + __alefat = re.compile(r'[\u0623\u0622\u0625]') # أ، إ، آ + + # Checks + __checks1 = ( + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال + '\u0627\u0644', + '\u0644\u0644', # لل، ال + ) + + __checks2 = ('\u0629', '\u0627\u062a') # ة # female plural ات + + # Suffixes + __suffix_noun_step1a = ( + '\u064a', + '\u0643', + '\u0647', # ي، ك، ه + '\u0646\u0627', + '\u0643\u0645', + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # نا، كم، ها، هن، هم + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', # كما، هما + ) + + __suffix_noun_step1b = '\u0646' # ن + + __suffix_noun_step2a = ('\u0627', '\u064a', '\u0648') # ا، ي، و + + __suffix_noun_step2b = '\u0627\u062a' # ات + + __suffix_noun_step2c1 = '\u062a' # ت + + __suffix_noun_step2c2 = '\u0629' # ة + + __suffix_noun_step3 = '\u064a' # ي + + __suffix_verb_step1 = ( + '\u0647', + '\u0643', # ه، ك + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', + '\u0647\u0645', # ني، نا، ها، هم + '\u0647\u0646', + '\u0643\u0645', + '\u0643\u0646', # هن، كم، كن + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', # هما، كما، كمو + ) + + __suffix_verb_step2a = ( + '\u062a', + '\u0627', + '\u0646', + '\u064a', # ت، ا، ن، ي + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', # نا، تا، تن Past + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', # ان، هن، ين Present + '\u062a\u0645\u0627', # تما + ) + + __suffix_verb_step2b = ('\u0648\u0627', '\u062a\u0645') # وا، تم + + __suffix_verb_step2c = ('\u0648', '\u062a\u0645\u0648') # و # تمو + + __suffix_all_alef_maqsura = '\u0649' # ى + + # Prefixes + __prefix_step1 = ( + '\u0623', # أ + '\u0623\u0623', + '\u0623\u0622', + '\u0623\u0624', + '\u0623\u0627', + '\u0623\u0625', # أأ، أآ، أؤ، أا، أإ + ) + + __prefix_step2a = ('\u0641\u0627\u0644', '\u0648\u0627\u0644') # فال، وال + + __prefix_step2b = ('\u0641', '\u0648') # ف، و + + __prefix_step3a_noun = ( + '\u0627\u0644', + '\u0644\u0644', # لل، ال + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال + ) + + __prefix_step3b_noun = ( + '\u0628', + '\u0643', + '\u0644', # ب، ك، ل + '\u0628\u0628', + '\u0643\u0643', # بب، كك + ) + + __prefix_step3_verb = ( + '\u0633\u064a', + '\u0633\u062a', + '\u0633\u0646', + '\u0633\u0623', + ) # سي، ست، سن، سأ + + __prefix_step4_verb = ( + '\u064a\u0633\u062a', + '\u0646\u0633\u062a', + '\u062a\u0633\u062a', + ) # يست، نست، تست + + # Suffixes added due to Conjugation Verbs + __conjugation_suffix_verb_1 = ('\u0647', '\u0643') # ه، ك + + __conjugation_suffix_verb_2 = ( + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', # ني، نا، ها + '\u0647\u0645', + '\u0647\u0646', + '\u0643\u0645', # هم، هن، كم + 
'\u0643\u0646', # كن + ) + __conjugation_suffix_verb_3 = ( + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', + ) # هما، كما، كمو + + __conjugation_suffix_verb_4 = ('\u0627', '\u0646', '\u064a') # ا، ن، ي + + __conjugation_suffix_verb_past = ( + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', + ) # نا، تا، تن + + __conjugation_suffix_verb_present = ( + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', + ) # ان، ون، ين + + # Suffixes added due to derivation Names + __conjugation_suffix_noun_1 = ('\u064a', '\u0643', '\u0647') # ي، ك، ه + + __conjugation_suffix_noun_2 = ( + '\u0646\u0627', + '\u0643\u0645', # نا، كم + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # ها، هن، هم + ) + + __conjugation_suffix_noun_3 = ( + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', + ) # كما، هما + + # Prefixes added due to derivation Names + __prefixes1 = ('\u0648\u0627', '\u0641\u0627') # فا، وا + + __articles_3len = ('\u0643\u0627\u0644', '\u0628\u0627\u0644') # بال كال + + __articles_2len = ('\u0627\u0644', '\u0644\u0644') # ال لل + + # Prepositions letters + __prepositions1 = ('\u0643', '\u0644') # ك، ل + __prepositions2 = ('\u0628\u0628', '\u0643\u0643') # بب، كك + + is_verb = True + is_noun = True + is_defined = False + + suffixes_verb_step1_success = False + suffix_verb_step2a_success = False + suffix_verb_step2b_success = False + suffix_noun_step2c2_success = False + suffix_noun_step1a_success = False + suffix_noun_step2a_success = False + suffix_noun_step2b_success = False + suffixe_noun_step1b_success = False + prefix_step2a_success = False + prefix_step3a_noun_success = False + prefix_step3b_noun_success = False + + def __normalize_pre(self, token): + """ + :param token: string + :return: normalized token type string + """ + # strip diacritics + token = self.__vocalization.sub('', token) + # strip kasheeda + token = self.__kasheeda.sub('', token) + # strip punctuation marks + token = self.__arabic_punctuation_marks.sub('', token) + return token + + def __normalize_post(self, token): + # normalize last hamza + for hamza in self.__last_hamzat: + if token.endswith(hamza): + token = suffix_replace(token, hamza, '\u0621') + break + # normalize other hamzat + token = self.__initial_hamzat.sub('\u0627', token) + token = self.__waw_hamza.sub('\u0648', token) + token = self.__yeh_hamza.sub('\u064a', token) + token = self.__alefat.sub('\u0627', token) + return token + + def __checks_1(self, token): + for prefix in self.__checks1: + if token.startswith(prefix): + if prefix in self.__articles_3len and len(token) > 4: + self.is_noun = True + self.is_verb = False + self.is_defined = True + break + + if prefix in self.__articles_2len and len(token) > 3: + self.is_noun = True + self.is_verb = False + self.is_defined = True + break + + def __checks_2(self, token): + for suffix in self.__checks2: + if token.endswith(suffix): + if suffix == '\u0629' and len(token) > 2: + self.is_noun = True + self.is_verb = False + break + + if suffix == '\u0627\u062a' and len(token) > 3: + self.is_noun = True + self.is_verb = False + break + + def __Suffix_Verb_Step1(self, token): + for suffix in self.__suffix_verb_step1: + if token.endswith(suffix): + if suffix in self.__conjugation_suffix_verb_1 and len(token) >= 4: + token = token[:-1] + self.suffixes_verb_step1_success = True + break + + if suffix in self.__conjugation_suffix_verb_2 and len(token) >= 5: + token = token[:-2] + self.suffixes_verb_step1_success = True + break + + if suffix in self.__conjugation_suffix_verb_3 and len(token) >= 6: + 
token = token[:-3] + self.suffixes_verb_step1_success = True + break + return token + + def __Suffix_Verb_Step2a(self, token): + for suffix in self.__suffix_verb_step2a: + if token.endswith(suffix) and len(token) > 3: + if suffix == '\u062a' and len(token) >= 4: + token = token[:-1] + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_4 and len(token) >= 4: + token = token[:-1] + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_past and len(token) >= 5: + token = token[:-2] # past + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_present and len(token) > 5: + token = token[:-2] # present + self.suffix_verb_step2a_success = True + break + + if suffix == '\u062a\u0645\u0627' and len(token) >= 6: + token = token[:-3] + self.suffix_verb_step2a_success = True + break + return token + + def __Suffix_Verb_Step2c(self, token): + for suffix in self.__suffix_verb_step2c: + if token.endswith(suffix): + if suffix == '\u062a\u0645\u0648' and len(token) >= 6: + token = token[:-3] + break + + if suffix == '\u0648' and len(token) >= 4: + token = token[:-1] + break + return token + + def __Suffix_Verb_Step2b(self, token): + for suffix in self.__suffix_verb_step2b: + if token.endswith(suffix) and len(token) >= 5: + token = token[:-2] + self.suffix_verb_step2b_success = True + break + return token + + def __Suffix_Noun_Step2c2(self, token): + for suffix in self.__suffix_noun_step2c2: + if token.endswith(suffix) and len(token) >= 3: + token = token[:-1] + self.suffix_noun_step2c2_success = True + break + return token + + def __Suffix_Noun_Step1a(self, token): + for suffix in self.__suffix_noun_step1a: + if token.endswith(suffix): + if suffix in self.__conjugation_suffix_noun_1 and len(token) >= 4: + token = token[:-1] + self.suffix_noun_step1a_success = True + break + + if suffix in self.__conjugation_suffix_noun_2 and len(token) >= 5: + token = token[:-2] + self.suffix_noun_step1a_success = True + break + + if suffix in self.__conjugation_suffix_noun_3 and len(token) >= 6: + token = token[:-3] + self.suffix_noun_step1a_success = True + break + return token + + def __Suffix_Noun_Step2a(self, token): + for suffix in self.__suffix_noun_step2a: + if token.endswith(suffix) and len(token) > 4: + token = token[:-1] + self.suffix_noun_step2a_success = True + break + return token + + def __Suffix_Noun_Step2b(self, token): + for suffix in self.__suffix_noun_step2b: + if token.endswith(suffix) and len(token) >= 5: + token = token[:-2] + self.suffix_noun_step2b_success = True + break + return token + + def __Suffix_Noun_Step2c1(self, token): + for suffix in self.__suffix_noun_step2c1: + if token.endswith(suffix) and len(token) >= 4: + token = token[:-1] + break + return token + + def __Suffix_Noun_Step1b(self, token): + for suffix in self.__suffix_noun_step1b: + if token.endswith(suffix) and len(token) > 5: + token = token[:-1] + self.suffixe_noun_step1b_success = True + break + return token + + def __Suffix_Noun_Step3(self, token): + for suffix in self.__suffix_noun_step3: + if token.endswith(suffix) and len(token) >= 3: + token = token[:-1] # ya' nisbiya + break + return token + + def __Suffix_All_alef_maqsura(self, token): + for suffix in self.__suffix_all_alef_maqsura: + if token.endswith(suffix): + token = suffix_replace(token, suffix, '\u064a') + return token + + def __Prefix_Step1(self, token): + for prefix in self.__prefix_step1: + if token.startswith(prefix) and len(token) > 
3: + if prefix == '\u0623\u0623': + token = prefix_replace(token, prefix, '\u0623') + break + + elif prefix == '\u0623\u0622': + token = prefix_replace(token, prefix, '\u0622') + break + + elif prefix == '\u0623\u0624': + token = prefix_replace(token, prefix, '\u0624') + break + + elif prefix == '\u0623\u0627': + token = prefix_replace(token, prefix, '\u0627') + break + + elif prefix == '\u0623\u0625': + token = prefix_replace(token, prefix, '\u0625') + break + return token + + def __Prefix_Step2a(self, token): + for prefix in self.__prefix_step2a: + if token.startswith(prefix) and len(token) > 5: + token = token[len(prefix) :] + self.prefix_step2a_success = True + break + return token + + def __Prefix_Step2b(self, token): + for prefix in self.__prefix_step2b: + if token.startswith(prefix) and len(token) > 3: + if token[:2] not in self.__prefixes1: + token = token[len(prefix) :] + break + return token + + def __Prefix_Step3a_Noun(self, token): + for prefix in self.__prefix_step3a_noun: + if token.startswith(prefix): + if prefix in self.__articles_2len and len(token) > 4: + token = token[len(prefix) :] + self.prefix_step3a_noun_success = True + break + if prefix in self.__articles_3len and len(token) > 5: + token = token[len(prefix) :] + break + return token + + def __Prefix_Step3b_Noun(self, token): + for prefix in self.__prefix_step3b_noun: + if token.startswith(prefix): + if len(token) > 3: + if prefix == '\u0628': + token = token[len(prefix) :] + self.prefix_step3b_noun_success = True + break + + if prefix in self.__prepositions2: + token = prefix_replace(token, prefix, prefix[1]) + self.prefix_step3b_noun_success = True + break + + if prefix in self.__prepositions1 and len(token) > 4: + token = token[len(prefix) :] # BUG: cause confusion + self.prefix_step3b_noun_success = True + break + return token + + def __Prefix_Step3_Verb(self, token): + for prefix in self.__prefix_step3_verb: + if token.startswith(prefix) and len(token) > 4: + token = prefix_replace(token, prefix, prefix[1]) + break + return token + + def __Prefix_Step4_Verb(self, token): + for prefix in self.__prefix_step4_verb: + if token.startswith(prefix) and len(token) > 4: + token = prefix_replace(token, prefix, '\u0627\u0633\u062a') + self.is_verb = True + self.is_noun = False + break + return token + + def stem(self, word): + """ + Stem an Arabic word and return the stemmed form. 
+ :param word: string + :return: string + """ + # set initial values + self.is_verb = True + self.is_noun = True + self.is_defined = False + + self.suffix_verb_step2a_success = False + self.suffix_verb_step2b_success = False + self.suffix_noun_step2c2_success = False + self.suffix_noun_step1a_success = False + self.suffix_noun_step2a_success = False + self.suffix_noun_step2b_success = False + self.suffixe_noun_step1b_success = False + self.prefix_step2a_success = False + self.prefix_step3a_noun_success = False + self.prefix_step3b_noun_success = False + + modified_word = word + # guess type and properties + # checks1 + self.__checks_1(modified_word) + # checks2 + self.__checks_2(modified_word) + # Pre_Normalization + modified_word = self.__normalize_pre(modified_word) + # Start stemming + if self.is_verb: + modified_word = self.__Suffix_Verb_Step1(modified_word) + if self.suffixes_verb_step1_success: + modified_word = self.__Suffix_Verb_Step2a(modified_word) + if not self.suffix_verb_step2a_success: + modified_word = self.__Suffix_Verb_Step2c(modified_word) + # or next TODO: How to deal with or next instruction + else: + modified_word = self.__Suffix_Verb_Step2b(modified_word) + if not self.suffix_verb_step2b_success: + modified_word = self.__Suffix_Verb_Step2a(modified_word) + if self.is_noun: + modified_word = self.__Suffix_Noun_Step2c2(modified_word) + if not self.suffix_noun_step2c2_success: + if not self.is_defined: + modified_word = self.__Suffix_Noun_Step1a(modified_word) + # if self.suffix_noun_step1a_success: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + if not self.suffix_noun_step2a_success: + modified_word = self.__Suffix_Noun_Step2b(modified_word) + if ( + not self.suffix_noun_step2b_success + and not self.suffix_noun_step2a_success + ): + modified_word = self.__Suffix_Noun_Step2c1(modified_word) + # or next ? 
todo : how to deal with or next + else: + modified_word = self.__Suffix_Noun_Step1b(modified_word) + if self.suffixe_noun_step1b_success: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + if not self.suffix_noun_step2a_success: + modified_word = self.__Suffix_Noun_Step2b(modified_word) + if ( + not self.suffix_noun_step2b_success + and not self.suffix_noun_step2a_success + ): + modified_word = self.__Suffix_Noun_Step2c1(modified_word) + else: + if not self.is_defined: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + modified_word = self.__Suffix_Noun_Step2b(modified_word) + modified_word = self.__Suffix_Noun_Step3(modified_word) + if not self.is_noun and self.is_verb: + modified_word = self.__Suffix_All_alef_maqsura(modified_word) + + # prefixes + modified_word = self.__Prefix_Step1(modified_word) + modified_word = self.__Prefix_Step2a(modified_word) + if not self.prefix_step2a_success: + modified_word = self.__Prefix_Step2b(modified_word) + modified_word = self.__Prefix_Step3a_Noun(modified_word) + if not self.prefix_step3a_noun_success and self.is_noun: + modified_word = self.__Prefix_Step3b_Noun(modified_word) + else: + if not self.prefix_step3b_noun_success and self.is_verb: + modified_word = self.__Prefix_Step3_Verb(modified_word) + modified_word = self.__Prefix_Step4_Verb(modified_word) + + # post normalization stemming + modified_word = self.__normalize_post(modified_word) + stemmed_word = modified_word + return stemmed_word diff --git a/summa/preprocessing/stopwords.py b/summa/preprocessing/stopwords.py new file mode 100644 index 0000000..3527d68 --- /dev/null +++ b/summa/preprocessing/stopwords.py @@ -0,0 +1,210 @@ +english = """ +all six eleven just less being indeed over both anyway detail four front already through yourselves fify +mill still its before move whose one system also somewhere herself thick show had enough should to only +seeming under herein ours two has might thereafter do them his around thereby get very de none cannot +every whether they not during thus now him nor name regarding several hereafter did always cry whither +beforehand this someone she each further become thereupon where side towards few twelve because often ten +anyhow doing km eg some back used go namely besides yet are cant our beyond ourselves sincere out even +what throughout computer give for bottom mine since please while per find everything behind does various +above between kg neither seemed ever across t somehow be we who were sixty however here otherwise whereupon +nowhere although found hers re along quite fifteen by on about didn last would anything via of could thence +put against keep etc s became ltd hence therein onto or whereafter con among own co afterwards formerly +within seems into others whatever yourself down alone everyone done least another whoever moreover couldnt +must your three from her their together top there due been next anyone whom much call too interest thru +themselves hundred was until empty more himself elsewhere mostly that fire becomes becoming hereby but +else part everywhere former don with than those he me forty myself made full twenty these bill using up us +will nevertheless below anywhere nine can theirs toward my something and sometimes whenever sometime then +almost wherever is describe am it doesn an really as itself at have in seem whence ie any if again hasnt +inc un thin no perhaps latter meanwhile when amount same wherein beside how other take which latterly you +fill either nobody unless whereas see though may after upon 
therefore most hereupon eight amongst never +serious nothing such why a off whereby third i whole noone many well except amoungst yours rather without +so five the first having once +""" + +spanish = """ +un una unas unos uno sobre todo tambien tras otro algun alguno alguna algunos algunas ser es soy eres somos +sois estoy esta estamos estais estan como en para atras porque por que estado estaba ante antes siendo ambos +pero por poder puede puedo podemos podeis pueden fui fue fuimos fueron hacer hago hace hacemos haceis hacen +cada fin incluso primero desde conseguir consigo consigue consigues conseguimos consiguen ir voy va vamos +vais van vaya gueno ha tener tengo tiene tenemos teneis tienen el la lo las los su aqui mio tuyo ellos ellas +nos nosotros vosotros vosotras si dentro solo solamente saber sabes sabe sabemos sabeis saben ultimo largo +bastante haces muchos aquellos aquellas sus entonces tiempo verdad verdadero verdadera cierto ciertos cierta +ciertas intentar intento intenta intentas intentamos intentais intentan dos bajo arriba encima usar uso usas +usa usamos usais usan emplear empleo empleas emplean ampleamos empleais valor muy era eras eramos eran modo +bien cual cuando donde mientras quien con entre sin trabajo trabajar trabajas trabaja trabajamos trabajais +trabajan podria podrias podriamos podrian podriais yo aquel a acabar actualmente acuerdo adelante ademas +ademas adrede afirmo agrego ahi ahora ahi al algo alguna algunas alguno algunos algun alla alli alli alrededor +ambos antano antano ante anterior antes apenas aproximadamente aquel aquella aquellas aquello aquellos aqui +aquel aquella aquellas aquellos aqui arribaabajo aseguro asi asi aun aunque ayer anadio aun b bajo bastante +bien breve buen buena buenas bueno buenos c cada casi cerca cierto cinco claro comento como con conmigo +conocer considera considero contigo contra cosa cosas creo cual cuales cualquier cuando cuanta cuantas cuanto +cuantos cuatro cuenta cuyo cual cuales cuando cuanta cuantas cuanto cuantos como d da dado dan dar de debajo +debe deben deber debido decir dejo del delante demasiado demas dentro deprisa desde despacio despues despues +detras detras dia dias dice dicen dicho dieron diferente diferentes dijeron dijo dio donde dos durante dia +dias donde e ejemplo el ella ellas ello ellos embargo en encima encuentra enfrente enseguida entonces entre +era erais eramos eran eras eres es esa esas ese eso esos esta estaba estabais estabamos estaban estabas estad +estada estadas estado estados estais estamos estan estando estar estara estaran estaras estare estareis +estaremos estaria estariais estariamos estarian estarias estara estas este esteis estemos esten estes esto +estos estoy estuve estuviera estuvierais estuvieramos estuvieran estuvieras estuvieron estuviese estuvieseis +estuviesemos estuviesen estuvieses estuvimos estuviste estuvisteis estuvo esta estan ex excepto existe existen +explico expreso f fin final fue fuera fuerais fueramos fueran fueras fueron fuese fueseis fuesemos fuesen +fueses fui fuimos fuiste fuisteis g general gran grande grandes gustar h ha habeis haber habia habiais habiamos +habian habias habida habidas habido habidos habiendo habla hablan habra habran habras habre habreis habremos +habria habriais habriamos habrian habrias habra habia habian hace hacen hacer hacerlo hacia haciendo han has +hasta hay haya hayais hayamos hayan hayas he hecho hemos hicieron hizo horas hoy hube hubiera hubierais +hubieramos hubieran hubieras hubieron hubiese hubieseis hubiesemos hubiesen 
hubieses hubimos hubiste hubisteis +hubo i igual incluso indico informo informo ir j jamas junto k l la lado las le lejos les llego lleva llevar +lo los luego lugar m mal manera manifesto mas mayor me mediante medio mejor menciono menos menudo mi mia mias +mientras mio mios mis misma mismas mismo mismos momento mucha muchas mucho muchos muy mas mi mia mias mio mios +n nada nadie ni ningun ninguna ningunas ninguno ningunos ningun no nos nosotras nosotros nuestra nuestras +nuestro nuestros nueva nuevas nuevo nuevos nunca o ocho os otra otras otro otros p pais para parece parte +partir pasada pasado pasar pais peor pequeno pero pesar poca pocas poco pocos podemos poder podra podran podria +podrian poner por porque posible primer primera primero primeros principalmente pronto propia propias propio +propios proximo proximo proximos pudo pueda puede pueden pues q qeu que quedo queremos querer quien quienes +quiere quiza quizas quiza quizas quien quienes que r raras realizado realizar realizo repente respecto s saber +salvo se sea seais seamos sean seas seguir segun segunda segundo segun seis senor senora ser sera seran seras +sere sereis seremos seria seriais seriamos serian serias sera seran seria senalo si sido siempre siendo siete +sigue siguiente sin sino sisi sobre sois sola solamente solas solo solos somos son soy soyos su supuesto sus +suya suyas suyo suyos se si solo t tal tambien tambien tampoco tan tanto tarde te temprano tendra tendran +tendras tendre tendreis tendremos tendria tendriais tendriamos tendrian tendrias tendra tendran tened teneis +tenemos tener tenga tengais tengamos tengan tengas tengo tenia teniais teniamos tenian tenias tenida tenidas +tenido tenidos teniendo tenia tercera ti tiene tienen tienes toda todas todavia todavia todo todos tomar total +tras trata traves tres tu tus tuve tuviera tuvierais tuvieramos tuvieran tuvieras tuvieron tuviese tuvieseis +tuviesemos tuviesen tuvieses tuvimos tuviste tuvisteis tuvo tuya tuyas tuyo tuyos tu u un una unas uno unos +usted ustedes v va vamos van varias varios veces venir ver vez volver vosotras vosotros vuestra vuestras vuestro +vuestros w x y ya yo z el esa esas ese esos esta estas este estos ultima ultimas ultimo ultimos +""" + +german = """ +aber als am an auch auf aus bei bin bis bist da dadurch daher darum das daß dass dein deine dem den der des +dessen deshalb die dies dieser dieses doch dort du durch ein eine einem einen einer eines er es euer eure fur +hatte hatten hattest hattet hierhinter ich ihr ihre im in ist ja jede jedem jeden jeder jedes jener jenes jetzt +kann kannst konnen konnt machen mein meine mit muß mußt musst mussen mußt nach nachdem nein nicht nun oder seid +sein seine sich sie sind soll sollen sollst sollt sonst soweit sowie und unserunsere unter vom von vor wann +warum was weiter weitere wenn wer werde werden werdet weshalb wie wieder wieso wir wird wirst wo woher wohin zu +zum zur uber +""" + +portuguese = """ +de a o que e do da em um para é com não uma os no se na por mais as dos como mas foi ao ele das tem à seu +sua ou ser quando muito há nos já está eu também só pelo pela até isso ela entre era depois sem mesmo aos ter +seus quem nas me esse eles estão você tinha foram essa num nem suas meu às minha têm numa pelos elas havia seja +qual será nós tenho lhe deles essas esses pelas este fosse dele tu te vocês vos lhes meus minhas teu tua teus +tuas nosso nossa nossos nossas dela delas esta estes estas aquele aquela aqueles aquelas isto aquilo estou está +estamos estão estive esteve estivemos 
estiveram estava estávamos estavam estivera estivéramos esteja estejamos +estejam estivesse estivéssemos estivessem estiver estivermos estiverem hei há havemos hão houve houvemos houveram +houvera houvéramos haja hajamos hajam houvesse houvéssemos houvessem houver houvermos houverem houverei houverá +houveremos houverão houveria houveríamos houveriam sou somos são era éramos eram fui foi fomos foram fora fôramos +seja sejamos sejam fosse fôssemos fossem for formos forem serei será seremos serão seria seríamos seriam tenho +tem temos tém tinha tínhamos tinham tive teve tivemos tiveram tivera tivéramos tenha tenhamos tenham tivesse +tivéssemos tivessem tiver tivermos tiverem terei terá teremos terão teria teríamos teriam +""" + +swedish = """ +aderton adertonde adjö aldrig alla allas allt alltid alltså andra andras annan annat artonde artonn att av bakom +bara behöva behövas behövde behövt beslut beslutat beslutit bland blev bli blir blivit bort borta bra bäst bättre +båda bådas dag dagar dagarna dagen de del delen dem den denna deras dess dessa det detta dig din dina dit ditt +dock dom du där därför då e efter eftersom ej elfte eller elva emot en enkel enkelt enkla enligt ens er era ers +ert ett ettusen fanns fem femte femtio femtionde femton femtonde fick fin finnas finns fjorton fjortonde fjärde +fler flera flesta fram framför från fyra fyrtio fyrtionde få får fått följande för före förlåt förra första +genast genom gick gjorde gjort god goda godare godast gott gälla gäller gällt gärna gå går gått gör göra ha hade +haft han hans har heller hellre helst helt henne hennes hit hon honom hundra hundraen hundraett hur här hög höger +högre högst i ibland icke idag igen igår imorgon in inför inga ingen ingenting inget innan inne inom inte inuti +ja jag jo ju just jämfört kan kanske knappast kom komma kommer kommit kr kunde kunna kunnat kvar legat ligga +ligger lika likställd likställda lilla lite liten litet länge längre längst lätt lättare lättast långsam +långsammare långsammast långsamt långt låt man med mej mellan men mer mera mest mig min mina mindre minst mitt +mittemot mot mycket många måste möjlig möjligen möjligt möjligtvis ned nederst nedersta nedre nej ner ni nio +nionde nittio nittionde nitton nittonde nog noll nr nu nummer när nästa någon någonting något några nån nånting +nåt nödvändig nödvändiga nödvändigt nödvändigtvis och också ofta oftast olika olikt om oss på rakt redan rätt sa +sade sagt samma sedan senare senast sent sex sextio sextionde sexton sextonde sig sin sina sist sista siste sitt +sitta sju sjunde sjuttio sjuttionde sjutton sjuttonde själv sjätte ska skall skulle slutligen små smått snart som +stor stora stort större störst säga säger sämre sämst så sådan sådana sådant ta tack tar tidig tidigare tidigast +tidigt till tills tillsammans tio tionde tjugo tjugoen tjugoett tjugonde tjugotre tjugotvå tjungo tolfte tolv tre +tredje trettio trettionde tretton trettonde två tvåhundra under upp ur ursäkt ut utan utanför ute va vad var vara +varför varifrån varit varje varken vars varsågod vart vem vems verkligen vi vid vidare viktig viktigare viktigast +viktigt vilka vilkas vilken vilket vill väl vänster vänstra värre vår våra vårt än ännu är även åt åtminstone +åtta åttio åttionde åttonde över övermorgon överst övre +""" + +danish = """ +ad af aldrig alle alt anden andet andre at bare begge blev blive bliver da de dem den denne der deres det dette +dig din dine disse dit dog du efter ej eller en end ene eneste enhver er et far fem fik fire flere fleste for +fordi forrige fra 
få får før god godt ham han hans har havde have hej helt hende hendes her hos hun hvad hvem +hver hvilken hvis hvor hvordan hvorfor hvornår i ikke ind ingen intet ja jeg jer jeres jo kan kom komme kommer +kun kunne lad lav lidt lige lille man mand mange med meget men mens mere mig min mine mit mod må ned nej ni nogen +noget nogle nu ny nyt når nær næste næsten og også okay om op os otte over på se seks selv ser ses sig sige +sin sine sit skal skulle som stor store syv så sådan tag tage thi ti til to tre ud under var ved vi vil ville +vor vores være været alene allerede alligevel altid bag blandt burde bør dens derefter derfor derfra deri dermed +derpå derved egen ellers endnu ens enten flest foran først gennem gjorde gjort gør gøre gørende hel heller hen +henover herefter heri hermed herpå hvilke hvilkes hvorefter hvorfra hvorhen hvori hvorimod hvorved igen igennem +imellem imens imod indtil langs lave lavet ligesom længere mellem mest mindre mindst måske nemlig nogensinde nok +omkring overalt samme sammen selvom senere siden stadig synes syntes således temmelig tidligere tilbage tit uden +udover undtagen via vore vær øvrigt +""" + +italian = """ +un avete dal voi nostri avesti stiano starò sull tutto faccio sarai vostri farebbe ai degli farò c faccia lo +sullo farà facevate avendo fummo stiamo staranno questi sia con sue al mio fareste ero di e avessi alle avreste +avesse alla avrei avemmo col ad ne avremmo avevano tuo avessero siate suoi facevo ti che mi questa avrebbe fossero +tua starebbero faceste facesti anche cui ho tra foste stavamo non stessi avevate nostre quelli queste avrete eri +facemmo stavate stia in dagl avrò avremo se feci furono io stavano nelle quante per abbiano nell faceva fecero steste +eravamo farei sarei avevi sui quanto dai dello era loro su quello fossi stava nostra quale una farete gli siano avranno +i stette fece negli facciano facevano dove vostra farebbero sugli vostro uno aveva dall ha avuto avuti sarete sulla sarà +perché essendo fai siete facendo da avevamo starà o faranno lei mie stiate nel fu facciamo stessero noi facciate stando +si è avute sarebbero miei sto contro avrà coi chi ci avrebbero aveste stettero abbiamo sarebbe agl del stareste sua faremo +siamo fanno sei abbiate fui ed quella dalle facessero tue fosti facevamo erano stessimo nei facessimo nello le dell abbia +fosse farai facesse starai stavo staremo mia stesse avevo lui agli fossimo dagli vostre stanno sareste quanti stemmo facessi +ebbe stesti tuoi dallo tutti sugl staremmo vi la dei quanta ebbero stavi saranno delle dalla saresti staresti stai suo nostro +aremo starete saremmo sarò li hai allo avresti dov avuta faresti starei il quelle degl all a ebbi nella eravate stetti negl +come questo facevi sulle più tu della sono starebbe sul hanno faremmo sta avrai avessimo ma l +""" + +# stopwords from https://github.com/bieli/stopwords repository +polish = """ +a aby ach acz aczkolwiek aj albo ale alez ależ ani az aż bardziej bardzo beda bedzie bez deda będą bede będę +będzie bo bowiem by byc być byl byla byli bylo byly był była było były bynajmniej cala cali caly cała cały ci +cie ciebie cię co cokolwiek cos coś czasami czasem czemu czy czyli daleko dla dlaczego dlatego do dobrze +dokad dokąd dosc dość duzo dużo dwa dwaj dwie dwoje dzis dzisiaj dziś gdy gdyby gdyz gdyż gdzie gdziekolwiek +gdzies gdzieś go i ich ile im inna inne inny innych iz iż ja jak jakas jakaś jakby jaki jakichs jakichś jakie +jakis jakiś jakiz jakiż jakkolwiek jako jakos jakoś ją je jeden jedna jednak jednakze jednakże 
jedno jego jej +jemu jesli jest jestem jeszcze jeśli jezeli jeżeli juz już kazdy każdy kiedy kilka kims kimś kto ktokolwiek +ktora ktore ktorego ktorej ktory ktorych ktorym ktorzy ktos ktoś która które którego której który których +którym którzy ku lat lecz lub ma mają mało mam mi miedzy między mimo mna mną mnie moga mogą moi moim moj +moja moje moze mozliwe mozna może możliwe można mój mu musi my na nad nam nami nas nasi nasz nasza nasze +naszego naszych natomiast natychmiast nawet nia nią nic nich nie niech niego niej niemu nigdy nim nimi niz +niż no o obok od około on ona one oni ono oraz oto owszem pan pana pani po pod podczas pomimo ponad poniewaz +ponieważ powinien powinna powinni powinno poza prawie przeciez przecież przed przede przedtem przez przy roku +rowniez również sam sama są sie się skad skąd soba sobą sobie sposob sposób swoje ta tak taka taki takie +takze także tam te tego tej ten teraz też to toba tobą tobie totez toteż totobą trzeba tu tutaj twoi twoim +twoj twoja twoje twój twym ty tych tylko tym u w wam wami was wasz wasza wasze we według wiele wielu więc +więcej wlasnie właśnie wszyscy wszystkich wszystkie wszystkim wszystko wtedy wy z za zaden zadna zadne +zadnych zapewne zawsze ze zeby zeznowu zł znow znowu znów zostal został żaden żadna żadne żadnych że żeby +""" + +arabic = """أنت كليكما اللتان بنا هما إذا اللواتي أينما كلاهما إما كيت إذ هم ليس كيف لك هن لئن ألا عليك وإن إليكما أيها لعل أنتن كأي لسن ممن له +حين اللتين فيها عسى ما هي أين ليسا هنا بما عما هاته ذاك لدى هاك نحو بكم ذواتا هذا أقل اللتيا إن مع لكما بكما قد لي أولئك إليك أن كلا +ليسوا بس ذات فيه منها ومن هو بها كأنما هاهنا هاتان هذي ذلك كما أوه هكذا ذوا ليست لكي نعم لكن خلا لكم أنا بخ تي فلا حبذا أولاء +ذواتي منذ ولو بين لكنما سوى آها تلك إي آي إذما الذي كليهما لكيلا لهما بعض يا بكن حيثما وإذا بهما ذا ها فيما ماذا والذين لستما كل +لوما ثمة متى عند في هيهات أما ذان الذين وهو أنتم كي آه ذي إذن إليكم بل فإن وإذ تلكما هلا فإذا هذه ذلكم فمن إلا إنا بمن كذلك هاتين +عليه كأن هل ذلكما مهما شتان والذي هيا ذين لستن بك مذ ولا هذين كأين فيم حتى إنما بهن هنالك أم لسنا غير لنا منه نحن اللاتي بعد تينك +ذلكن ولكن كلما إيه عدا لها هذان ته حاشا دون أنى عن تين أكثر كلتا إنه بيد كذا هاتي ذو لست لم إليكن وما مما إلى ذانك اللذين من مه أف +كم اللائي حيث ليستا هؤلاء بماذا ليت هيت بهم لهن التي لولا لو لهم هناك ثم سوف كيفما لستم لما ذينك بلى لا تلكم على لاسيما به بي اللذان أي ذه لن عل أو ريث أنتما +""" + +LANGUAGES = { + "danish": danish, + "english": english, + "german": german, + "spanish": spanish, + "portuguese": portuguese, + "swedish": swedish, + "italian": italian, + "polish": polish, + "arabic": arabic +} + + +def get_stopwords_by_language(language): + if language in LANGUAGES: + return LANGUAGES[language] + return "" diff --git a/summa/preprocessing/textcleaner.py b/summa/preprocessing/textcleaner.py new file mode 100644 index 0000000..9716774 --- /dev/null +++ b/summa/preprocessing/textcleaner.py @@ -0,0 +1,188 @@ +import string +import unicodedata +import logging + +logger = logging.getLogger('summa.preprocessing.cleaner') + +try: + from pattern.en import tag + logger.info("'pattern' package found; tag filters are available for English") + HAS_PATTERN = True +except ImportError: + logger.info("'pattern' package not found; tag filters are not available for English") + HAS_PATTERN = False + +import re + +from .snowball import SnowballStemmer +from .stopwords import get_stopwords_by_language +from summa.syntactic_unit import SyntacticUnit + + +# Utility functions adapted from Gensim v0.10.0: +# 
https://github.com/RaRe-Technologies/gensim/blob/0.10.0/gensim/utils.py +# https://github.com/RaRe-Technologies/gensim/blob/0.10.0/gensim/parsing/preprocessing.py + + +SEPARATOR = r"@" +RE_SENTENCE = re.compile('(\S.+?[.!?])(?=\s+|$)|(\S.+?)(?=[\n]|$)') +AB_SENIOR = re.compile("([A-Z][a-z]{1,2}\.)\s(\w)") +AB_ACRONYM = re.compile("(\.[a-zA-Z]\.)\s(\w)") +AB_ACRONYM_LETTERS = re.compile("([a-zA-Z])\.([a-zA-Z])\.") +UNDO_AB_SENIOR = re.compile("([A-Z][a-z]{1,2}\.)" + SEPARATOR + "(\w)") +UNDO_AB_ACRONYM = re.compile("(\.[a-zA-Z]\.)" + SEPARATOR + "(\w)") + +STEMMER = None +STOPWORDS = None + + +def set_stemmer_language(language): + global STEMMER + if not language in SnowballStemmer.languages: + raise ValueError("Valid languages are: " + ", ".join(sorted(SnowballStemmer.languages))) + STEMMER = SnowballStemmer(language) + + +def set_stopwords_by_language(language, additional_stopwords): + global STOPWORDS + words = get_stopwords_by_language(language) + if not additional_stopwords: + additional_stopwords = {} + STOPWORDS = frozenset({ w for w in words.split() if w } | { w for w in additional_stopwords if w }) + + +def init_textcleanner(language, additional_stopwords): + set_stemmer_language(language) + set_stopwords_by_language(language, additional_stopwords) + + +def split_sentences(text): + processed = replace_abbreviations(text) + return [undo_replacement(sentence) for sentence in get_sentences(processed)] + + +def replace_abbreviations(text): + return replace_with_separator(text, SEPARATOR, [AB_SENIOR, AB_ACRONYM]) + + +def undo_replacement(sentence): + return replace_with_separator(sentence, r" ", [UNDO_AB_SENIOR, UNDO_AB_ACRONYM]) + + +def replace_with_separator(text, separator, regexs): + replacement = r"\1" + separator + r"\2" + result = text + for regex in regexs: + result = regex.sub(replacement, result) + return result + + +def get_sentences(text): + for match in RE_SENTENCE.finditer(text): + yield match.group() + + +# Taken from Gensim +RE_PUNCT = re.compile('([%s])+' % re.escape(string.punctuation), re.UNICODE) +def strip_punctuation(s): + return RE_PUNCT.sub(" ", s) + + +# Taken from Gensim +RE_NUMERIC = re.compile(r"[0-9]+", re.UNICODE) +def strip_numeric(s): + return RE_NUMERIC.sub("", s) + + +def remove_stopwords(sentence): + return " ".join(w for w in sentence.split() if w not in STOPWORDS) + + +def stem_sentence(sentence): + word_stems = [STEMMER.stem(word) for word in sentence.split()] + return " ".join(word_stems) + + +def apply_filters(sentence, filters): + for f in filters: + sentence = f(sentence) + return sentence + + +def filter_words(sentences): + filters = [lambda x: x.lower(), strip_numeric, strip_punctuation, remove_stopwords, + stem_sentence] + apply_filters_to_token = lambda token: apply_filters(token, filters) + return list(map(apply_filters_to_token, sentences)) + + +# Taken from Gensim +def deaccent(text): + """ + Remove accentuation from the given string. + """ + norm = unicodedata.normalize("NFD", text) + result = "".join(ch for ch in norm if unicodedata.category(ch) != 'Mn') + return unicodedata.normalize("NFC", result) + + +# Taken from Gensim +PAT_ALPHABETIC = re.compile('(((?![\d])\w)+)', re.UNICODE) +def tokenize(text, lowercase=False, deacc=False): + """ + Iteratively yield tokens as unicode strings, optionally also lowercasing them + and removing accent marks. 
+ """ + if lowercase: + text = text.lower() + if deacc: + text = deaccent(text) + for match in PAT_ALPHABETIC.finditer(text): + yield match.group() + + +def merge_syntactic_units(original_units, filtered_units, tags=None): + units = [] + for i in range(len(original_units)): + if filtered_units[i] == '': + continue + + text = original_units[i] + token = filtered_units[i] + tag = tags[i][1] if tags else None + sentence = SyntacticUnit(text, token, tag) + sentence.index = i + + units.append(sentence) + + return units + + +def clean_text_by_sentences(text, language="english", additional_stopwords=None): + """ Tokenizes a given text into sentences, applying filters and lemmatizing them. + Returns a SyntacticUnit list. """ + init_textcleanner(language, additional_stopwords) + original_sentences = split_sentences(text) + filtered_sentences = filter_words(original_sentences) + + return merge_syntactic_units(original_sentences, filtered_sentences) + + +def clean_text_by_word(text, language="english", deacc=False, additional_stopwords=None): + """ Tokenizes a given text into words, applying filters and lemmatizing them. + Returns a dict of word -> syntacticUnit. """ + init_textcleanner(language, additional_stopwords) + text_without_acronyms = replace_with_separator(text, "", [AB_ACRONYM_LETTERS]) + original_words = list(tokenize(text_without_acronyms, lowercase=True, deacc=deacc)) + filtered_words = filter_words(original_words) + if HAS_PATTERN: + tags = tag(" ".join(original_words)) # tag needs the context of the words in the text + else: + tags = None + units = merge_syntactic_units(original_words, filtered_words, tags) + return { unit.text : unit for unit in units } + + +def tokenize_by_word(text, deacc=False): + text_without_acronyms = replace_with_separator(text, "", [AB_ACRONYM_LETTERS]) + return tokenize(text_without_acronyms, lowercase=True, deacc=deacc) diff --git a/summa/preprocessing/util.py b/summa/preprocessing/util.py new file mode 100644 index 0000000..0daad9d --- /dev/null +++ b/summa/preprocessing/util.py @@ -0,0 +1,24 @@ +# Natural Language Toolkit: Stemmer Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Helder +# URL: +# For license information, see LICENSE.TXT + + +def suffix_replace(original, old, new): + """ + Replaces the old suffix of the original string by a new suffix + """ + return original[: -len(old)] + new + + +def prefix_replace(original, old, new): + """ + Replaces the old prefix of the original string by a new suffix + :param original: string + :param old: string + :param new: string + :return: string + """ + return new + original[len(old) :] diff --git a/summa/summarizer.py b/summa/summarizer.py new file mode 100644 index 0000000..952625e --- /dev/null +++ b/summa/summarizer.py @@ -0,0 +1,154 @@ +from math import log10 + +from .pagerank_weighted import pagerank_weighted_scipy as _pagerank +from .preprocessing.textcleaner import clean_text_by_sentences as _clean_text_by_sentences +from .commons import build_graph as _build_graph +from .commons import remove_unreachable_nodes as _remove_unreachable_nodes + + +def _set_graph_edge_weights(graph): + for sentence_1 in graph.nodes(): + for sentence_2 in graph.nodes(): + + edge = (sentence_1, sentence_2) + if sentence_1 != sentence_2 and not graph.has_edge(edge): + similarity = _get_similarity(sentence_1, sentence_2) + if similarity != 0: + graph.add_edge(edge, similarity) + + # Handles the case in which all similarities are zero. + # The resultant summary will consist of random sentences. 
+ if all(graph.edge_weight(edge) == 0 for edge in graph.edges()): + _create_valid_graph(graph) + + +def _create_valid_graph(graph): + nodes = graph.nodes() + + for i in range(len(nodes)): + for j in range(len(nodes)): + if i == j: + continue + + edge = (nodes[i], nodes[j]) + + if graph.has_edge(edge): + graph.del_edge(edge) + + graph.add_edge(edge, 1) + + +def _get_similarity(s1, s2): + words_sentence_one = s1.split() + words_sentence_two = s2.split() + + common_word_count = _count_common_words(words_sentence_one, words_sentence_two) + + log_s1 = log10(len(words_sentence_one)) + log_s2 = log10(len(words_sentence_two)) + + if log_s1 + log_s2 == 0: + return 0 + + return common_word_count / (log_s1 + log_s2) + + +def _count_common_words(words_sentence_one, words_sentence_two): + return len(set(words_sentence_one) & set(words_sentence_two)) + + +def _format_results(extracted_sentences, split, score): + if score: + return [(sentence.text, sentence.score) for sentence in extracted_sentences] + if split: + return [sentence.text for sentence in extracted_sentences] + return "\n".join([sentence.text for sentence in extracted_sentences]) + + +def _add_scores_to_sentences(sentences, scores): + for sentence in sentences: + # Adds the score to the object if it has one. + if sentence.token in scores: + sentence.score = scores[sentence.token] + else: + sentence.score = 0 + + +def _get_sentences_with_word_count(sentences, words): + """ Given a list of sentences, returns a list of sentences with a + total word count similar to the word count provided. + """ + word_count = 0 + selected_sentences = [] + # Loops until the word count is reached. + for sentence in sentences: + words_in_sentence = len(sentence.text.split()) + + # Checks if the inclusion of the sentence gives a better approximation + # to the word parameter. + if abs(words - word_count - words_in_sentence) > abs(words - word_count): + return selected_sentences + + selected_sentences.append(sentence) + word_count += words_in_sentence + + return selected_sentences + + +def _extract_most_important_sentences(sentences, ratio, words): + sentences.sort(key=lambda s: s.score, reverse=True) + + # If no "words" option is selected, the number of sentences is + # reduced by the provided ratio. + if words is None: + length = len(sentences) * ratio + return sentences[:int(length)] + + # Else, the ratio is ignored. + else: + return _get_sentences_with_word_count(sentences, words) + + +def summarize(text, ratio=0.2, words=None, language="english", split=False, scores=False, additional_stopwords=None): + if not isinstance(text, str): + raise ValueError("Text parameter must be a Unicode object (str)!") + + # Gets a list of processed sentences. + sentences = _clean_text_by_sentences(text, language, additional_stopwords) + + # Creates the graph and calculates the similarity coefficient for every pair of nodes. + graph = _build_graph([sentence.token for sentence in sentences]) + _set_graph_edge_weights(graph) + + # Remove all nodes with all edges weights equal to zero. + _remove_unreachable_nodes(graph) + + # PageRank cannot be run in an empty graph. + if len(graph.nodes()) == 0: + return [] if split else "" + + # Ranks the tokens using the PageRank algorithm. Returns dict of sentence -> score + pagerank_scores = _pagerank(graph) + + # Adds the summa scores to the sentence objects. 
+ _add_scores_to_sentences(sentences, pagerank_scores) + + # EDIT: return the whole sentences with scores + return sentences + + # Extracts the most important sentences with the selected criterion. + # extracted_sentences = _extract_most_important_sentences(sentences, ratio, words) + + # Sorts the extracted sentences by apparition order in the original text. + # extracted_sentences.sort(key=lambda s: s.index) + + # return _format_results(extracted_sentences, split, scores) + + +def get_graph(text, language="english"): + sentences = _clean_text_by_sentences(text, language) + + graph = _build_graph([sentence.token for sentence in sentences]) + _set_graph_edge_weights(graph) + + return graph diff --git a/summa/syntactic_unit.py b/summa/syntactic_unit.py new file mode 100644 index 0000000..e5feee7 --- /dev/null +++ b/summa/syntactic_unit.py @@ -0,0 +1,14 @@ +class SyntacticUnit(object): + + def __init__(self, text, token=None, tag=None): + self.text = text + self.token = token + self.tag = tag[:2] if tag else None # just first two letters of tag + self.index = -1 + self.score = -1 + + def __str__(self): + return "Original unit: '" + self.text + "' *-*-*-* " + "Processed unit: '" + self.token + "'" + + def __repr__(self): + return str(self) diff --git a/summa/textrank.py b/summa/textrank.py new file mode 100644 index 0000000..328a131 --- /dev/null +++ b/summa/textrank.py @@ -0,0 +1,97 @@ +import argparse +import os +import sys +import warnings + +from .summarizer import summarize +from .keywords import keywords + +# Types of summarization +SENTENCE = 0 +WORD = 1 + +DEFAULT_RATIO = 0.2 + + +def textrank(text, summarize_by=SENTENCE, ratio=DEFAULT_RATIO, words=None, additional_stopwords=None): + if summarize_by == SENTENCE: + return summarize(text, ratio, words, additional_stopwords=additional_stopwords) + else: + return keywords(text, ratio, words, additional_stopwords=additional_stopwords) + + +def existing_file(file_name): + try: + with open(file_name, 'r') as file: + return file.read() + except Exception: + raise argparse.ArgumentTypeError("The file provided could not be opened.") + + +def restricted_float(x): + x = float(x) + if x < 0.0 or x > 1.0: + raise argparse.ArgumentTypeError("{} not in range [0.0, 1.0]".format(x)) + return x + + +def parse_args(args): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, prog="textrank", description="Extract the most relevant sentences or keywords of a given text using the TextRank algorithm.") + + group = parser.add_mutually_exclusive_group(required=True) + # New API + group.add_argument('--summarize', metavar="path/to/file", type=existing_file, + help="Run textrank to summarize the input text.") + group.add_argument('--keywords', metavar="path/to/file", type=existing_file, + help="Run textrank to extract keywords from the input text.") + # Old API + group.add_argument('--text', '-t', metavar="path/to/file", type=existing_file, + help="(Deprecated) Text to summarize if --summary option is selected") + + parser.add_argument('--summary', '-s', metavar="{0,1}", type=int, choices=[SENTENCE, WORD], default=0, + help="(Deprecated) Type of unit to summarize: sentence (0) or word (1)") + parser.add_argument('--ratio', '-r', metavar="r", type=restricted_float, default=DEFAULT_RATIO, + help="Float number (0,1] that defines the length of the summary. It's a proportion of the original text") + parser.add_argument('--words', '-w', metavar="#words", type=int, + help="Number to limit the length of the summary. 
The length option is ignored if the word limit is set.") + parser.add_argument('--additional_stopwords', '-a', metavar="list,of,stopwords", + help="Either a string of comma separated stopwords or a path to a file which has comma separated stopwords in every line") + + return parser.parse_args(args) + + +def main(): + args = parse_args(sys.argv[1:]) + + mode = None + text = None + + if args.summarize: + text = args.summarize + mode = SENTENCE + elif args.keywords: + text = args.keywords + mode = WORD + elif args.summary: # Old api + warnings.warn("The --summary option is deprecated. Please use either --summarize or --keywords", DeprecationWarning) + text = args.text + mode = args.summary + + if text is None: + raise argparse.ArgumentTypeError('Error: no text to summarize provided.') + else: + raise argparse.ArgumentTypeError('Error: --summarize or --keywords is required') + + additional_stopwords = None + if args.additional_stopwords: + if os.path.exists(args.additional_stopwords): + with open(args.additional_stopwords) as f: + additional_stopwords = {s for l in f for s in l.strip().split(",")} + else: + additional_stopwords = args.additional_stopwords.split(",") + + print(textrank(text, mode, args.ratio, args.words, additional_stopwords)) + + +if __name__ == "__main__": + main() diff --git a/template.html b/template.html new file mode 100644 index 0000000..504c0eb --- /dev/null +++ b/template.html @@ -0,0 +1,31 @@ + + + + + + + + + TextRank Opacity + + + + + + + + + + + +
+ + {% for s in sentences %} + {{ s.html|safe }} + {% endfor %} + +
+ + + + diff --git a/texts/warehouse.txt b/texts/warehouse.txt new file mode 100644 index 0000000..b16a42e --- /dev/null +++ b/texts/warehouse.txt @@ -0,0 +1 @@ +A warehouse is a building for storing goods. Warehouses are used by manufacturers, importers, exporters, wholesalers, transport businesses, customs, etc. They are usually large plain buildings in industrial parks on the outskirts of cities, towns, or villages. They usually have loading docks to load and unload goods from trucks. Sometimes warehouses are designed for the loading and unloading of goods directly from railways, airports, or seaports. They often have cranes and forklifts for moving goods, which are usually placed on ISO standard pallets and then loaded into pallet racks. Stored goods can include any raw materials, packing materials, spare parts, components, or finished goods associated with agriculture, manufacturing, and production. In India and Hong Kong, a warehouse may be referred to as a "godown". There are also godowns in the Shanghai Bund. \ No newline at end of file diff --git a/www/css/main.css b/www/css/main.css new file mode 100644 index 0000000..46097e8 --- /dev/null +++ b/www/css/main.css @@ -0,0 +1,38 @@ + +:root{ + --lh: 1.35rem; +} + +body{ + margin: var(--lh); + line-height: var(--lh); +} + +@media print{ + body{ + margin: 0; + font-size: 10pt; + } +} + +main{ + max-width: 42rem; + margin: 0 auto; +} + +/* h1,h2,h3,h4,h5,h6{ + line-height: var(--lh); +} */ + +h1{ + text-align: center; + margin: calc(2 * var(--lh)) 0; +} + +h2,h3,h4,h5,h6{ + margin: calc(3 * var(--lh)) 0 var(--lh); +} + +:is(h1,h2,h3,h4,h5,h6) + :is(h1,h2,h3,h4,h5,h6){ + margin-top: var(--lh); +} \ No newline at end of file diff --git a/www/index.html b/www/index.html new file mode 100644 index 0000000..1dec090 --- /dev/null +++ b/www/index.html @@ -0,0 +1,177 @@ + + + + + + + + + TextRank Opacity + + + + + + + + + + + +
+ + +

Sambucus

+ + Sambucus is a genus of flowering plants in the family Adoxaceae. + + The various species are commonly called elder or elderberry. + + The genus was formerly placed in the honeysuckle family, Caprifoliaceae, but was reclassified as Adoxaceae due to genetic and morphological comparisons to plants in the genus Adoxa. + +

Description

+ + The oppositely arranged leaves are pinnate with 5–9 leaflets (or, rarely, 3 or 11). + + Each leaf is 5–30 cm (2–12 in) long, and the leaflets have serrated margins. + + They bear large clusters of small white or cream-colored flowers in late spring; these are followed by clusters of small black, blue-black, or red berries (rarely yellow or white). + +

Color

+ + Sambucus fruit is rich in anthocyanidins that combine to give elderberry juice an intense blue-purple coloration that turns reddish on dilution with water. + + These pigments are used as colorants in various products, and "elderberry juice color" is listed by the US FDA as allowable in certified organic food products. + + In Japan, elderberry juice is listed as an approved "natural color additive" under the Food and Sanitation Law. Fibers can be dyed with elderberry juice (using alum as a mordant) to give a light "elderberry" color. + +

Toxicity

+ + Although the cooked berries (pulp and skin) of most species of Sambucus are edible, the uncooked berries and other parts of plants from this genus are poisonous. + + Leaves, twigs, branches, seeds, roots, flowers, and berries of Sambucus plants produce cyanogenic glycosides, which have toxic properties. + + Ingesting a sufficient quantity of cyanogenic glycosides from berry juice, flower tea, or beverages made from fresh leaves, branches, and fruit has been shown to cause illness, including nausea, vomiting, abdominal cramps, diarrhea, and weakness. + + In August 1983, a group of 25 people in Monterey County, California, became suddenly ill by ingesting elderberry juice pressed from fresh, uncooked Sambucus mexicana berries, leaves, and stems. + + The density of cyanogenic glycosides is higher in tea made from flowers (or leaves) than from the berries.The seeds of Sambucus callicarpa are reported to be poisonous and may cause vomiting or diarrhea. + +

Taxonomy

+ + The taxonomy of the genus Sambucus L., originally described by Carl Linnaeus and hence its botanical authority, has been complicated by its wide geographical distribution and morphological diversity. + + This has led to overdescription of the species and infraspecific taxa (subspecies, varieties or forms). + + The name comes from the Greek word sambuce, an ancient wind instrument, about the removal of pith from the twigs to make whistles.Species recognized in this genus are: + +

Distribution and habitat

+ + The genus occurs in temperate to subtropical regions of the world. + + More widespread in the Northern Hemisphere, its Southern Hemisphere occurrence is restricted to parts of Australasia and South America. + + Many species are widely cultivated for their ornamental leaves, flowers, and fruit. + +

Habitat

+ + Elder commonly grows near farms and homesteads. + + It is a nitrogen-dependent plant and thus is generally found near places of organic waste disposal. + + Elders are often grown as a hedgerow plant in Britain since they take very fast, can be bent into shape easily, and grow quite profusely, thus having gained the reputation of being 'an instant hedge'. + + It is not generally affected by soil type or pH level and will virtually grow anywhere sufficient sunlight is available. + +

Ecology

+ + In Northern California, elderberries are a food for migrating band-tailed pigeons. + + Elders are used as food plants by the larvae of some Lepidoptera species including brown-tail, buff ermine, dot moth, emperor moth, engrailed moth, swallow-tailed moth and the V-pug. + + The crushed foliage and immature fruit have a strong fetid smell. + + Valley elderberry longhorn beetles in California are very often found around red or blue elderberry bushes. + + Females lay their eggs on the bark. + + The pith of elder has been used by watchmakers for cleaning tools before intricate work. + +

Cultivation

+ + Traditional uses of Sambucus involved berries, seeds, leaves, and flowers or component extracts. + + Ornamental varieties of Sambucus are grown in gardens for their showy flowers, fruits and lacy foliage which support habitat for wildlife. + + Of the many native species, three are used as ornamentals, S. + + nigra, S. + + canadensis and S. + + racemosa. + +

Uses

+ +

Nutrition

+ + Raw elderberries are 80% water, 18% carbohydrates, and less than 1% each of protein and fat (table). + + In a 100-gram (3+1⁄2 oz) amount, elderberries supply 305 kilojoules (73 kcal) of food energy and are a rich source of vitamin C, providing 43% of the Daily Value (DV). + + Elderberries also have moderate contents of vitamin B6 (18% DV) and iron (12% DV), with no other nutrients in significant content. + +

Dietary supplement

+ + Elderberry fruit or flowers are used as dietary supplements to prevent or provide relief from minor diseases, such as flu, colds, constipation, and other conditions, served as a tea, extract or in a capsule. + + The use of elderberry supplements increased early in the COVID-19 pandemic. + + There is insufficient research to establish its effectiveness for such uses, or its safety profile. + + The raw or unripe fruit of S. + + nigra or its extracts may contain a cyanogenic glycoside that is potentially toxic. + +

Traditional medicine

+ + Although practitioners of traditional medicine have used elderberry over centuries, there is no high-quality clinical evidence that such practices provide any benefit. + + The flowers of Sambucus nigra are used to produce elderflower cordial. + + St-Germain, a French liqueur, is made from elderflowers. + + Hallands Fläder, a Swedish akvavit, is flavoured with elderflowers. + + Hollowed elderberry twigs have traditionally been used as spiles to tap maple trees for syrup. + + Additionally, they have been hollowed out and used as flutes, blowguns, and syringes.The fruit of S. + + callicarpa is eaten by birds and mammals. + + It is inedible to humans when raw but can be made into wine.Elderberry twigs and fruit are employed in creating dyes for basketry. + + These stems are dyed a very deep black by soaking them in a wash made from the berry stems of the elderberry. + +

In popular culture

+ + Folklore related to elder trees is extensive and can vary according to region. + + In some traditions, the elder tree is thought to ward off evil and give protection from witches, while other beliefs say that witches often congregate under the plant, especially when it is full of fruit. + + If an elder tree was cut down, a spirit known as the Elder Mother would be released and take her revenge. + + The tree could only safely be cut while chanting a rhyme to the Elder Mother.Made from the branch of an elder tree, the Elder Wand plays a pivotal role in the final book of the Harry Potter series, which was nearly named Harry Potter and the Elder Wand before author J. + + K. Rowling decided on Harry Potter and the Deathly Hallows.Elton John's 1973 album Don't Shoot Me I'm Only the Piano Player features a song titled "Elderberry Wine". + + In Monty Python and the Holy Grail, John Cleese as the French Taunter tells the knights of Camelot, "Your mother was a hamster, and your father smelt of elderberries." + +

Gallery

+ + +
+ + + + \ No newline at end of file
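
Note (not part of the patch itself): a minimal sketch of how the modified summarizer in this diff can be driven. Unlike upstream summa, the `summa.summarizer.summarize` added here returns every `SyntacticUnit` with its raw PageRank score (see the `# EDIT` comment in summa/summarizer.py) instead of a trimmed summary, leaving the ranking-to-presentation decision to the caller. The max-normalisation step below is an illustrative assumption for display purposes, not something the patch performs.

    from summa.summarizer import summarize

    # texts/warehouse.txt is added by this patch.
    with open("texts/warehouse.txt") as f:
        text = f.read()

    # The patched summarize() returns all SyntacticUnit objects, each carrying
    # .text, .token, .index and the raw PageRank .score computed on the
    # sentence-similarity graph (shared words / summed log sentence lengths).
    sentences = summarize(text, language="english")

    # Hypothetical normalisation: scale against the top-ranked sentence so
    # scores land in [0, 1] (this step is not in the patch).
    top = max((s.score for s in sentences), default=1.0) or 1.0
    for s in sentences:
        print(f"{s.score / top:.2f}  {s.text}")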