|
|
|
@ -1,14 +1,36 @@
|
|
|
|
|
import wikipedia
|
|
|
|
|
|
|
|
|
|
# Select "en" as the language of the wikipedia module when this file is loaded.
wikipedia.set_lang("en")
|
|
|
|
|
|
|
|
|
|
# wikipedia
|
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def get_wikipage(pagename):
|
|
|
|
|
def is_header(line):
    """Parse a wiki-style section header such as ``== Title ==``.

    The header level is the number of ``=`` characters that appear on
    *both* ends of the stripped line (the smaller of the leading and
    trailing runs). Any surplus ``=`` on the longer side is kept as part
    of the header text.

    Args:
        line: A single line of text; surrounding whitespace is ignored.

    Returns:
        ``[header_text, header_level]`` when the line is a header,
        otherwise ``None`` (blank line, line made only of ``=``, or line
        not wrapped in ``=`` on both sides).
    """
    line = line.strip()
    if not line:
        return None

    # A line made only of '=' has no title text. Rejecting it here also
    # guarantees that the opening and closing markers can never overlap.
    if not line.strip('='):
        return None

    leading = len(line) - len(line.lstrip('='))
    trailing = len(line) - len(line.rstrip('='))
    header_level = min(leading, trailing)
    if header_level == 0:
        return None

    # Drop exactly `header_level` characters from each end; an explicit
    # end index avoids cutting one character too many on the right.
    header_text = line[header_level:len(line) - header_level].strip()
    return [header_text, header_level]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_wikipage(pagename, lang = "en"):
|
|
|
|
|
# get wikipedia page content by name of the page
|
|
|
|
|
|
|
|
|
|
wikipedia.set_lang(lang)
|
|
|
|
|
|
|
|
|
|
print(pagename)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
results = wikipedia.search(pagename, results=1, suggestion=False)
|
|
|
|
|
try:
|
|
|
|
|