You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
import wikipedia
|
|
|
|
|
|
|
|
# wikipedia
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def is_header(line):
    """Parse a wiki-style section header such as ``== Title ==``.

    A header is a line wrapped in the same number of ``=`` characters on
    both sides; that number is the header level.

    Args:
        line: A single line of text; surrounding whitespace is ignored.

    Returns:
        ``[header_text, header_level]`` when the line is a header, or
        ``None`` when it is not (empty line, no enclosing ``=`` pair, or
        a line made up of ``=`` characters only).
    """
    line = line.strip()

    # An empty line, or one consisting solely of '=', carries no header
    # text.  Rejecting it here also guarantees the scan below hits a
    # non-'=' character before the two indices can run off the string.
    if not line.strip('='):
        return None

    # Count how many '=' pairs enclose the text, walking inwards from
    # both ends at the same time.
    i = 0
    while line[i] == '=' and line[-1 - i] == '=':
        i += 1

    if i == 0:
        return None

    # Drop exactly i characters from each end.  An explicit end index is
    # used because a negative one (-1 - i) would cut one character too
    # many and truncate headers written without padding spaces.
    header_text = line[i:len(line) - i].strip()
    header_level = i
    return [header_text, header_level]
|
|
|
|
|
|
|
|
|
|
|
|
def get_wikipage(pagename, lang="en"):
    """Fetch a Wikipedia page by (approximate) name.

    The library language is set to ``lang``, the requested name is
    printed, and a one-result search is used to resolve ``pagename`` to
    an actual page title before the page is loaded.

    Args:
        pagename: Name of the page to look up.
        lang: Language code passed to ``wikipedia.set_lang``.

    Returns:
        A ``wikipedia.WikipediaPage`` for the top search result, or an
        empty string when the lookup hits a disambiguation error (the
        candidate options are printed in that case).

    Raises:
        wikipedia.PageError: If the search returns no results.
    """
    wikipedia.set_lang(lang)
    print(pagename)

    try:
        hits = wikipedia.search(pagename, results=1, suggestion=False)
        try:
            best_match = hits[0]
        except IndexError:
            # No search results at all: the page does not exist.
            raise wikipedia.PageError(pagename)
        return wikipedia.WikipediaPage(best_match, redirect=True, preload=True)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        # Show the candidate titles and report "no page" to the caller.
        print(ambiguous.options)
        return ''
|
|
|
|
|