Spaces:
Runtime error
Runtime error
File size: 2,044 Bytes
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
from gradio.mix import Parallel
import requests
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
import re
nltk.download('punkt')
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
def _summarize_paragraphs(article_text, min_length=500):
    """Return a bullet list with the first sentence of each long paragraph.

    Every ``=...=`` span on a line is removed first (this targets the
    ``== Heading ==`` markers in the article text), then the text is split
    into paragraphs on blank lines.  Only paragraphs longer than
    ``min_length`` characters contribute a bullet.

    Args:
        article_text: Plain-text article body.
        min_length: A paragraph must be strictly longer than this many
            characters to be summarized.

    Returns:
        The bullets (``"* <first sentence>."``) joined by blank lines, or an
        empty string when no paragraph is long enough.
    """
    without_headings = re.sub(r'\=.+\=', '', article_text)
    bullets = []
    for paragraph in without_headings.split('\n\n'):
        if len(paragraph) <= min_length:
            continue
        # Everything before the first period stands in for the paragraph;
        # embedded newlines are dropped so each bullet stays on one line.
        first_sentence = paragraph.split(".")[0].replace("\n", "")
        bullets.append("* " + first_sentence + ".")
    return "\n\n".join(bullets)


def opendomain(text, wikipedia_language="en"):
    """Summarize the Wikipedia page that matches a question or article name.

    The input is lower-cased, tokenized, and stripped of stop words; the
    remaining tokens form the page query.  The page content is then reduced
    to a bullet list by ``_summarize_paragraphs``.

    Args:
        text: The user's question or an article name.
        wikipedia_language: Language code passed to ``wikipedia.set_lang``.
            A falsy value leaves the library's current language untouched.

    Returns:
        The bullet-list summary, or the string
        ``"Please write correct wikipedia article name OR question"`` when
        anything in the lookup/rendering stage raises.
    """
    try:
        # NOTE(review): STOPWORDS is imported from both gensim and wordcloud
        # at the top of this file; the later import is the one in effect.
        question_words = STOPWORDS.union(
            {'likes', 'play', '.', ',', 'like', "don't", '?', 'use',
             'choose', 'important', 'better'})
        tokens = word_tokenize(text.lower())
        keywords = [token for token in tokens if token not in question_words]
        # NOTE(review): the keywords are concatenated with no separator, so
        # "albert einstein" is looked up as "alberteinstein" -- confirm this
        # is intended before changing it.
        query = "".join(keywords).replace(" ", "")
        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)
        article_text = wikipedia.page(query).content
        # NOTE(review): the word cloud is drawn on pyplot's current figure
        # but never returned to the caller; because it sits inside this
        # ``try``, a rendering failure (e.g. the font file cannot be loaded)
        # is reported with the same message as a failed lookup.
        word_cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(article_text)
        plt.imshow(word_cloud, interpolation='bilinear')
        plt.axis("off")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        return "Please write correct wikipedia article name OR question"
    return _summarize_paragraphs(article_text)
# Gradio UI: a single five-line text box feeds `opendomain`, and the returned
# string is shown as plain text.
iface = gr.Interface(
    fn=opendomain,
    inputs=[
        gr.inputs.Textbox(lines=5),
    ],
    outputs="text",
)

# Start serving the interface.
iface.launch()
|