"""Open-domain QA demo: turns a question into a Wikipedia query, fetches the
article, and returns a short extractive summary through a Gradio interface."""

from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re
# Import only WordCloud here: importing wordcloud's STOPWORDS as well would
# shadow gensim's STOPWORDS above.
from wordcloud import WordCloud
import matplotlib.pyplot as plt

nltk.download('punkt')


def opendomain(text, wikipedia_language="en"):
    try:
        # Filler words to strip from the question before searching Wikipedia.
        question_words = STOPWORDS.union(
            {'likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose', 'important', 'better'}
        )
        tokens = word_tokenize(text.lower())
        keywords = [i for i in tokens if i not in question_words]
        # Join the remaining keywords with spaces to form the search query.
        query = " ".join(keywords)

        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        # Fetch the Wikipedia page that best matches the query.
        et_page = wikipedia.page(query)
        content = et_page.content

        text = content
        # Render a word cloud of the article for local inspection (it is not
        # returned to Gradio). Assumes HelveticaWorld-Regular.ttf sits next to
        # this script.
        wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(text)
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
    except Exception:
        return "Please enter a valid Wikipedia article name or question"

    # Drop section headings like "== History ==" and split into paragraphs.
    final_out = re.sub(r'\=.+\=', '', text)
    result = [p for p in final_out.split('\n\n') if p != '']

    # Keep the first three sentences of each long paragraph, looking at no
    # more than the first six paragraphs (guard against shorter articles).
    answer = []
    for i in range(min(6, len(result))):
        if len(result[i]) > 500:
            summary_point = result[i].split(".")[0:3]
            answer.append(summary_point)

    # Stitch the selected sentences back together into a readable summary.
    final = ""
    for value in answer:
        joint_value = ".".join(value)
        if final == "":
            final += joint_value
        else:
            final = f"{final}.\n\n{joint_value}"

    return final


# Expose the question-answering function through a simple Gradio text UI.
iface = gr.Interface(fn=opendomain, inputs=gr.Textbox(lines=5), outputs="text")
iface.launch()
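# Example: a question like "What is machine learning?" should resolve to the
# "Machine learning" article and return the opening sentences of its longest
# paragraphs. (Illustrative only; results depend on Wikipedia's page lookup.)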