File size: 2,044 Bytes
437ea75
4d06235
 
 
 
 
 
 
 
0135350
437ea75
 
 
 
 
 
4d06235
 
437ea75
4aa723b
 
 
 
 
 
 
 
 
4d06235
4aa723b
 
 
 
 
4d06235
4aa723b
 
437ea75
4aa723b
 
 
 
437ea75
 
 
4d06235
 
d30b29d
4d06235
d30b29d
4d06235
 
 
d30b29d
 
 
 
 
 
 
4d06235
437ea75
d30b29d
437ea75
 
 
d30b29d
 
437ea75
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
from gradio.mix import Parallel
import requests
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
import re
nltk.download('punkt')
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt


def opendomain(text, wikipedia_language="en"):
  """Fetch the Wikipedia article best matching *text* and return a bulleted summary.

  The input is lowercased, tokenized, and stripped of stopwords/filler
  words to form a page query.  For every paragraph of the article longer
  than 500 characters, the first sentence is kept as one summary bullet.

  Parameters
  ----------
  text : str
      A question or article name, e.g. "What is machine learning?".
  wikipedia_language : str, optional
      Wikipedia language code passed to ``wikipedia.set_lang`` (default "en").

  Returns
  -------
  str
      A "\\n\\n"-joined list of "* sentence." bullets, or an error message
      when no matching page is found.
  """
  # NOTE(review): STOPWORDS here resolves to the *wordcloud* set — the later
  # `from wordcloud import ... STOPWORDS` shadows the gensim import at the
  # top of the file. Confirm which set was intended.
  question_words = STOPWORDS.union(
    {'likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose',
     'important', 'better'})
  tokens = word_tokenize(text.lower())
  # BUG FIX: the original joined tokens with "" ("machine learning" ->
  # "machinelearning"), which breaks the page lookup; join with spaces.
  query = " ".join(t for t in tokens if t not in question_words)

  if wikipedia_language:
    wikipedia.set_lang(wikipedia_language)

  try:
    # Keep the try-block narrow: only the page lookup should map to the
    # user-facing error message. The original bare `except:` around ~20
    # lines meant e.g. a missing font file masked a successful lookup.
    content = wikipedia.page(query).content
  except Exception:
    return "Please write correct wikipedia article name OR question"

  # Best-effort word cloud (side effect only; the figure is never shown or
  # returned). A rendering failure must not affect the summary.
  try:
    wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
  except Exception:
    pass

  # Drop "== Section ==" headings, then split the article into paragraphs.
  plain = re.sub(r'\=.+\=', '', content)
  paragraphs = [p for p in plain.split('\n\n') if p]
  # First sentence of each sufficiently long paragraph becomes one bullet.
  bullets = [
    "* " + p.split(".")[0].replace("\n", "") + "."
    for p in paragraphs
    if len(p) > 500
  ]
  return "\n\n".join(bullets)



# Expose `opendomain` as a web UI: one multi-line textbox in, plain text out.
# NOTE(review): `gr.inputs.*` is the legacy (pre-3.x) Gradio namespace —
# consistent with the `gradio.mix` import above. Do not modernize to
# `gr.Textbox` without pinning/verifying the installed Gradio version.
iface = gr.Interface(fn=opendomain, inputs=[gr.inputs.Textbox(lines=5)], outputs="text")
iface.launch()  # blocks and serves the app (module-level side effect)