jaimin committed on
Commit
437ea75
·
1 Parent(s): 0135350

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -13
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import wikipedia
2
  import gradio as gr
3
  from gradio.mix import Parallel
@@ -7,27 +8,38 @@ from nltk.tokenize import word_tokenize
7
  from nltk.tokenize import sent_tokenize
8
  import re
9
  nltk.download('punkt')
 
 
 
 
 
 
10
 
11
 
12
- def opendomain(text):
13
- question_words = ["what", "why", "when", "where","name", "is", "how", "do", "does",
14
- "which", "are", "could", "would","should", "has", "have", "whom",
15
- "whose", "don't", "a", "an","?",".","the","i","you","he","she","it",
16
- "that","this",",","am",","]
17
  lower_text = text.lower()
18
  lower_text = word_tokenize(lower_text)
19
  new_text = [i for i in lower_text if i not in question_words]
20
  new_txt = "".join(new_text)
 
 
21
 
22
- r = requests.post(
23
- url="https://jaimin-new-content.hf.space/run/predict",
24
- json={"data": [new_txt, "en"]},
25
- )
26
- response = r.json()
27
- text1 = response["data"]
28
- final_out = text1[0]
29
- final_out=re.sub(r'\=.+\=', '', final_out)
30
 
 
 
 
 
 
 
 
 
 
31
  result = list(filter(lambda x: x != '', final_out.split('\n\n')))
32
 
33
  answer = []
@@ -49,3 +61,8 @@ def opendomain(text):
49
  iface = gr.Interface(fn=opendomain, inputs=[gr.inputs.Textbox(lines=5)], outputs="text")
50
  iface.launch()
51
 
 
 
 
 
 
 
1
+ from gensim.parsing.preprocessing import STOPWORDS
2
  import wikipedia
3
  import gradio as gr
4
  from gradio.mix import Parallel
 
8
  from nltk.tokenize import sent_tokenize
9
  import re
10
  nltk.download('punkt')
11
+ import numpy as np
12
+ import pandas as pd
13
+ from os import path
14
+ from PIL import Image
15
+ from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
16
+ import matplotlib.pyplot as plt
17
 
18
 
19
+ def opendomain(text,wikipedia_language="en"):
20
+ question_words = STOPWORDS.union(set(['likes','play','.',',','like',"don't",'?','use','choose','important','better','?']))
 
 
 
21
  lower_text = text.lower()
22
  lower_text = word_tokenize(lower_text)
23
  new_text = [i for i in lower_text if i not in question_words]
24
  new_txt = "".join(new_text)
25
+ if wikipedia_language:
26
+ wikipedia.set_lang(wikipedia_language)
27
 
28
+ et_page = wikipedia.page(new_txt.replace(" ", ""))
29
+ title = et_page.title
30
+ content = et_page.content
31
+ page_url = et_page.url
32
+ linked_pages = et_page.links
 
 
 
33
 
34
+ text = content
35
+ print(type(text))
36
+ wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(text)
37
+
38
+ plt.imshow(wordcloud, interpolation='bilinear')
39
+ plt.axis("off")
40
+
41
+
42
+ final_out = re.sub(r'\=.+\=', '', text)
43
  result = list(filter(lambda x: x != '', final_out.split('\n\n')))
44
 
45
  answer = []
 
61
  iface = gr.Interface(fn=opendomain, inputs=[gr.inputs.Textbox(lines=5)], outputs="text")
62
  iface.launch()
63
 
64
+
65
+
66
+
67
+
68
+