| | import gradio as gr |
| | import wikipedia |
| | import numpy as np |
| | import pandas as pd |
| | from os import path |
| | from PIL import Image |
| | from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator |
| | import matplotlib.pyplot as plt |
| |
|
def wikipediaScrap(article_name, wikipedia_language="en - English"):
    """Fetch a Wikipedia article and build a word cloud of its content.

    Parameters
    ----------
    article_name : str
        Title of the Wikipedia article to fetch.
    wikipedia_language : str
        Dropdown entry formatted as "<code> - <native name>",
        e.g. "en - English"; only the code before " - " is used.

    Returns
    -------
    tuple
        (article title, article content, page URL,
         newline-joined linked-article titles,
         matplotlib figure containing the word cloud).

    Raises
    ------
    wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError
        Propagated from the `wikipedia` package when the lookup fails.
    """
    # Extract the ISO language code from the "code - name" dropdown value.
    lang_code = wikipedia_language.split(" - ")[0]
    if lang_code:
        wikipedia.set_lang(lang_code)

    et_page = wikipedia.page(article_name)
    title = et_page.title
    content = et_page.content
    page_url = et_page.url
    linked_pages = et_page.links

    # Fall back to WordCloud's default font when the bundled font file is
    # missing, instead of crashing the whole request with an OSError.
    font_file = "HelveticaWorld-Regular.ttf"
    font = font_file if path.exists(font_file) else None
    wordcloud = WordCloud(font_path=font).generate(content)

    # Draw on a fresh figure so repeated calls don't stack images onto the
    # shared global pyplot state; return the figure itself (gr.Plot accepts
    # either a figure or the pyplot module, so callers are unaffected).
    fig = plt.figure()
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    return title, content, page_url, "\n".join(linked_pages), fig
| |
|
# Custom CSS injected into the Gradio app: hides the Gradio footer/branding
# and tweaks widget sizing and hover colors.
# Fix: the #dsd_button rule used a comma between declarations, which is
# invalid CSS and silently dropped the button styling; use a semicolon.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
footer {visibility: hidden}
#dsd_button {background: purple; color: white}

textarea[data-testid="textbox"] { height: 178px !important}

#mytext {height: 43px !important;}

.max-h-[30rem] {max-height: 18rem !important;}

.hover\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}
"""
| |
|
# Build the dropdown choices as "<code> - <native name>" entries (e.g.
# "en - English") from Wikipedia's supported-language map.
ini_dict = wikipedia.languages()

# keys/values are kept as module-level names for backward compatibility;
# the comprehensions replace the old manual loop with its paired
# `keys.append(...), values.append(...)` comma-expression.
keys = list(ini_dict)             # language codes
values = list(ini_dict.values())  # native language names
language = [f"{code} - {name}" for code, name in ini_dict.items()]
| |
|
# UI layout: inputs on top; article metadata and the word cloud side by
# side; full content and the linked-article list underneath; examples and
# the scrape button wired to wikipediaScrap.
with gr.Blocks(title="Wikipedia Article Scrape | Data Science Dojo", css=css) as demo:
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        # Pick the English entry by prefix search rather than the fragile
        # hard-coded index 105, which silently breaks if the ordering of
        # wikipedia.languages() ever changes.
        default_language = next((entry for entry in language if entry.startswith("en - ")), language[0])
        lan = gr.Dropdown(label=" Select Language", choices=language, value=default_language, interactive=True)

    btn = gr.Button("Start Scraping", elem_id="dsd_button")
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## About""")
            title = gr.Textbox(label="Article title")
            url = gr.Textbox(label="Article URL")
        with gr.Column():
            gr.Markdown("""## Wordcloud""")
            wordcloud = gr.Plot()
    gr.Markdown("""### Content""")
    with gr.Row():
        content = gr.Textbox(label="Content")
    gr.Markdown("""### Linked Articles""")
    with gr.Row():
        linked = gr.Textbox(label="Linked Articles")
    with gr.Row():
        gr.Examples(
            examples=[["Eiffel Tower", "en - English"], ["Eiffel tower", 'ur - اردو']],
            fn=wikipediaScrap, inputs=[inp, lan],
            outputs=[title, content, url, linked, wordcloud], cache_examples=True)
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud])

demo.launch()