Spaces:
Build error
Build error
Commit
·
dffeab2
1
Parent(s):
09d0101
Added download button for JSON file download.
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
|
|
| 3 |
import re
|
| 4 |
from urllib.parse import urlparse
|
| 5 |
import gradio as gr
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def extract_wikipedia_text(raw_text, language):
|
|
@@ -47,10 +48,13 @@ def scrape(url):
|
|
| 47 |
)
|
| 48 |
contents = extract_wikipedia_text(raw_text, language)
|
| 49 |
json_output = {"source": url, f"title-{language}": title, "pages": contents}
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
-
|
|
|
|
| 54 |
gr.Markdown(
|
| 55 |
f"""
|
| 56 |
<center>
|
|
@@ -60,8 +64,10 @@ with gr.Blocks() as demo:
|
|
| 60 |
)
|
| 61 |
with gr.Row():
|
| 62 |
inp = gr.Textbox(placeholder="Wikipedia URL")
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
btn = gr.Button("Scrape")
|
| 65 |
-
btn.click(fn=scrape, inputs=inp, outputs=out)
|
| 66 |
|
| 67 |
-
demo.launch()
|
|
|
|
| 3 |
import re
|
| 4 |
from urllib.parse import urlparse
|
| 5 |
import gradio as gr
|
| 6 |
+
import json
|
| 7 |
|
| 8 |
|
| 9 |
def extract_wikipedia_text(raw_text, language):
|
|
|
|
| 48 |
)
|
| 49 |
contents = extract_wikipedia_text(raw_text, language)
|
| 50 |
json_output = {"source": url, f"title-{language}": title, "pages": contents}
|
| 51 |
+
with open("data.json", "w") as f:
|
| 52 |
+
json.dump(json_output, f)
|
| 53 |
+
return json_output, "data.json"
|
| 54 |
|
| 55 |
|
| 56 |
+
style_sheet = "#json-output { max-height: 400px; overflow-y: auto; }"
|
| 57 |
+
with gr.Blocks(css=style_sheet) as demo:
|
| 58 |
gr.Markdown(
|
| 59 |
f"""
|
| 60 |
<center>
|
|
|
|
| 64 |
)
|
| 65 |
with gr.Row():
|
| 66 |
inp = gr.Textbox(placeholder="Wikipedia URL")
|
| 67 |
+
with gr.Column():
|
| 68 |
+
out = gr.JSON(elem_id="json-output")
|
| 69 |
+
out_download = gr.File()
|
| 70 |
btn = gr.Button("Scrape")
|
| 71 |
+
btn.click(fn=scrape, inputs=inp, outputs=[out, out_download])
|
| 72 |
|
| 73 |
+
demo.launch(debug=True)
|