Spaces:
Running
Running
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from markdownify import MarkdownConverter | |
def md(soup, **options):
    """Convert a parsed BeautifulSoup node to Markdown.

    Args:
        soup: The BeautifulSoup document or tag to convert.
        **options: Keyword options forwarded unchanged to
            ``MarkdownConverter`` (for example ``strip=[...]``).

    Returns:
        Whatever ``MarkdownConverter.convert_soup`` produces for *soup*.
    """
    converter = MarkdownConverter(**options)
    return converter.convert_soup(soup)
def main_fn(url: str, check: list[str]):
    """Download the page at *url* and return its content as Markdown.

    ``<script>`` and ``<style>`` elements are removed before conversion.
    The ``<main>`` element is converted when the page has one; otherwise the
    ``<body>`` is used, and if neither exists the whole parsed document is
    converted.

    Args:
        url: Address of the page to fetch.
        check: Tag names (e.g. ``"a"``, ``"img"``) handed to the converter's
            ``strip`` option so those tags are not rendered as Markdown.

    Returns:
        The Markdown produced by ``md`` for the selected part of the page.

    Raises:
        Any exception raised by ``requests.get`` (invalid URL, connection
        failure, timeout, ...) propagates to the caller unchanged.
    """
    # A timeout keeps a stalled server from blocking the request forever.
    response = requests.get(url, timeout=30)

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    # Remove non-content elements. The original referenced ``t.clear``
    # without calling it, so nothing was ever removed.
    for element in soup.find_all(["script", "style"]):
        element.decompose()

    # Prefer <main>, then <body>, then the whole document (HTML fragments
    # may have neither).
    root = soup.find("main")
    if root is None:
        root = soup.find("body")
    if root is None:
        root = soup

    # Apply the ignore-tag selection on every path, not only for <main>.
    return md(root, strip=check)
# HTML blurb shown under the title: English text, Japanese text, and a link
# to the newer version of the Space.
_DESCRIPTION = """<div style="width: fit-content; margin: 0 auto;">Gets HTML given by URL and converts it to Markdown.Does not support dynamically generated HTML such as React.</div>
<div style="width: fit-content; margin: 0 auto;">URLで与えたHTMLを取得してMarkdownに変換します。Reactなどの動的に生成されるHTMLには対応していません</div>
<div style="width: fit-content; margin: 0 auto;"><a href="https://huggingface.co/spaces/moritalous/url-to-markdown-v2">New Version is here.</a></div>"""

# Input widgets, listed in the positional order main_fn receives them:
# (url, check).
_url_input = gr.Text(label="URL", placeholder="https://*****")
_ignore_tags_input = gr.CheckboxGroup(
    label="Ignore tags(無視するタグ)",
    choices=["a", "img", "noscript"],
    value=["a", "img"],
)

# Output widget holding the converted Markdown, with a copy button.
_markdown_output = gr.TextArea(label="Markdown", show_copy_button=True)

demo = gr.Interface(
    fn=main_fn,
    inputs=[_url_input, _ignore_tags_input],
    outputs=[_markdown_output],
    title="URL to Markdown",
    description=_DESCRIPTION,
    allow_flagging="never",
)

demo.launch()