orgoflu committed on
Commit
908d904
·
verified ·
1 Parent(s): 3bd5481
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import trafilatura
3
+ import requests
4
+
5
def extract(url):
    """Fetch a web page and return its main body text.

    Args:
        url: Address of the page to fetch. Leading/trailing whitespace is
            ignored. Empty, whitespace-only, or non-string values are
            rejected without making a network request.

    Returns:
        The text extracted by ``trafilatura.extract``, or a human-readable
        (Korean) message describing why no text could be produced. This
        function never raises; every failure is reported through the
        returned string so it can be shown directly in the UI.
    """
    # Reject blank input up front so the user gets a clear prompt instead of
    # a low-level "Invalid URL" error from requests.
    if not isinstance(url, str) or not url.strip():
        return "URL을 입력해 주세요."
    url = url.strip()

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # If the server did not declare a charset, requests falls back to
        # ISO-8859-1 for text/* responses, which garbles non-Latin pages.
        # Use the encoding detected from the body in that case.
        if "charset" not in r.headers.get("Content-Type", "").lower():
            r.encoding = r.apparent_encoding
        text = trafilatura.extract(r.text)
        return text or "본문을 추출할 수 없습니다."
    except requests.exceptions.Timeout:
        # Timeout is a RequestException subclass, so it must be caught first
        # to get its dedicated message.
        return "요청이 시간 초과되었습니다."
    except requests.exceptions.RequestException as e:
        return f"요청 실패: {e}"
    except Exception as e:
        # UI boundary: report unexpected errors as text rather than crashing.
        return f"에러 발생: {e}"
18
+
19
# UI components: one textbox for the URL, one multi-line textbox for the result.
_url_input = gr.Textbox(label="URL 입력", placeholder="https://example.com")
_body_output = gr.Textbox(label="추출된 본문", lines=20)

# Wire the components to `extract` in a single-function Gradio interface.
iface = gr.Interface(
    fn=extract,
    inputs=_url_input,
    outputs=_body_output,
    title="본문 추출기",
    description="웹페이지 URL을 입력하면 본문만 추출합니다.",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()