orgoflu committed on
Commit
1a41c9a
·
verified ·
1 Parent(s): a1acc56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -21
app.py CHANGED
@@ -1,31 +1,60 @@
1
- from flask import Flask, request, jsonify
 
2
  import requests
 
 
 
 
3
 
4
app = Flask(__name__)


def _fence_html(html):
    """Wrap *html* in a Markdown ``html`` code fence the content cannot close.

    The fence is three backticks unless the content itself contains a run of
    three or more backticks, in which case it is made one backtick longer than
    the longest run.
    """
    import re

    longest_run = max((len(run) for run in re.findall(r"`+", html)), default=0)
    fence = "`" * max(3, longest_run + 1)
    return f"{fence}html\n{html}\n{fence}"


def _json_response(payload, status=200):
    """Build a JSON response that always carries the permissive CORS header."""
    response = jsonify(payload)
    response.status_code = status
    # Allow CORS on every response, including errors, so that browser callers
    # on other origins can read the error body as well.
    response.headers["Access-Control-Allow-Origin"] = "*"
    return response


@app.route("/proxy")
def proxy():
    """Fetch the page named by the ``url`` query parameter and return its HTML.

    Returns:
        200 with ``{"code_block": ...}`` (the HTML wrapped in a Markdown code
        fence) on success, 400 with ``{"error": ...}`` when ``url`` is missing
        or blank, and 500 with ``{"error": ...}`` when the fetch fails.
    """
    # NOTE(review): the target URL is caller-supplied and fetched server-side
    # with a wildcard CORS policy; nothing here blocks internal addresses.
    target_url = (request.args.get("url") or "").strip()
    if not target_url:
        return _json_response({"error": "url 파라미터가 필요합니다"}, 400)

    try:
        res = requests.get(
            target_url,
            timeout=10,
            headers={"User-Agent": "Mozilla/5.0 (HTMLGrabber/1.0)"},
        )
        res.raise_for_status()
        # Convert the HTML into a Markdown code block.
        code_block = _fence_html(res.text)
    except Exception as e:
        # Request boundary: report the failure as JSON, but keep a trace in
        # the server log instead of swallowing it silently.
        app.logger.exception("proxy fetch failed for %s", target_url)
        return _json_response({"error": str(e)}, 500)

    return _json_response({"code_block": code_block})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
 
1
+ import gradio as gr
2
+ import trafilatura
3
  import requests
4
+ from markdownify import markdownify as md
5
+ from sumy.parsers.plaintext import PlaintextParser
6
+ from sumy.nlp.tokenizers import Tokenizer
7
+ from sumy.summarizers.text_rank import TextRankSummarizer
8
 
9
# === Automatic summarization ===
def auto_summarize(text, sentence_count=3, language="english", fallback_chars=500):
    """Return a short extractive summary of *text*.

    Args:
        text: Plain text to summarize. ``None``, empty, or whitespace-only
            input yields an empty string.
        sentence_count: Number of sentences requested from the summarizer.
        language: Language name passed to the sumy ``Tokenizer``. Defaults to
            ``"english"``, the value that was previously hard-coded.
            NOTE(review): the pages this app targets appear to be Korean;
            confirm which tokenizer languages are available in deployment.
        fallback_chars: Length of the leading slice of *text* returned when no
            summary can be produced.

    Returns:
        The selected sentences joined by single spaces, or the first
        ``fallback_chars`` characters of *text* when summarization fails or
        selects nothing.
    """
    if not text or not text.strip():
        return ""

    try:
        parser = PlaintextParser.from_string(text, Tokenizer(language))
        summarizer = TextRankSummarizer()
        sentences = summarizer(parser.document, sentence_count)
        summary = " ".join(str(sentence) for sentence in sentences).strip()
    except Exception:
        # Summarization is best-effort: any failure in the tokenizer or the
        # summarizer degrades to the truncated-text fallback below.
        summary = ""

    return summary or text[:fallback_chars]
18
 
19
# === URL parsing + per-mode output ===
def parse_and_display(url, mode):
    """Fetch a web page and return its main content as Markdown.

    Args:
        url: Page address as typed by the user. Surrounding whitespace is
            ignored and ``None`` is treated as empty.
        mode: Output mode. ``"원문"`` returns the full extracted text; any
            other value returns the automatic summary.

    Returns:
        A Markdown string: the summary or the full text followed by a link
        back to the page, or a short message when the URL is missing or not
        http(s), when no main content could be extracted, or when fetching or
        processing failed.
    """
    from urllib.parse import urlparse

    full_text_mode = "원문"

    url = (url or "").strip()
    if not url:
        return "❌ URL을 입력하세요."
    # NOTE(review): the URL is user-supplied and fetched server-side. Only the
    # scheme is checked here; internal addresses are not blocked.
    if urlparse(url).scheme not in ("http", "https"):
        return "❌ http:// 또는 https://로 시작하는 URL만 지원합니다."

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()

        # Without a declared charset, requests decodes text responses as
        # ISO-8859-1, which garbles non-Latin pages; detect it from the body.
        if "charset" not in r.headers.get("Content-Type", "").lower():
            r.encoding = r.apparent_encoding

        html_content = trafilatura.extract(
            r.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if not html_content:
            return "본문을 추출할 수 없습니다."

        markdown_text = md(html_content, heading_style="ATX")

        if mode == full_text_mode:
            return f"📄 **원문**\n\n{markdown_text}\n\n🔗 [원문 보기]({url})"

        # Summarize only when the summary is actually going to be shown.
        summary_text = auto_summarize(markdown_text, sentence_count=3)
        return f"📝 **자동요약**\n\n{summary_text}\n\n🔗 [원문 보기]({url})"

    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        return f"에러: {e}"
47
+
48
# === Gradio UI ===
# NOTE(review): nesting below is reconstructed from a flattened view of the
# file; confirm that the output area and the button sit below the input row.
mode_choices = ["μžλ™μš”μ•½", "원문"]

with gr.Blocks() as demo:
    # Page title.
    gr.Markdown(value="## πŸ”— 링크 β†’ μžλ™μš”μ•½ / 원문 보기")

    # Input row: URL box (wide) next to the mode selector (narrow).
    with gr.Row():
        url_input = gr.Textbox(
            label="URL μž…λ ₯",
            placeholder="https://example.com",
            scale=3,
        )
        mode_select = gr.Radio(
            choices=mode_choices,
            value=mode_choices[0],
            label="κΈ°λ³Έ λͺ¨λ“œ",
            scale=1,
        )

    # Result area and the button that triggers the fetch.
    output = gr.Markdown()
    run_btn = gr.Button(value="μΆ”κ°€")

    # Pass the URL and the selected mode to the handler; render its Markdown.
    run_btn.click(
        fn=parse_and_display,
        inputs=[url_input, mode_select],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()