orgoflu committed on
Commit
b454ab3
·
verified ·
1 Parent(s): dbf0822

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import gradio as gr
2
  import trafilatura
3
  import requests
@@ -7,7 +12,7 @@ from sumy.nlp.tokenizers import Tokenizer
7
  from sumy.summarizers.text_rank import TextRankSummarizer
8
 
9
  def summarize_text(text, sentence_count=3):
10
- # 한국어라도 english 토크나이저로 문장 단위 분리
11
  parser = PlaintextParser.from_string(text, Tokenizer("english"))
12
  summarizer = TextRankSummarizer()
13
  summary_sentences = summarizer(parser.document, sentence_count)
@@ -19,6 +24,7 @@ def extract_and_summarize(url):
19
  r = requests.get(url, headers=headers, timeout=10)
20
  r.raise_for_status()
21
 
 
22
  html_content = trafilatura.extract(
23
  r.text,
24
  output_format="html",
@@ -29,11 +35,18 @@ def extract_and_summarize(url):
29
  if not html_content:
30
  return "본문을 추출할 수 없습니다.", ""
31
 
 
32
  markdown_text = md(html_content, heading_style="ATX")
 
 
33
  summary = summarize_text(markdown_text, sentence_count=3)
34
 
35
  return markdown_text, summary
36
 
 
 
 
 
37
  except Exception as e:
38
  return f"에러 발생: {e}", ""
39
 
 
1
+ import nltk
2
+ # NLTK 토크나이저 리소스 자동 다운로드
3
+ nltk.download("punkt")
4
+ nltk.download("punkt_tab")
5
+
6
  import gradio as gr
7
  import trafilatura
8
  import requests
 
12
  from sumy.summarizers.text_rank import TextRankSummarizer
13
 
14
  def summarize_text(text, sentence_count=3):
15
+ # 한국어도 문장 단위로 끊기 위해 english 토크나이저 사용
16
  parser = PlaintextParser.from_string(text, Tokenizer("english"))
17
  summarizer = TextRankSummarizer()
18
  summary_sentences = summarizer(parser.document, sentence_count)
 
24
  r = requests.get(url, headers=headers, timeout=10)
25
  r.raise_for_status()
26
 
27
+ # HTML 형태로 본문 추출
28
  html_content = trafilatura.extract(
29
  r.text,
30
  output_format="html",
 
35
  if not html_content:
36
  return "본문을 추출할 수 없습니다.", ""
37
 
38
+ # HTML → Markdown 변환
39
  markdown_text = md(html_content, heading_style="ATX")
40
+
41
+ # 요약 생성
42
  summary = summarize_text(markdown_text, sentence_count=3)
43
 
44
  return markdown_text, summary
45
 
46
+ except requests.exceptions.Timeout:
47
+ return "요청이 시간 초과되었습니다.", ""
48
+ except requests.exceptions.RequestException as e:
49
+ return f"요청 실패: {e}", ""
50
  except Exception as e:
51
  return f"에러 발생: {e}", ""
52