orgoflu committed on
Commit
dbf0822
·
verified ·
1 Parent(s): 506f767

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -2,14 +2,13 @@ import gradio as gr
2
  import trafilatura
3
  import requests
4
  from markdownify import markdownify as md
5
- from bs4 import BeautifulSoup
6
- from urllib.parse import urljoin
7
  from sumy.parsers.plaintext import PlaintextParser
8
  from sumy.nlp.tokenizers import Tokenizer
9
  from sumy.summarizers.text_rank import TextRankSummarizer
10
 
11
  def summarize_text(text, sentence_count=3):
12
- parser = PlaintextParser.from_string(text, Tokenizer("korean"))
 
13
  summarizer = TextRankSummarizer()
14
  summary_sentences = summarizer(parser.document, sentence_count)
15
  return "\n".join(str(sentence) for sentence in summary_sentences)
@@ -20,7 +19,6 @@ def extract_and_summarize(url):
20
  r = requests.get(url, headers=headers, timeout=10)
21
  r.raise_for_status()
22
 
23
- # HTML 형태로 본문 추출
24
  html_content = trafilatura.extract(
25
  r.text,
26
  output_format="html",
@@ -31,18 +29,11 @@ def extract_and_summarize(url):
31
  if not html_content:
32
  return "본문을 추출할 수 없습니다.", ""
33
 
34
- # HTML → Markdown 변환
35
  markdown_text = md(html_content, heading_style="ATX")
36
-
37
- # 요약 생성
38
  summary = summarize_text(markdown_text, sentence_count=3)
39
 
40
  return markdown_text, summary
41
 
42
- except requests.exceptions.Timeout:
43
- return "요청이 시간 초과되었습니다.", ""
44
- except requests.exceptions.RequestException as e:
45
- return f"요청 실패: {e}", ""
46
  except Exception as e:
47
  return f"에러 발생: {e}", ""
48
 
 
2
  import trafilatura
3
  import requests
4
  from markdownify import markdownify as md
 
 
5
  from sumy.parsers.plaintext import PlaintextParser
6
  from sumy.nlp.tokenizers import Tokenizer
7
  from sumy.summarizers.text_rank import TextRankSummarizer
8
 
9
  def summarize_text(text, sentence_count=3):
10
+ # 한국어라도 english 토크나이저로 문장 단위 분리
11
+ parser = PlaintextParser.from_string(text, Tokenizer("english"))
12
  summarizer = TextRankSummarizer()
13
  summary_sentences = summarizer(parser.document, sentence_count)
14
  return "\n".join(str(sentence) for sentence in summary_sentences)
 
19
  r = requests.get(url, headers=headers, timeout=10)
20
  r.raise_for_status()
21
 
 
22
  html_content = trafilatura.extract(
23
  r.text,
24
  output_format="html",
 
29
  if not html_content:
30
  return "본문을 추출할 수 없습니다.", ""
31
 
 
32
  markdown_text = md(html_content, heading_style="ATX")
 
 
33
  summary = summarize_text(markdown_text, sentence_count=3)
34
 
35
  return markdown_text, summary
36
 
 
 
 
 
37
  except Exception as e:
38
  return f"에러 발생: {e}", ""
39