orgoflu committed on
Commit
f03ed3b
·
verified ·
1 Parent(s): ddaa72b
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -7,7 +7,14 @@ def extract(url):
7
  try:
8
  r = requests.get(url, headers=headers, timeout=10)
9
  r.raise_for_status()
10
- text = trafilatura.extract(r.text)
 
 
 
 
 
 
 
11
  return text or "본문을 추출할 수 없습니다."
12
  except requests.exceptions.Timeout:
13
  return "요청이 시간 초과되었습니다."
@@ -19,9 +26,9 @@ def extract(url):
19
  iface = gr.Interface(
20
  fn=extract,
21
  inputs=gr.Textbox(label="URL 입력", placeholder="https://example.com"),
22
- outputs=gr.Textbox(label="추출된 본문", lines=20),
23
  title="본문 추출기",
24
- description="์›นํŽ˜์ด์ง€ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๋ณธ๋ฌธ๋งŒ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค."
25
  )
26
 
27
  if __name__ == "__main__":
 
7
  try:
8
  r = requests.get(url, headers=headers, timeout=10)
9
  r.raise_for_status()
10
+ # full_text=True → 가능한 모든 텍스트 추출
11
+ text = trafilatura.extract(
12
+ r.text,
13
+ include_comments=False,
14
+ include_tables=True,
15
+ no_fallback=False,
16
+ favor_recall=True
17
+ )
18
  return text or "본문을 추출할 수 없습니다."
19
  except requests.exceptions.Timeout:
20
  return "요청이 시간 초과되었습니다."
 
26
  iface = gr.Interface(
27
  fn=extract,
28
  inputs=gr.Textbox(label="URL 입력", placeholder="https://example.com"),
29
+ outputs=gr.Textbox(label="추출된 본문", lines=30),
30
  title="본문 추출기",
31
+ description="์›นํŽ˜์ด์ง€ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๊ฐ€๋Šฅํ•œ ๋งŽ์€ ๋ณธ๋ฌธ์„ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค."
32
  )
33
 
34
  if __name__ == "__main__":