Kims12 committed on
Commit
a241c57
·
verified ·
1 Parent(s): 4670b8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -23,13 +23,13 @@ def scrape_naver_blog(url):
23
  soup = BeautifulSoup(response.content, 'html.parser')
24
 
25
  # 제목 추출
26
- # XPath: /html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div/div/div[10]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[2]/div/p/span
27
- # BeautifulSoup์—์„œ๋Š” XPath ๋Œ€์‹  CSS ์„ ํƒ์ž๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
28
- # ํ•ด๋‹น XPath๋ฅผ ๋ถ„์„ํ•˜์—ฌ CSS ์„ ํƒ์ž๋กœ ๋ณ€ํ™˜ํ•˜๊ฑฐ๋‚˜, ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์˜ ํด๋ž˜์Šค๋ช…์„ ์ด์šฉํ•˜์—ฌ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
29
- # 네이버 블로그의 HTML 구조는 동적으로 변할 수 있으므로, 클래스명을 기반으로 안정적으로 추출하도록 합니다.
30
-
31
- # 예시로 제목이 포함된 태그의 클래스명을 사용 (실제 클래스명은 네이버 블로그 구조에 따라 다를 수 있습니다)
32
- title_element = soup.find('span', class_='se-fs- se-ff-') # 실제 클래스명으로 수정 필요
33
  if not title_element:
34
  print("제목을 찾을 수 없습니다.")
35
  title = "제목을 찾을 수 없습니다."
@@ -38,10 +38,14 @@ def scrape_naver_blog(url):
38
  print(f"추출된 제목: {title}")
39
 
40
  # 내용 텍스트 추출
41
- # XPath: /html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div/div/div[10]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[2]/div[2]/div/div/div/p[3]/span[2]
42
- # 마찬가지로 CSS 선택자를 사용하여 내용 추출
43
- content_elements = soup.find_all('span', class_='se-fs- se-ff-') # 실제 클래스명으로 수정 필요
44
 
 
 
 
 
45
  if not content_elements:
46
  print("내용을 찾을 수 없습니다.")
47
  content = "내용을 찾을 수 없습니다."
@@ -60,16 +64,18 @@ def scrape_naver_blog(url):
60
  return f"An error occurred: {e}"
61
 
62
  # Gradio 인터페이스 설정
63
- iface = gr.Interface(
64
- fn=scrape_naver_blog,
65
- inputs=gr.inputs.Textbox(lines=2, placeholder="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
66
- outputs="text",
67
- title="네이버 블로그 스크래퍼",
68
- description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.",
69
- examples=[
70
- ["https://blog.naver.com/lafleur7/223723723486"]
71
- ]
72
- )
 
 
73
 
74
  # 애플리케이션 실행
75
  iface.launch()
 
23
  soup = BeautifulSoup(response.content, 'html.parser')
24
 
25
  # 제목 추출
26
+ # 네이버 블로그의 제목은 일반적으로 <h3> 태그 또는 특정 클래스명을 가집니다.
27
+ # 실제 HTML 구조에 맞게 수정해야 합니다.
28
+ title_element = soup.find('h3', {'class': 'se_textarea'}) # 예시 클래스명
29
+ if not title_element:
30
+ # 다른 가능한 위치 시도
31
+ title_element = soup.find('span', {'class': 'se-fs- se-ff-'})
32
+
33
  if not title_element:
34
  print("제목을 찾을 수 없습니다.")
35
  title = "제목을 찾을 수 없습니다."
 
38
  print(f"추출된 제목: {title}")
39
 
40
  # 내용 텍스트 추출
41
+ # 네이버 블로그의 내용은 일반적으로 <div> 태그 내에 특정 클래스명을 가집니다.
42
+ # 실제 HTML 구조에 맞게 수정해야 합니다.
43
+ content_elements = soup.find_all('span', {'class': 'se-fs- se-ff-'}) # 예시 클래스명
44
 
45
+ if not content_elements:
46
+ # 다른 가능한 위치 시도
47
+ content_elements = soup.find_all('div', {'class': 'se-component se-text se-l-default'})
48
+
49
  if not content_elements:
50
  print("내용을 찾을 수 없습니다.")
51
  content = "내용을 찾을 수 없습니다."
 
64
  return f"An error occurred: {e}"
65
 
66
  # Gradio 인터페이스 설정
67
+ with gr.Blocks() as iface:
68
+ gr.Markdown("# 네이버 블로그 스크래퍼")
69
+ gr.Markdown("๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.")
70
+
71
+ with gr.Row():
72
+ url_input = gr.Textbox(label="네이버 블로그 URL", placeholder="https://blog.naver.com/lafleur7/223723723486")
73
+
74
+ scrape_button = gr.Button("스크래핑")
75
+
76
+ output_text = gr.Textbox(label="결과", lines=20)
77
+
78
+ scrape_button.click(fn=scrape_naver_blog, inputs=url_input, outputs=output_text)
79
 
80
  # 애플리케이션 실행
81
  iface.launch()