ZORYE committed on
Commit
55bc956
·
verified ·
1 Parent(s): 8ecdd7c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import re
4
+ import os
5
+ from rembg import remove
6
+ from PIL import Image, ImageDraw, ImageFont
7
+ import io
8
+ import gradio as gr
9
+
10
+ # ▼ Default download folder (relative path, for Spaces compatibility)
11
+ DEFAULT_DOWNLOAD_DIR = "./downloads"
12
+
13
+ # Text preprocessing helpers
14
def clean_text(text):
    """Remove punctuation and symbols from *text*.

    Word characters, whitespace and Hangul syllables are kept; every other
    character is deleted.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # The Hangul syllable block is spelled with escapes (U+AC00-U+D7A3) so the
    # range stays valid even if this file is re-saved with the wrong encoding;
    # a mis-decoded literal range makes ``re`` raise "bad character range".
    return re.sub(r'[^\w\s\uac00-\ud7a3]', '', text)
17
+
18
def select_language(text):
    """Keep only the Korean words of *text*, or its English words if none.

    Args:
        text: The string to filter.

    Returns:
        The runs of Hangul syllables joined by single spaces when at least one
        exists; otherwise the runs of ASCII letters joined by single spaces
        (an empty string when there are none either).
    """
    # Escaped Hangul syllable range (U+AC00-U+D7A3): immune to the file being
    # re-saved in a wrong encoding, which would turn a literal range into an
    # invalid character class.
    hangul_runs = re.findall(r'[\uac00-\ud7a3]+', text)
    if hangul_runs:
        return ' '.join(hangul_runs)
    return ' '.join(re.findall(r'[A-Za-z]+', text))
25
+
26
+ # Watermark insertion function
27
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Return a copy of *image* with a faint, centred text watermark.

    The text is drawn in white with alpha 31/255 on a transparent layer of the
    same size, which is then alpha-composited over the image.

    Args:
        image: A PIL image; it is converted to RGBA before compositing.
        text: The watermark text.

    Returns:
        A new RGBA PIL image with the watermark applied.
    """
    width, height = image.size
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size is 4% of the image width.  Clamp to 1 because the product
    # truncates to 0 for images narrower than 25 px.
    font_size = max(1, int(width * 0.04))
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except (OSError, ImportError):
        # Font file not available (or, presumably, FreeType support missing):
        # fall back to Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be centred on the image.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (width - (right - left)) // 2
    y = (height - (bottom - top)) // 2

    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    return Image.alpha_composite(image.convert('RGBA'), overlay)
47
+
48
+ # Extract the program/item info block
49
def extract_info_block(article_text):
    """Return the program-info block contained in *article_text*.

    The block starts at the first ``KBS2``, runs through the first ``<모델명>``
    (model name) marker that follows it, and ends at the next blank line or at
    the end of the text.

    Args:
        article_text: Plain text of the article.

    Returns:
        The block with surrounding whitespace stripped, or ``None`` when the
        text contains no such block.
    """
    # DOTALL lets the lazy ``.*?`` cross line breaks between the program name
    # and the model-name marker.
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None
57
+
58
+ # Build a search query from the brand / model name
59
def extract_product_info(article_text):
    """Extract the model name and build a Coupang search URL for it.

    The brand is read from the rest of the line after a ``<브랜드>`` (brand)
    marker and the model from the rest of the line after a ``<모델명>``
    (model name) marker.

    Args:
        article_text: Plain text of the article.

    Returns:
        A ``(model, search_url)`` tuple, or ``(None, None)`` when either
        marker is missing or has an empty value.
    """
    from urllib.parse import quote_plus

    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    model_match = re.search(r'<모델명>[ \t]*([^\n]+)', article_text)

    brand = brand_match.group(1).strip() if brand_match else None
    model = model_match.group(1).strip() if model_match else None
    if not (brand and model):
        return None, None

    # Only the first two Latin-letter words of the brand go into the query.
    # Joining the parts avoids a leading separator when the brand has none.
    brand_words = re.findall(r'[A-Za-z]+', brand)
    search_query = ' '.join(brand_words[:2] + [model])

    # quote_plus turns spaces into '+' and percent-encodes everything that is
    # not URL-safe ('&', '#', non-ASCII text, ...), keeping the link valid.
    search_url = (
        "https://www.coupang.com/np/search?component=&q="
        + quote_plus(search_query)
    )
    return model, search_url
74
+
75
+ # Main processing function
76
def process_url(tistory_url, output_dir):
    """Process one article URL and return human-readable result lines.

    Every ``<img>`` on the page (except files whose name starts with
    ``new_ico``) is downloaded, passed through ``rembg.remove``, watermarked
    with ``add_watermark`` and saved as a PNG in *output_dir*.  Then the
    program-info block and the product search link are extracted from the
    page text and appended to the result.

    Args:
        tistory_url: URL of the article to fetch.
        output_dir: Directory the processed images are written to; created on
            demand.

    Returns:
        A list of message strings.  Failures are reported as messages rather
        than raised: a failing image is skipped, a failing page yields a
        single error line.
    """
    from urllib.parse import urljoin, urlparse

    timeout = 30  # seconds; without it a stalled server blocks the UI forever
    result = []
    try:
        response = requests.get(tistory_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        img_sources = [img['src'] for img in soup.find_all('img') if img.get('src')]

        # Image processing
        for idx, img_src in enumerate(img_sources):
            # Resolve relative / protocol-relative sources against the page.
            img_url = urljoin(tistory_url, img_src)
            # Name the file after the URL path only, so a query string that
            # contains slashes cannot leak into the file name.
            base_name = os.path.basename(urlparse(img_url).path)
            if base_name.startswith('new_ico'):
                continue  # ignore icons whose name starts with new_ico

            try:
                img_response = requests.get(img_url, timeout=timeout)
                img_response.raise_for_status()
                input_img = Image.open(io.BytesIO(img_response.content)).convert('RGBA')

                # rembg is fed PNG bytes and returns the cut-out as bytes.
                input_bytes = io.BytesIO()
                input_img.save(input_bytes, format='PNG')
                removed = remove(input_bytes.getvalue())
                img_no_bg = Image.open(io.BytesIO(removed)).convert('RGBA')

                final_img = add_watermark(img_no_bg)

                os.makedirs(output_dir, exist_ok=True)
                name = os.path.splitext(base_name)[0] or "image"
                save_path = os.path.join(output_dir, f"{name}_{idx+1}_processed.png")
                final_img.save(save_path)
                result.append(f"✔️ 저장 완료: {save_path}")
            except Exception as e:
                # One broken image (bad URL, data: URI, non-image payload)
                # must not abort the remaining images or the text extraction.
                result.append(f"❌ 이미지 처리 실패: {img_src} / 에러: {e}")

        # Text extraction
        article_text = soup.get_text()

        # Program info block
        info_block = extract_info_block(article_text)

        # Product info
        item_name, search_url = extract_product_info(article_text)

        # Formatted output
        if info_block:
            result.append("\n===== 프로그램 정보 블록 =====")
            result.append(info_block)
            result.append("================================\n")

        if item_name and search_url:
            result.append(f"🛍️ 아이템 : {item_name}")
            result.append(f"🔗 쿠팡 링크: {search_url}")

    except Exception as e:
        # UI boundary: report the failure for this URL instead of raising.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result
130
+
131
+ # Process multiple URLs
132
def process_multiple_urls(urls_text, output_dir):
    """Process every URL listed in *urls_text*, one per line.

    Args:
        urls_text: Newline-separated URLs; blank lines and surrounding
            whitespace are ignored.  ``None`` is treated as empty input.
        output_dir: Directory passed through to ``process_url``.

    Returns:
        All result lines joined with newlines, each URL's lines followed by a
        50-dash separator.  An empty string when no URL was given.
    """
    lines = (urls_text or "").splitlines()
    urls = [line.strip() for line in lines if line.strip()]

    all_results = []
    for url in urls:
        all_results.extend(process_url(url, output_dir))
        all_results.append("-" * 50)
    return "\n".join(all_results)
139
+
140
+ # Gradio UI layout
141
# Build the Gradio interface: a multi-line box for the article URLs, a box for
# the output folder, a start button, and a read-only result box.
with gr.Blocks() as app:
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 프로그램 정보 블록 저장"
    )

    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)

    # The button runs process_multiple_urls on both inputs and shows the
    # joined result text.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text],
    )

# Start the server only when run as a script, so importing this module (for
# tests or reuse of the helpers) does not launch the web app.
if __name__ == "__main__":
    app.launch()