ZORYE committed on
Commit
44fe2bb
·
verified ·
1 Parent(s): 0d2efcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -131
app.py CHANGED
@@ -172,134 +172,3 @@ process_button.click(
172
 
173
  app.launch()
174
 
175
-
176
-
177
-
178
-
179
def extract_info_block(article_text):
    """Return the programme-info block found in an article's text.

    The block starts at the first occurrence of ``KBS2``, must contain a
    ``<모델명>`` (model name) tag, and runs up to the next blank line or
    the end of the text, whichever comes first.

    Args:
        article_text: Plain text of the article. May be empty or ``None``.

    Returns:
        The matched block with surrounding whitespace stripped, or
        ``None`` when the text is empty or contains no such block.
    """
    if not article_text:
        return None

    # DOTALL lets ``.`` cross newlines, so the block may span several lines;
    # both quantifiers are lazy, so the match stops at the first blank line.
    match = re.search(r'(KBS2.*?<모델명>.*?)(?:\n\n|$)', article_text, re.DOTALL)
    return match.group(1).strip() if match else None
187
-
188
- # 제품명 추출 함수 수정 (여러 모델명 처리)
189
-
190
def extract_product_info(article_text):
    """Build a Coupang search link for every model name in an article.

    The brand is read from the first ``<브랜드>`` (brand) tag and reduced to
    its first two Latin-letter words; each ``<모델명>`` (model name) tag
    yields one search query of the form ``"<brand words> <model>"``.

    Args:
        article_text: Plain text of the article. May be empty or ``None``.

    Returns:
        A list of ``(model_name, search_url)`` tuples in document order.
        Empty when the text has no brand tag or no model tag.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    if not article_text:
        return []

    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)

    if not brand or not model_matches:
        return []

    # Only the first two Latin words of the brand are used in the query.
    selected_brand = ' '.join(re.findall(r'[A-Za-z]+', brand)[:2])

    result = []
    for model in model_matches:
        model = model.strip()
        if not model:
            continue
        # strip() avoids a leading '+' when the brand has no Latin words.
        search_query = f"{selected_brand} {model}".strip()
        # quote_plus escapes '&', '#', non-ASCII text, etc., which a plain
        # space-to-plus replacement would leave to corrupt the query string.
        search_url = (
            "https://www.coupang.com/np/search?component=&q="
            f"{quote_plus(search_query)}"
        )
        result.append((model, search_url))

    return result
209
-
210
- # process_url 함수 수정 (모델명 여러개 대응)
211
-
212
def process_url(tistory_url, output_dir):
    """Process one article URL: save edited images and collect product links.

    Every ``<img>`` on the page is downloaded, passed through ``remove`` and
    ``add_watermark`` (both defined outside this block; presumably background
    removal and watermarking — confirm against their definitions), and saved
    as a PNG in ``output_dir``. The page text is then scanned with
    ``extract_info_block`` and ``extract_product_info``.

    Args:
        tistory_url: URL of the article page to fetch.
        output_dir: Directory the processed images are written to; created
            if it does not exist.

    Returns:
        A list of human-readable log lines: one per saved or failed image,
        the programme-info block if one was found, and an item/link pair per
        model name. Failures are reported as log lines, never raised.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    # requests waits indefinitely unless a timeout is given.
    request_timeout = 30

    result = []
    try:
        response = requests.get(tistory_url, timeout=request_timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]

        os.makedirs(output_dir, exist_ok=True)

        # Image processing
        for idx, img_src in enumerate(img_urls, start=1):
            base_name = os.path.basename(img_src).split('?')[0]
            # Skip blank sources and the site's "new" icon images.
            if not img_src.strip() or base_name.startswith('new_ico'):
                continue

            # A single broken image must not abort the rest of the page.
            try:
                # Resolve relative and protocol-relative sources against the
                # final page URL (after redirects).
                img_url = urljoin(response.url, img_src)
                img_response = requests.get(img_url, timeout=request_timeout)
                img_response.raise_for_status()
                input_img = Image.open(io.BytesIO(img_response.content)).convert('RGBA')

                # Re-encode as PNG so ``remove`` always receives PNG bytes.
                input_bytes = io.BytesIO()
                input_img.save(input_bytes, format='PNG')
                removed = remove(input_bytes.getvalue())
                img_no_bg = Image.open(io.BytesIO(removed)).convert('RGBA')

                final_img = add_watermark(img_no_bg)

                name = os.path.splitext(base_name)[0]
                save_path = os.path.join(output_dir, f"{name}_{idx}_processed.png")
                final_img.save(save_path)
                result.append(f"✔️ 저장 완료: {save_path}")
            except Exception as e:
                result.append(f"⚠️ 이미지 처리 실패: {img_src} / 에러: {e}")

        # Text extraction
        article_text = soup.get_text()

        # Programme-info block
        info_block = extract_info_block(article_text)

        # Product info (one entry per model name)
        item_info_list = extract_product_info(article_text)

        # Formatted output
        if info_block:
            result.append("\n===== 프로그램 정보 블록 =====")
            result.append(info_block)
            result.append("================================\n")

        for model_name, search_url in item_info_list:
            result.append(f"🛍️ 아이템 : {model_name}")
            result.append(f"🔗 쿠팡 링크: {search_url}")

    except Exception as e:
        # Top-level boundary: report the failure in the log rather than raise,
        # so a batch run can continue with the next URL.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result
266
-
267
-
268
- # 여러 URL 처리
269
def process_multiple_urls(urls_text, output_dir):
    """Run ``process_url`` on each URL in a newline-separated string.

    Blank lines are ignored. The log lines of every URL are followed by a
    50-dash separator, joined with newlines, and also written to
    ``result_log.txt`` inside ``output_dir``.

    Args:
        urls_text: One URL per line.
        output_dir: Directory for processed images and the log file;
            created if missing.

    Returns:
        A ``(log_text, log_file_path)`` tuple.
    """
    separator = "-" * 50
    log_lines = []
    for raw_line in urls_text.strip().splitlines():
        address = raw_line.strip()
        if not address:
            continue
        log_lines += process_url(address, output_dir)
        log_lines.append(separator)

    final_text = "\n".join(log_lines)

    # Persist the same text so it can be offered as a download.
    os.makedirs(output_dir, exist_ok=True)
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, mode='w', encoding='utf-8') as log_file:
        log_file.write(final_text)

    return final_text, result_file_path
285
- # Gradio UI
286
- # Gradio 앱
287
# Gradio UI: a URL list and an output folder go in; the processing log and a
# downloadable log file come out.
with gr.Blocks() as app:
    gr.Markdown("# ✨ 티스토리 자동 처리기 ✨\n- 이미지 배경 제거 + 워터마크 삽입\n- 제품명 추출 후 쿠팡 검색 링크 생성\n- 다운로드 기능 추가!")

    with gr.Row():
        urls_input = gr.Textbox(label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)", lines=5, placeholder="https://example1.com\nhttps://example2.com")
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_file = gr.File(label="결과 다운로드")  # download target for the log file

    # Event listeners are registered inside the Blocks context.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_file]  # text log and log file
    )

app.launch()
 
172
 
173
  app.launch()
174