ZORYE committed (verified)
Commit 0d2efcb · 1 Parent(s): 74a9056

Update app.py

Files changed (1):
  app.py  +130  -0
app.py CHANGED
@@ -6,6 +6,7 @@ from rembg import remove
 from PIL import Image, ImageDraw, ImageFont
 import io
 import gradio as gr
+import shutil
 
 # ▼ Default download folder
 DEFAULT_DOWNLOAD_DIR = "./downloads"
@@ -46,6 +47,135 @@ def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
     return combined
 
 
+
+
+def extract_info_block(article_text):
+    # Grab the block running from "KBS2" up to and past the first <모델명>
+    # ("model name") tag, stopping at a blank line or the end of the text.
+    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
+    match = re.search(pattern, article_text, re.DOTALL)
+
+    if match:
+        return match.group(1).strip()
+    else:
+        return None
+
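+# Hypothetical shape of a matching block (assumed from the tags used below):
+#   KBS2 ... <브랜드> SomeBrand ... <모델명> X-100
+# re.DOTALL lets the non-greedy .*? span newlines inside the block.
+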
+# Product-name extraction function revised (handles multiple model names)
+
+def extract_product_info(article_text):
+    # <브랜드> ("brand") and <모델명> ("model name") are literal tags in the
+    # Korean source post
+    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
+    brand = brand_match.group(1).strip() if brand_match else None
+
+    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
+
+    result = []
+
+    if brand and model_matches:
+        # Keep at most the first two Latin-alphabet words of the brand
+        brand_words = re.findall(r'[A-Za-z]+', brand)
+        selected_brand = ' '.join(brand_words[:2])
+
+        for model in model_matches:
+            model = model.strip()
+            search_query = f"{selected_brand} {model}"
+            search_url = f"https://www.coupang.com/np/search?component=&q={search_query.replace(' ', '+')}"
+            result.append((model, search_url))
+
+    return result
+
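+# Example with hypothetical input: brand "ACME Corp Ltd" and model "X-100" give
+#   ("X-100", "https://www.coupang.com/np/search?component=&q=ACME+Corp+X-100")
+# Note that replace(' ', '+') escapes nothing else; if model names may contain
+# reserved characters, urllib.parse.quote_plus(search_query) is the safer choice.
+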
+# process_url revised (supports multiple model names, returns image paths)
+
+def process_url(tistory_url, output_dir):
+    result = []
+    image_paths = []
+    try:
+        response = requests.get(tistory_url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        img_tags = soup.find_all('img')
+        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
+
+        # Image processing
+        for idx, img_url in enumerate(img_urls):
+            base_name = os.path.basename(img_url).split('?')[0]
+            if base_name.startswith('new_ico'):  # skip icon assets
+                continue
+
+            img_data = requests.get(img_url).content
+            input_img = Image.open(io.BytesIO(img_data)).convert('RGBA')
+
+            # rembg's remove() accepts PNG bytes and returns PNG bytes,
+            # hence the round-trip through an in-memory buffer
+            input_bytes = io.BytesIO()
+            input_img.save(input_bytes, format='PNG')
+            removed = remove(input_bytes.getvalue())
+            img_no_bg = Image.open(io.BytesIO(removed)).convert('RGBA')
+
+            final_img = add_watermark(img_no_bg)
+
+            os.makedirs(output_dir, exist_ok=True)
+            name, ext = os.path.splitext(base_name)
+            save_path = os.path.join(output_dir, f"{name}_{idx+1}_processed.png")
+            final_img.save(save_path)
+            result.append(f"✔️ Saved: {save_path}")
+            image_paths.append(save_path)
+
+        # Text extraction
+        article_text = soup.get_text()
+
+        # Extract the program info block
+        info_block = extract_info_block(article_text)
+
+        # Extract product info (handles multiple model names)
+        item_info_list = extract_product_info(article_text)
+
+        # Formatted output
+        if info_block:
+            result.append("\n===== Program info block =====")
+            result.append(info_block)
+            result.append("================================\n")
+
+        for model_name, search_url in item_info_list:
+            result.append(f"🛍️ Item: {model_name}")
+            result.append(f"🔗 Coupang link: {search_url}")
+
+    except Exception as e:
+        result.append(f"❌ Failed to process URL: {tistory_url} / error: {e}")
+
+    return result, image_paths
+
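+# Usage sketch (hypothetical URL):
+#   lines, paths = process_url("https://example.tistory.com/123", "./downloads")
+# "lines" carries the human-readable log; "paths" the processed PNG locations.
+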
+# Process multiple URLs and save the results to a file
+
+def process_multiple_urls(urls_text, output_dir):
+    urls = [url.strip() for url in urls_text.strip().splitlines() if url.strip()]
+    all_results = []
+    all_images = []
+
+    for url in urls:
+        results, image_paths = process_url(url, output_dir)
+        all_results.extend(results)
+        all_images.extend(image_paths)
+        all_results.append("-" * 50)
+
+    final_text = "\n".join(all_results)
+
+    # Save the results; utf-8-sig prepends a BOM so Windows tools detect UTF-8
+    os.makedirs(output_dir, exist_ok=True)
+    result_file_path = os.path.join(output_dir, "result_log.txt")
+    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
+        f.write(final_text)
+
+    return final_text, result_file_path, all_images
+
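+# The three return values line up with the Gradio outputs wired below:
+# the results textbox, the log-file download, and the image file list.
+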
+# Gradio app layout
+
+with gr.Blocks() as app:
+    gr.Markdown("# ✨ Tistory Auto Processor ✨\n- Removes image backgrounds and adds a watermark\n- Extracts product names and builds Coupang search links\n- Download feature added!")
+
+    with gr.Row():
+        urls_input = gr.Textbox(label="Tistory post URLs (one per line)", lines=5, placeholder="https://example1.com\nhttps://example2.com")
+    with gr.Row():
+        output_folder = gr.Textbox(label="Output folder path", value=DEFAULT_DOWNLOAD_DIR)
+
+    process_button = gr.Button("Start processing 🚀")
+    output_text = gr.Textbox(label="Results", lines=20)
+    download_log = gr.File(label="Download result log")
+    # gr.File has no "multiple" keyword; file_count="multiple" lists several files
+    download_images = gr.File(label="Downloaded images", file_types=['.png'], interactive=True, file_count="multiple")
+
+    process_button.click(
+        fn=process_multiple_urls,
+        inputs=[urls_input, output_folder],
+        outputs=[output_text, download_log, download_images]
+    )
+
+app.launch()
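+# Note (assumption, Gradio 3.x): calling app.queue() before launch() helps avoid
+# request timeouts when rembg takes a long time on large images.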
+
+
+
+
+
 def extract_info_block(article_text):
     pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
     match = re.search(pattern, article_text, re.DOTALL)