Update app.py
Browse files
app.py
CHANGED
|
@@ -172,134 +172,3 @@ process_button.click(
|
|
| 172 |
|
| 173 |
app.launch()
|
| 174 |
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
def extract_info_block(article_text):
    """Return the programme-info block of an article, or ``None``.

    The block starts at the first literal ``KBS2``, runs through the first
    following ``<모델명>`` (model name) tag, and ends at the next blank line
    or at the end of the text, whichever comes first.

    Args:
        article_text: Plain text of the article; may be empty or ``None``.

    Returns:
        The matched block with surrounding whitespace stripped, or ``None``
        when the text is empty or contains no such block.
    """
    if not article_text:
        return None

    # DOTALL lets ``.`` cross line breaks; both quantifiers are lazy so the
    # block stops at the first blank line after the first model-name tag.
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None
|
| 187 |
-
|
| 188 |
-
# Product-name extraction function, revised (handles multiple model names)
|
| 189 |
-
|
| 190 |
-
def extract_product_info(article_text):
    """Build a Coupang search link for every model listed in an article.

    The brand is read from the first ``<브랜드>`` (brand) line and reduced to
    its first two runs of ASCII letters. Each ``<모델명>`` (model name) line
    yields one model, which is combined with that brand into a search query.

    Args:
        article_text: Plain text of the article; may be empty or ``None``.

    Returns:
        A list of ``(model, search_url)`` tuples in order of appearance.
        The list is empty when the brand line or every model line is missing.
    """
    # Imported locally so the block does not depend on the file's import list.
    from urllib.parse import quote_plus

    if not article_text:
        return []

    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else ''
    if not brand:
        return []

    # Only Latin words are kept for the query; a brand written purely in
    # another script therefore contributes nothing.
    selected_brand = ' '.join(re.findall(r'[A-Za-z]+', brand)[:2])

    result = []
    for raw_model in re.findall(r'<모델명>[ \t]*([^\n]+)', article_text):
        model = raw_model.strip()
        if not model:
            # e.g. a tag followed only by a carriage return
            continue
        # strip() avoids a leading "+" when no Latin brand words were found.
        search_query = f"{selected_brand} {model}".strip()
        # quote_plus turns spaces into "+" and also escapes "&", "#", "%"
        # and non-ASCII characters that would otherwise break the URL.
        search_url = (
            "https://www.coupang.com/np/search?component=&q="
            + quote_plus(search_query)
        )
        result.append((model, search_url))

    return result
|
| 209 |
-
|
| 210 |
-
# process_url function, revised (supports several model names)
|
| 211 |
-
|
| 212 |
-
def process_url(tistory_url, output_dir, timeout=30):
    """Process one Tistory post: convert its images and list its products.

    Each ``<img>`` found in the page is downloaded, re-encoded as PNG, passed
    through ``remove`` and ``add_watermark``, and saved into ``output_dir``.
    The page text is then scanned with ``extract_info_block`` and
    ``extract_product_info`` and their results are appended to the report.

    Args:
        tistory_url: URL of the post to fetch.
        output_dir: Directory that receives the processed PNG files; it is
            created when missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of report lines. Failures are reported as lines in this list
        instead of being raised, so one bad URL does not stop a batch.
    """
    # Imported locally so the block does not depend on the file's import list.
    from urllib.parse import urljoin, urlsplit

    result = []
    try:
        response = requests.get(tistory_url, timeout=timeout)
        # Do not parse an HTTP error page as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Resolve relative and protocol-relative ("//host/...") sources
        # against the page URL; skip tags with a missing or empty src.
        img_urls = [
            urljoin(tistory_url, img['src'])
            for img in soup.find_all('img')
            if img.get('src')
        ]

        os.makedirs(output_dir, exist_ok=True)

        # Image processing
        for idx, img_url in enumerate(img_urls, start=1):
            # Use only the URL path for the file name so that a "/" inside
            # the query string cannot leak into it.
            base_name = os.path.basename(urlsplit(img_url).path)
            if base_name.startswith('new_ico'):
                continue

            # A single broken image is reported and skipped; the remaining
            # images and the text extraction below still run.
            try:
                img_response = requests.get(img_url, timeout=timeout)
                img_response.raise_for_status()
                input_img = Image.open(io.BytesIO(img_response.content)).convert('RGBA')

                # ``remove`` is handed PNG bytes, so re-encode first.
                input_bytes = io.BytesIO()
                input_img.save(input_bytes, format='PNG')
                removed = remove(input_bytes.getvalue())
                img_no_bg = Image.open(io.BytesIO(removed)).convert('RGBA')

                final_img = add_watermark(img_no_bg)

                name = os.path.splitext(base_name)[0]
                save_path = os.path.join(output_dir, f"{name}_{idx}_processed.png")
                final_img.save(save_path)
                result.append(f"✔️ 저장 완료: {save_path}")
            except Exception as e:
                result.append(f"❌ 이미지 처리 실패: {img_url} / 에러: {e}")

        # Text extraction
        article_text = soup.get_text()

        # Programme-info block
        info_block = extract_info_block(article_text)

        # Product info (handles several model names)
        item_info_list = extract_product_info(article_text)

        # Formatted report output
        if info_block:
            result.append("\n===== 프로그램 정보 블록 =====")
            result.append(info_block)
            result.append("================================\n")

        for model_name, search_url in item_info_list:
            result.append(f"🛍️ 아이템: {model_name}")
            result.append(f"🔗 쿠팡 링크: {search_url}")

    except Exception as e:
        # Top-level boundary for this URL: report the failure, do not raise.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
# Process multiple URLs
|
| 269 |
-
def process_multiple_urls(urls_text, output_dir):
    """Run ``process_url`` on every non-blank line and log the combined report.

    Args:
        urls_text: Newline-separated URLs; blank lines and surrounding
            whitespace are ignored.
        output_dir: Directory passed to ``process_url`` and used for the log
            file; it is created when missing.

    Returns:
        A ``(report_text, log_path)`` tuple, where ``log_path`` points at
        ``result_log.txt`` inside ``output_dir``.
    """
    separator = "-" * 50
    report_lines = []

    for raw_line in urls_text.strip().splitlines():
        target = raw_line.strip()
        if not target:
            continue
        report_lines += process_url(target, output_dir)
        # A rule after each URL keeps the per-URL sections apart.
        report_lines.append(separator)

    final_text = "\n".join(report_lines)

    # Save the report to a file as well.
    os.makedirs(output_dir, exist_ok=True)
    log_path = os.path.join(output_dir, "result_log.txt")
    with open(log_path, 'w', encoding='utf-8') as log_file:
        log_file.write(final_text)

    return final_text, log_path
|
| 285 |
-
# Gradio UI
|
| 286 |
-
# Gradio app
|
| 287 |
-
# Layout: a multi-line URL box and an output-folder box feed
# process_multiple_urls; its report text and log-file path are shown in a
# text box and a file component.
with gr.Blocks() as app:
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 다운로드 기능 추가!"
    )

    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_file = gr.File(label="결과 다운로드")  # download component

    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_file],  # both return values are wired up
    )

app.launch()
|
|
|
|
| 172 |
|
| 173 |
app.launch()
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|