# CPS / app.py — Hugging Face Space "CPS" (ZORYE), revision b11fe8e
import io
import os
import re
import shutil
import urllib.parse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
from rembg import remove
# ▼ Default download directory
DEFAULT_DOWNLOAD_DIR = "./downloads"
# Text preprocessing helper
def clean_text(text):
    """Strip every character that is not a word char, whitespace, or Hangul syllable.

    NOTE: the checked-in file had the Hangul range mojibake-corrupted
    (UTF-8 decoded as cp874), producing an invalid reversed character range
    that raised re.error at runtime; restored to the intended 가-힣 block.
    """
    return re.sub(r'[^\w\s가-힣]', '', text)
def select_language(text):
    """Return the Korean words of *text* joined by spaces; fall back to English words.

    If the text contains any Hangul-syllable runs, those are returned;
    otherwise all latin-letter runs are returned. Empty string when neither exists.

    NOTE: the Hangul range was mojibake-corrupted in the checked-in file
    (invalid reversed range raising re.error); restored to 가-힣.
    """
    hangul_runs = re.findall(r'[가-힣]+', text)
    if hangul_runs:
        return ' '.join(hangul_runs)
    english_runs = re.findall(r'[A-Za-z]+', text)
    return ' '.join(english_runs)
# ์›Œํ„ฐ๋งˆํฌ ์‚ฝ์ž… ํ•จ์ˆ˜
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centered text watermark onto *image*.

    Args:
        image: PIL.Image to watermark (any mode; it is converted to RGBA).
        text: watermark string; default repeats the site name.

    Returns:
        A new RGBA PIL.Image with the translucent watermark composited on top.
        (Caller must convert before saving to formats without alpha, e.g. JPEG.)
    """
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    # Font scales with image width so the mark stays proportional to the image.
    font_size = int(image.size[0] * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except OSError:
        # Font file missing/unreadable -> PIL's built-in bitmap font.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        font = ImageFont.load_default()
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    # Center the text block on the image.
    x = (image.size[0] - text_width) // 2
    y = (image.size[1] - text_height) // 2
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))  # alpha 31 ≈ 12% opacity
    return Image.alpha_composite(image.convert('RGBA'), overlay)
def extract_info_block(article_text):
    """Return the first block starting at 'KBS2' and running through a '<모델명>' tag.

    The match ends at the first blank line (or end of text); the stripped
    block is returned, or None if no such block exists.

    NOTE: the '<모델명>' literal was mojibake-corrupted in the checked-in file
    (UTF-8 decoded as cp874); restored here.
    """
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None
# Product-name extraction (handles multiple model names)
def extract_product_info(article_text):
    """Extract (model, coupang_search_url) pairs from an article's tag lines.

    Looks for one '<브랜드>' (brand) line and any number of '<모델명>' (model)
    lines. The search query is the first two latin words of the brand plus the
    model string, URL-encoded for the Coupang search endpoint.

    Returns:
        list of (model, search_url) tuples; empty when brand or models are missing.
    """
    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
    result = []
    if brand and model_matches:
        # Only the first two latin words of the brand go into the query.
        brand_words = re.findall(r'[A-Za-z]+', brand)
        selected_brand = ' '.join(brand_words[:2])
        for model in model_matches:
            model = model.strip()
            search_query = f"{selected_brand} {model}"
            # quote_plus percent-encodes non-ASCII model names and turns spaces
            # into '+' (the old str.replace(' ', '+') left Korean raw in the URL,
            # producing RFC-invalid URLs; output is identical for ASCII queries).
            encoded_query = urllib.parse.quote_plus(search_query)
            search_url = f"https://www.coupang.com/np/search?component=&q={encoded_query}"
            result.append((model, search_url))
    return result
# process_url: handles multiple model names per post, returns image paths
def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect its image URLs for processing.

    Args:
        tistory_url: post URL to scrape.
        output_dir: directory where processed images are written
            (unused in this truncated version — TODO confirm downstream logic).

    Returns:
        (result, image_paths): human-readable log lines and saved image paths.
        On any failure a single error log line is returned instead of raising,
        so a bad URL does not abort the batch.
    """
    result = []
    image_paths = []
    try:
        # Timeout added: without it a hung server blocked the whole batch forever.
        response = requests.get(tistory_url, timeout=15)
        response.raise_for_status()  # surface HTTP errors instead of parsing an error page
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
        # (omitted in source: image download/processing and text-extraction logic)
    except Exception as e:
        # Best-effort batch processing: record the failure and keep going.
        # (Korean message restored from mojibake: "URL processing failed / error".)
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
    return result, image_paths
# Process multiple URLs and save the results to a log file
def process_multiple_urls(urls_text, output_dir):
    """Process every URL (one per line of *urls_text*) and persist a run log.

    Blank lines are skipped. After each URL's results a dashed separator line
    is appended. The joined log is written to ``result_log.txt`` in
    *output_dir* (created if missing) with a UTF-8 BOM.

    Returns:
        (final_text, result_file_path, all_images): the full log text, the
        path of the written log file, and every downloaded image path.
    """
    all_results = []
    all_images = []
    for raw_line in urls_text.strip().splitlines():
        url = raw_line.strip()
        if not url:
            continue
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        all_results.append("-" * 50)
    final_text = "\n".join(all_results)
    # Persist the run log; utf-8-sig adds a BOM so Excel/Notepad detect UTF-8.
    os.makedirs(output_dir, exist_ok=True)
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)
    return final_text, result_file_path, all_images
# ---- Gradio UI -----------------------------------------------------------
# Korean labels below were mojibake-corrupted in the checked-in file
# (UTF-8 decoded as cp874); restored here.
# NOTE(review): original indentation was lost in the paste; the Row/widget
# grouping is reconstructed — verify the intended layout.
with gr.Blocks() as app:
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 다운로드 기능 추가!"
    )
    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images],
    )

app.launch()