| import requests |
| from bs4 import BeautifulSoup |
| import re |
| import os |
| from rembg import remove |
| from PIL import Image, ImageDraw, ImageFont |
| import io |
| import gradio as gr |
| import shutil |
|
|
| |
# Default directory where processed images and the result log are written.
DEFAULT_DOWNLOAD_DIR = "./downloads"
|
|
| |
def clean_text(text):
    """Remove every character that is not a word character, whitespace, or Hangul."""
    return re.sub(r'[^\w\s가-힣]', '', text)
|
|
def select_language(text):
    """Return the Hangul words of *text* joined by spaces; if there are none,
    fall back to the English (Latin-letter) words instead."""
    for word_pattern in (r'[가-힣]+', r'[A-Za-z]+'):
        words = re.findall(word_pattern, text)
        if words:
            return ' '.join(words)
    return ''
|
|
| |
| def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"): |
| watermark = Image.new('RGBA', image.size, (0,0,0,0)) |
| draw = ImageDraw.Draw(watermark) |
|
|
| font_size = int(image.size[0] * 0.04) |
| try: |
| font = ImageFont.truetype("ariali.ttf", font_size) |
| except: |
| font = ImageFont.load_default() |
|
|
| bbox = draw.textbbox((0, 0), text, font=font) |
| text_width = bbox[2] - bbox[0] |
| text_height = bbox[3] - bbox[1] |
|
|
| x = (image.size[0] - text_width) // 2 |
| y = (image.size[1] - text_height) // 2 |
|
|
| draw.text((x, y), text, font=font, fill=(255, 255, 255, 31)) |
| combined = Image.alpha_composite(image.convert('RGBA'), watermark) |
| return combined |
|
|
|
|
def extract_info_block(article_text):
    """Return the first block of text running from 'KBS2' through a <모델명>
    tag, cut at the first blank line (or end of text); None when absent."""
    match = re.search(r'(KBS2.*?<모델명>.*?)(?:\n\n|$)', article_text, re.DOTALL)
    return match.group(1).strip() if match else None
|
|
| |
|
|
def extract_product_info(article_text):
    """Extract (model, Coupang-search-URL) pairs from an article.

    Looks for a single <브랜드> line and any number of <모델명> lines.
    The first two Latin words of the brand are combined with each model
    name to build a Coupang search URL.

    Parameters
    ----------
    article_text : str
        Raw article text containing the tag lines.

    Returns
    -------
    list[tuple[str, str]]
        One (model, search_url) tuple per model; empty when either the
        brand or the model list is missing.
    """
    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None

    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)

    result = []

    if brand and model_matches:
        # Use at most the first two Latin words of the brand for the query.
        brand_words = re.findall(r'[A-Za-z]+', brand)
        selected_brand = ' '.join(brand_words[:2])

        for model in model_matches:
            model = model.strip()
            search_query = f"{selected_brand} {model}"
            # quote_plus percent-encodes non-ASCII (e.g. Hangul) model names
            # and turns spaces into '+', keeping the URL valid. Identical to
            # the old .replace(' ', '+') for plain ASCII queries.
            search_url = (
                "https://www.coupang.com/np/search?component=&q="
                + quote_plus(search_query)
            )
            result.append((model, search_url))

    return result
|
|
|
|
| |
|
|
def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect per-URL processing results.

    Parameters
    ----------
    tistory_url : str
        Address of the blog post to process.
    output_dir : str
        Directory intended for downloaded artifacts (currently unused here).

    Returns
    -------
    tuple[list[str], list[str]]
        (log messages, paths of saved images). On any failure a single
        '❌ ...' message is returned and the image list stays empty.
    """
    result = []
    image_paths = []

    try:
        # A timeout prevents one dead host from hanging the whole batch.
        response = requests.get(tistory_url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')

        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
        # NOTE(review): img_urls is collected but no download/rembg/watermark
        # step follows — the processing pipeline appears incomplete here.

    except Exception as e:
        # Best-effort batch processing: log the failure instead of raising,
        # so the remaining URLs are still processed.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result, image_paths
|
|
| |
| |
|
|
def process_multiple_urls(urls_text, output_dir):
    """Process every non-blank line of *urls_text* as a URL.

    The combined log is also written to '<output_dir>/result_log.txt'
    (UTF-8 with BOM so Windows editors auto-detect the encoding).

    Returns (log text, path of the log file, list of image paths).
    """
    all_results = []
    all_images = []

    for line in urls_text.strip().splitlines():
        url = line.strip()
        if not url:
            continue
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        all_results.append("-" * 50)

    final_text = "\n".join(all_results)

    os.makedirs(output_dir, exist_ok=True)
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)

    return final_text, result_file_path, all_images
| |
| |
|
|
# --- Gradio UI -------------------------------------------------------------
# Single-page app: paste Tistory post URLs (one per line), choose an output
# folder, then run process_multiple_urls; shows the log text and offers the
# log file and images for download.
with gr.Blocks() as app:
    gr.Markdown("# ✨ 티스토리 자동 처리기 ✨\n- 이미지 배경 제거 + 워터마크 삽입\n- 제품명 추출 후 쿠팡 검색 링크 생성\n- 다운로드 기능 추가!")

    with gr.Row():
        # One URL per line; blank lines are ignored by the handler.
        urls_input = gr.Textbox(label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)", lines=5, placeholder="https://example1.com\nhttps://example2.com")
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)

    # Wire the button to the batch processor: two inputs, three outputs
    # matching process_multiple_urls' return tuple.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images]
    )

# Start the local web server (blocking call).
app.launch()
|
|