File size: 4,702 Bytes
55bc956
 
 
 
 
 
 
 
0d2efcb
55bc956
5312c47
55bc956
 
5312c47
55bc956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5312c47
55bc956
 
 
 
 
 
 
 
 
 
 
 
 
 
f00ed24
27072e8
 
 
0d2efcb
27072e8
 
 
 
0d2efcb
 
 
52783a1
 
 
0d2efcb
52783a1
0d2efcb
52783a1
0d2efcb
52783a1
 
 
0d2efcb
52783a1
 
 
 
 
 
 
0d2efcb
 
 
 
4dfce56
 
 
 
 
 
 
0d2efcb
4dfce56
 
0d2efcb
4dfce56
 
 
 
0d2efcb
4dfce56
048ff73
 
0d2efcb
 
048ff73
 
 
6caff03
0d2efcb
048ff73
 
 
 
 
0d2efcb
048ff73
0d2efcb
048ff73
 
 
 
 
0d2efcb
048ff73
 
b11fe8e
0d2efcb
40e0500
 
0d2efcb
b11fe8e
 
 
 
0d2efcb
40e0500
 
 
b11fe8e
0d2efcb
40e0500
 
 
 
 
0d2efcb
b11fe8e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import requests
from bs4 import BeautifulSoup
import re
import os
from rembg import remove
from PIL import Image, ImageDraw, ImageFont
import io
import gradio as gr
import shutil

# Default download folder; used as the initial value of the output-folder textbox.
DEFAULT_DOWNLOAD_DIR = "./downloads"

# Text preprocessing helpers
def clean_text(text):
    """Strip punctuation and symbols from *text*.

    Word characters, whitespace and Hangul syllables are kept; every other
    character is removed.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without the removed characters.
    """
    # The Hangul syllable block (U+AC00-U+D7A3) is written with escapes so the
    # pattern survives the file being re-saved in a wrong encoding. The
    # previous mis-encoded literal formed a reversed range and raised re.error.
    return re.sub(r'[^\w\s\uAC00-\uD7A3]', '', text)

def select_language(text):
    """Return the Korean words of *text*, or its English words if it has none.

    Args:
        text: The string to scan.

    Returns:
        All runs of Hangul syllables joined by single spaces. When the text
        contains no Hangul, all runs of ASCII letters joined by single spaces
        instead (an empty string if there are none either).
    """
    # Escapes keep the Hangul range (U+AC00-U+D7A3) intact regardless of the
    # source file's encoding; the mis-encoded literal raised re.error.
    hangul_runs = re.findall(r'[\uAC00-\uD7A3]+', text)
    if hangul_runs:
        return ' '.join(hangul_runs)
    return ' '.join(re.findall(r'[A-Za-z]+', text))

# Watermark insertion
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centred text watermark on *image*.

    Args:
        image: PIL image to mark; it is converted to RGBA for compositing.
        text: Watermark text, drawn as a single line.

    Returns:
        The RGBA result of compositing the watermark layer over the image.
    """
    # Fully transparent layer of the same size that receives the text.
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size is 4 % of the image width.
    font_size = int(image.size[0] * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except Exception:
        # Best effort: whatever prevents loading the TrueType font (missing
        # file, unusable size, ...), fall back to the built-in font. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be centred on the image.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    x = (image.size[0] - text_width) // 2
    y = (image.size[1] - text_height) // 2

    # White text with alpha 31/255 keeps the mark barely visible.
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    return Image.alpha_composite(image.convert('RGBA'), overlay)


def extract_info_block(article_text):
    """Return the product-info block of an article, or ``None`` if absent.

    The block starts at the first ``KBS2``, must contain a ``<모델명>`` tag,
    and ends at the first blank line after that tag (or at the end of the
    text).

    Args:
        article_text: Plain text of the article.

    Returns:
        The matched block with surrounding whitespace stripped, or ``None``
        when no such block exists.
    """
    # The tag literal was mis-encoded (UTF-8 read as a legacy code page) and
    # could never match real article text; it is restored to Hangul here.
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    # DOTALL lets the lazy ``.*?`` parts span multiple lines.
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None

# Product name extraction (revised to handle multiple model names)

def extract_product_info(article_text):
    """Build a Coupang search link for every model named in an article.

    The brand comes from the first ``<브랜드>`` line and the models from every
    ``<모델명>`` line. Only the first two Latin-letter words of the brand are
    used in the search query.

    Args:
        article_text: Plain text of the article.

    Returns:
        A list of ``(model, search_url)`` tuples in order of appearance; an
        empty list when the brand or all model lines are missing.
    """
    from urllib.parse import quote_plus

    # Tag literals were mis-encoded in the file and are restored to Hangul.
    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)

    if not brand or not model_matches:
        return []

    selected_brand = ' '.join(re.findall(r'[A-Za-z]+', brand)[:2])

    result = []
    for model in model_matches:
        model = model.strip()
        # strip() avoids a stray leading "+" when the brand has no Latin words.
        search_query = f"{selected_brand} {model}".strip()
        # quote_plus turns spaces into "+" and percent-encodes characters such
        # as "&" or "#" that would otherwise corrupt the query string.
        search_url = (
            "https://www.coupang.com/np/search?component=&q="
            f"{quote_plus(search_query)}"
        )
        result.append((model, search_url))

    return result


# process_url (revised: supports multiple model names, returns image paths)

def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect its processing results.

    Args:
        tistory_url: URL of the post to download.
        output_dir: Folder intended for generated files. The visible code does
            not use it yet; the processing step is elided below.

    Returns:
        ``(result, image_paths)``: a list of log lines and a list of image
        paths. On any failure the error is appended to ``result`` instead of
        being raised, so one bad URL does not abort a batch.
    """
    result = []
    image_paths = []

    try:
        # A timeout keeps one unresponsive host from hanging the whole run.
        response = requests.get(tistory_url, timeout=10)
        # Report HTTP errors instead of parsing an error page as a post.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        img_tags = soup.find_all('img')
        # Kept for the elided processing step below.
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]

        # (Omitted: image processing and text extraction logic continues here.)

    except Exception as e:
        # Boundary handler: the failure is recorded in the log for the user.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result, image_paths

    
# Process multiple URLs and save the result file

def process_multiple_urls(urls_text, output_dir):
    """Process every URL in *urls_text* and write a combined result log.

    Args:
        urls_text: Newline-separated URLs; blank lines are ignored.
        output_dir: Folder for the log file. A blank value falls back to
            ``DEFAULT_DOWNLOAD_DIR``.

    Returns:
        ``(final_text, result_file_path, all_images)``: the joined log text,
        the path of ``result_log.txt`` inside the output folder, and the image
        paths collected from every ``process_url`` call.
    """
    # An empty folder field would make os.makedirs("") raise; use the default.
    output_dir = (output_dir or "").strip() or DEFAULT_DOWNLOAD_DIR
    # Create the folder before processing so process_url can write into it.
    os.makedirs(output_dir, exist_ok=True)

    urls = [line.strip() for line in (urls_text or "").splitlines() if line.strip()]
    all_results = []
    all_images = []

    for url in urls:
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        # Visual separator between the logs of consecutive URLs.
        all_results.append("-" * 50)

    final_text = "\n".join(all_results)

    # Save the combined log (the BOM variant of UTF-8 is kept from the original).
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)

    return final_text, result_file_path, all_images
    
# Gradio app layout

# All user-facing strings below were mis-encoded (UTF-8 Hangul/emoji read as a
# legacy code page) and are restored to the intended text.
with gr.Blocks() as app:
    # Page header: title plus a bullet list of features.
    gr.Markdown("# ✨ 티스토리 자동 처리기 ✨\n- 이미지 배경 제거 + 워터마크 삽입\n- 제품명 추출 후 쿠팡 검색 링크 생성\n- 다운로드 기능 추가!")

    # Inputs: one URL per line, and the folder that receives the output.
    with gr.Row():
        urls_input = gr.Textbox(label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)", lines=5, placeholder="https://example1.com\nhttps://example2.com")
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    # Trigger and outputs: log text, downloadable log file, processed images.
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)  # changed here

    # The three return values of process_multiple_urls map onto the outputs in order.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images]
    )

app.launch()