File size: 4,702 Bytes
55bc956 0d2efcb 55bc956 5312c47 55bc956 5312c47 55bc956 5312c47 55bc956 f00ed24 27072e8 0d2efcb 27072e8 0d2efcb 52783a1 0d2efcb 52783a1 0d2efcb 52783a1 0d2efcb 52783a1 0d2efcb 52783a1 0d2efcb 4dfce56 0d2efcb 4dfce56 0d2efcb 4dfce56 0d2efcb 4dfce56 048ff73 0d2efcb 048ff73 6caff03 0d2efcb 048ff73 0d2efcb 048ff73 0d2efcb 048ff73 0d2efcb 048ff73 b11fe8e 0d2efcb 40e0500 0d2efcb b11fe8e 0d2efcb 40e0500 b11fe8e 0d2efcb 40e0500 0d2efcb b11fe8e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | import requests
from bs4 import BeautifulSoup
import re
import os
from rembg import remove
from PIL import Image, ImageDraw, ImageFont
import io
import gradio as gr
import shutil
# Default download folder (used as the initial value of the output-folder field)
DEFAULT_DOWNLOAD_DIR = "./downloads"
# Text preprocessing functions
def clean_text(text):
    """Remove punctuation and symbols from *text*.

    Word characters (letters, digits, underscore), whitespace and Hangul
    syllables are kept; every other character is dropped.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # 가-힣 is the precomposed Hangul syllable block (U+AC00..U+D7A3).
    return re.sub(r'[^\w\s가-힣]', '', text)
def select_language(text):
    """Return the Korean words in *text*, or its English words if none.

    Args:
        text: The string to scan.

    Returns:
        The runs of Hangul syllables joined by single spaces when at least
        one exists; otherwise the runs of ASCII letters joined by single
        spaces (an empty string when neither is present).
    """
    hangul_words = re.findall(r'[가-힣]+', text)
    if hangul_words:
        return ' '.join(hangul_words)
    return ' '.join(re.findall(r'[A-Za-z]+', text))
# Watermark insertion function
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centered text watermark on *image*.

    Args:
        image: A PIL image; it is converted to RGBA before compositing.
        text: The watermark text.

    Returns:
        A new RGBA image with the watermark composited on top.
    """
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size scales with the image width (4 %).
    font_size = int(image.size[0] * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except Exception:
        # Best-effort fallback when the font cannot be loaded (missing file,
        # unusable size, ...). Unlike a bare ``except``, this does not
        # swallow KeyboardInterrupt / SystemExit.
        font = ImageFont.load_default()

    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (image.size[0] - (right - left)) // 2
    y = (image.size[1] - (bottom - top)) // 2

    # White text at alpha 31/255 keeps the watermark barely visible.
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    return Image.alpha_composite(image.convert('RGBA'), overlay)
def extract_info_block(article_text):
    """Extract the product-information block from an article body.

    The block starts at the first "KBS2" and must contain a "<모델명>"
    (model name) tag after it; it ends just before the next blank line or
    at the end of the text.

    Args:
        article_text: The article text to search.

    Returns:
        The matched block with surrounding whitespace stripped, or ``None``
        when no such block exists.
    """
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    # DOTALL lets ``.`` cross line breaks, since the block spans several lines.
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None
# Product-name extraction function, revised (handles multiple model names)
def extract_product_info(article_text):
    """Build a Coupang search link for every model named in an article.

    The brand is read from the first "<브랜드>" (brand) line and each model
    from a "<모델명>" (model name) line. Only the first two Latin-letter
    words of the brand are used in the search query.

    Args:
        article_text: The article text containing the tagged lines.

    Returns:
        A list of ``(model, search_url)`` tuples, one per model, in order of
        appearance. Empty when the brand or all model names are missing.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    if not brand:
        return []

    selected_brand = ' '.join(re.findall(r'[A-Za-z]+', brand)[:2])

    result = []
    for raw_model in re.findall(r'<모델명>[ \t]*([^\n]+)', article_text):
        model = raw_model.strip()
        if not model:
            continue
        # strip() avoids a leading space when the brand has no Latin words.
        search_query = f"{selected_brand} {model}".strip()
        # quote_plus encodes spaces as '+' and escapes characters such as
        # '&' or '#' that would otherwise corrupt the query string.
        search_url = (
            "https://www.coupang.com/np/search?component=&q="
            + quote_plus(search_query)
        )
        result.append((model, search_url))
    return result
# process_url, revised (supports multiple model names, returns image paths)
def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect the image URLs it contains.

    Any exception raised while fetching or parsing is caught and reported as
    a message in the returned result list instead of propagating.

    Args:
        tistory_url: URL of the post to fetch.
        output_dir: Target folder for generated files (not used by the
            portion of the logic visible here).

    Returns:
        A ``(result, image_paths)`` tuple: a list of result/log lines and a
        list of saved image paths.
    """
    result = []
    image_paths = []
    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(tistory_url, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
        # (omitted: image processing and text-extraction logic continues here)
    except Exception as e:
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
    return result, image_paths
# Process multiple URLs and save the results to a file
def process_multiple_urls(urls_text, output_dir):
    """Process every URL in *urls_text* and write a combined result log.

    Args:
        urls_text: URLs separated by line breaks; blank lines are ignored.
        output_dir: Folder for the log file. A blank value falls back to
            ``DEFAULT_DOWNLOAD_DIR``. The folder is created if missing.

    Returns:
        A ``(final_text, result_file_path, all_images)`` tuple: the combined
        log text, the path of the written ``result_log.txt``, and the list
        of image paths gathered from all URLs.
    """
    # A cleared folder field would make os.makedirs("") raise, so fall back
    # to the default folder instead.
    output_dir = (output_dir or "").strip() or DEFAULT_DOWNLOAD_DIR
    os.makedirs(output_dir, exist_ok=True)

    urls = [line.strip() for line in (urls_text or "").splitlines() if line.strip()]

    all_results = []
    all_images = []
    for url in urls:
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        # Visual separator between the results of consecutive URLs.
        all_results.append("-" * 50)
    final_text = "\n".join(all_results)

    # Save the combined results; utf-8-sig writes a BOM at the start.
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)
    return final_text, result_file_path, all_images
# Gradio app layout
# Build the UI: URL input, output folder, a run button, and three outputs
# (result text, downloadable log file, downloadable images).
with gr.Blocks() as app:
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 다운로드 기능 추가!"
    )
    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    # NOTE: this component definition was the one most recently modified.
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)
    # The three outputs map, in order, to the tuple returned by
    # process_multiple_urls: (text, log file path, image paths).
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images]
    )
app.launch()
|