Spaces:

ZORYE
/

CPS

Sleeping

File size: 4,702 Bytes

55bc956
 
 
 
 
 
 
 
0d2efcb
55bc956
5312c47
55bc956
 
5312c47
55bc956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5312c47
55bc956
 
 
 
 
 
 
 
 
 
 
 
 
 
f00ed24
27072e8
 
 
0d2efcb
27072e8
 
 
 
0d2efcb
 
 
52783a1
 
 
0d2efcb
52783a1
0d2efcb
52783a1
0d2efcb
52783a1
 
 
0d2efcb
52783a1
 
 
 
 
 
 
0d2efcb
 
 
 
4dfce56
 
 
 
 
 
 
0d2efcb
4dfce56
 
0d2efcb
4dfce56
 
 
 
0d2efcb
4dfce56
048ff73
 
0d2efcb
 
048ff73
 
 
6caff03
0d2efcb
048ff73
 
 
 
 
0d2efcb
048ff73
0d2efcb
048ff73
 
 
 
 
0d2efcb
048ff73
 
b11fe8e
0d2efcb
40e0500
 
0d2efcb
b11fe8e
 
 
 
0d2efcb
40e0500
 
 
b11fe8e
0d2efcb
40e0500
 
 
 
 
0d2efcb
b11fe8e

import requests
from bs4 import BeautifulSoup
import re
import os
from rembg import remove
from PIL import Image, ImageDraw, ImageFont
import io
import gradio as gr
import shutil

# ▼ 기본 다운로드 폴더
DEFAULT_DOWNLOAD_DIR = "./downloads"

# 텍스트 전처리 함수
def clean_text(text):
    """Drop every character that is not a word character, whitespace, or Hangul.

    Args:
        text: Input string to sanitize.

    Returns:
        The input with all non-matching characters (punctuation, symbols)
        removed; allowed characters keep their original order.
    """
    disallowed = re.compile(r'[^\w\s가-힣]')
    return disallowed.sub('', text)

def select_language(text):
    """Keep only the Korean words of ``text``, or its English words if there are none.

    Args:
        text: Input string that may mix Hangul, Latin letters and other characters.

    Returns:
        The runs of Hangul syllables joined by single spaces when at least one
        exists; otherwise the runs of ASCII letters joined by single spaces
        (an empty string when neither is present).
    """
    korean_runs = re.findall(r'[가-힣]+', text)
    if not korean_runs:
        # No Hangul at all: fall back to ASCII-letter runs.
        return ' '.join(re.findall(r'[A-Za-z]+', text))
    return ' '.join(korean_runs)

# 워터마크 삽입 함수
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centered text watermark on an image.

    Args:
        image: PIL image to stamp. It is not modified; a new image is returned.
        text: Watermark text, drawn once and centered on the image.

    Returns:
        A new RGBA image: ``image`` converted to RGBA with the watermark
        layer alpha-composited on top.
    """
    width, height = image.size

    # Fully transparent layer of the same size; only the text will be visible.
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size scales with the image width (4%).
    font_size = int(width * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except (OSError, ValueError, ImportError):
        # Font file missing/unreadable, non-positive size on tiny images, or
        # FreeType support unavailable: fall back to Pillow's built-in font.
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate.
        font = ImageFont.load_default()

    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    # Center the text box on the image.
    x = (width - text_width) // 2
    y = (height - text_height) // 2

    # White text at alpha 31/255 keeps the mark subtle.
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    return Image.alpha_composite(image.convert('RGBA'), overlay)


def extract_info_block(article_text):
    """Pull out the first product-info block from an article.

    The block starts at the first ``KBS2`` occurrence that is followed by a
    ``<모델명>`` tag and runs up to the next blank line or the end of the text.

    Args:
        article_text: Full article text to search.

    Returns:
        The matched block with surrounding whitespace stripped, or ``None``
        when the text contains no such block.
    """
    block_re = re.compile(r'(KBS2.*?<모델명>.*?)(?:\n\n|$)', re.DOTALL)
    found = block_re.search(article_text)
    return found.group(1).strip() if found else None

#제품명 추출 함수 수정 (여러 모델명 처리)

def extract_product_info(article_text):
    """Build a Coupang search link for every model name found in an article.

    The brand is read from the first ``<브랜드>`` line and reduced to its first
    two Latin-letter words; each ``<모델명>`` line contributes one model name.

    Args:
        article_text: Article text containing ``<브랜드>`` and ``<모델명>`` lines.

    Returns:
        A list of ``(model, search_url)`` tuples, one per model line, in the
        order they appear. Empty when the brand or every model is missing.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)

    if not (brand and model_matches):
        return []

    # Only the first two Latin words of the brand are used in the query.
    selected_brand = ' '.join(re.findall(r'[A-Za-z]+', brand)[:2])

    result = []
    for raw_model in model_matches:
        model = raw_model.strip()
        # Skip empty parts so a brand without Latin words does not leave a
        # leading space (a stray '+') in the query.
        search_query = ' '.join(part for part in (selected_brand, model) if part)
        # quote_plus encodes spaces as '+' and escapes '&', '#', '%' and
        # non-ASCII text that would otherwise corrupt the query string.
        search_url = (
            "https://www.coupang.com/np/search?component=&q="
            + quote_plus(search_query)
        )
        result.append((model, search_url))

    return result


#process_url 함수 수정 (모델명 여러개 대응, 이미지 경로 반환)

def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect its processing results.

    Args:
        tistory_url: URL of the post to download.
        output_dir: Destination folder for generated files. It is not used by
            the portion of the function visible here.

    Returns:
        A ``(result, image_paths)`` tuple: ``result`` is a list of log lines
        and ``image_paths`` a list of produced image paths. Any failure is
        reported as a log line instead of being raised.
    """
    result = []
    image_paths = []

    try:
        # Bound the wait so one unresponsive server cannot hang the request
        # handler indefinitely.
        response = requests.get(tistory_url, timeout=10)
        # Treat 4xx/5xx responses as failures rather than parsing an error
        # page as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]

        # (Omitted in this listing: the image processing and text extraction
        # logic that consumes img_urls continues here.)

    except Exception as e:
        # Per-URL boundary: record the failure and let the caller carry on
        # with the remaining URLs.
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")

    return result, image_paths

    
#여러 URL 처리 및 파일 저장

def process_multiple_urls(urls_text, output_dir):
    """Process newline-separated URLs and write the combined log to disk.

    Args:
        urls_text: URLs, one per line. Blank lines are ignored; ``None`` or an
            empty string means "no URLs".
        output_dir: Folder for the log file (and passed on to ``process_url``).
            A blank or ``None`` value falls back to ``DEFAULT_DOWNLOAD_DIR``.

    Returns:
        A ``(final_text, result_file_path, all_images)`` tuple: the combined
        log text, the path of ``result_log.txt`` inside the output folder, and
        the list of image paths gathered from every URL.
    """
    # A cleared textbox would otherwise make os.makedirs("") raise.
    if not output_dir or not output_dir.strip():
        output_dir = DEFAULT_DOWNLOAD_DIR

    # Create the folder before any URL is processed so per-URL processing
    # can already write into it.
    os.makedirs(output_dir, exist_ok=True)

    urls = [line.strip() for line in (urls_text or "").splitlines() if line.strip()]

    all_results = []
    all_images = []
    for url in urls:
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        # Visual separator between the logs of consecutive URLs.
        all_results.append("-" * 50)

    final_text = "\n".join(all_results)

    # Persist the log; 'utf-8-sig' writes a BOM at the start of the file.
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)

    return final_text, result_file_path, all_images
    
# Gradio app layout: two text inputs (URLs, output folder), a start button,
# and three outputs (log text, log file, processed images).

with gr.Blocks() as app:
    gr.Markdown("# ✨ 티스토리 자동 처리기 ✨\n- 이미지 배경 제거 + 워터마크 삽입\n- 제품명 추출 후 쿠팡 검색 링크 생성\n- 다운로드 기능 추가!")

    # Inputs: one URL per line, plus the folder results are written to.
    with gr.Row():
        urls_input = gr.Textbox(label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)", lines=5, placeholder="https://example1.com\nhttps://example2.com")
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)

    # Trigger and outputs.
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)  # <- changed here!!

    # The three outputs receive, in order, the three values returned by
    # process_multiple_urls: log text, log file path, list of image paths.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images]
    )

# Runs at module top level (there is no __main__ guard).
app.launch()