Spaces:

ZORYE
/

CPS

Sleeping

App Files Files Community

ZORYE committed on Apr 26, 2025

Commit

55bc956

verified ·

1 Parent(s): 8ecdd7c

Create app.py

Browse files

Files changed (1) hide show

app.py +154 -0

app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import requests
+from bs4 import BeautifulSoup
+import re
+import os
+from rembg import remove
+from PIL import Image, ImageDraw, ImageFont
+import io
+import gradio as gr
+# ▼ 기본 다운로드 폴더 (Spaces 호환용 상대 경로)
+DEFAULT_DOWNLOAD_DIR = "./downloads"
+# 텍스트 전처리 함수들
def clean_text(text):
    """Strip punctuation and symbols from *text*.

    Word characters, whitespace and Hangul syllables are kept; every other
    character is deleted.
    """
    disallowed = re.compile(r'[^\w\s가-힣]')
    return disallowed.sub('', text)
def select_language(text):
    """Return the Hangul words of *text*, or its Latin words if it has none.

    Runs of Hangul syllables take priority: when at least one is present,
    only those runs are returned, joined by single spaces. Otherwise the runs
    of ASCII letters are joined the same way (an empty string if there are
    none).
    """
    korean_words = re.findall(r'[가-힣]+', text)
    words = korean_words or re.findall(r'[A-Za-z]+', text)
    return ' '.join(words)
+# 워터마크 삽입 함수
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centred text watermark on *image*.

    Args:
        image: PIL image to watermark. It is converted to RGBA for
            compositing; the returned image is a new object.
        text: Watermark text, drawn once and centred on the image.

    Returns:
        A new RGBA image with the watermark composited over the input.
    """
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size is 4% of the image width. Clamp to 1 so that very narrow
    # images (< 25 px) do not request a zero-sized font.
    font_size = max(1, int(image.size[0] * 0.04))
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except OSError:
        # The font file is not available on this system; use Pillow's
        # built-in default font instead.
        font = ImageFont.load_default()

    # Measure the rendered text so it can be centred.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (image.size[0] - (right - left)) // 2
    y = (image.size[1] - (bottom - top)) // 2

    # White text with alpha 31/255 keeps the watermark barely visible.
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    return Image.alpha_composite(image.convert('RGBA'), overlay)
+# 프로그램/아이템 정보 블록 추출
def extract_info_block(article_text):
    """Extract the programme/item block from the article text.

    The block starts at the first ``KBS2`` and runs through the ``<모델명>``
    (model name) tag up to the next blank line or the end of the text.

    Returns:
        The stripped block, or ``None`` when no such block is found.
    """
    block_re = re.compile(r'(KBS2.*?<모델명>.*?)(?:\n\n|$)', re.DOTALL)
    found = block_re.search(article_text)
    if found is None:
        return None
    return found.group(1).strip()
+# 브랜드/모델명으로 검색 쿼리 생성
def extract_product_info(article_text):
    """Build a Coupang search URL from the brand and model tags in the text.

    Reads the rest of the line after the ``<브랜드>`` (brand) tag and after
    the ``<모델명>`` (model name) tag. The search query is the first two
    Latin-letter words of the brand followed by the model name; if the brand
    contains no Latin letters, its first two whitespace-separated words are
    used instead so the query never starts with an empty brand.

    Args:
        article_text: Plain text of the article.

    Returns:
        ``(model, search_url)`` when both tags have a value, otherwise
        ``(None, None)``.
    """
    from urllib.parse import quote_plus

    def _tag_value(tag):
        # Value is whatever follows the tag on the same line.
        found = re.search(tag + r'[ \t]*([^\n]+)', article_text)
        return found.group(1).strip() if found else None

    brand = _tag_value('<브랜드>')
    model = _tag_value('<모델명>')
    if not (brand and model):
        return None, None

    brand_words = re.findall(r'[A-Za-z]+', brand) or brand.split()
    selected_brand = ' '.join(brand_words[:2])  # first two brand words
    search_query = f"{selected_brand} {model}"

    # quote_plus percent-encodes reserved and non-ASCII characters (e.g. '&',
    # '#', Hangul) and turns spaces into '+', so the query cannot break the URL.
    search_url = (
        "https://www.coupang.com/np/search?component=&q="
        + quote_plus(search_query)
    )
    return model, search_url
+# 메인 처리 함수
def process_url(tistory_url, output_dir, timeout=15):
    """Download and process one blog post: images first, then product text.

    For every ``<img>`` on the page (except icons whose file name starts with
    ``new_ico``) the image is downloaded, its background is removed, a
    watermark is added and the result is saved as a PNG in *output_dir*.
    Afterwards the programme info block and a Coupang search link are
    extracted from the page text.

    A failure on a single image is reported and skipped, so the remaining
    images and the text extraction still run. A failure fetching or parsing
    the page itself is reported as one error line.

    Args:
        tistory_url: URL of the post to process.
        output_dir: Directory for the processed images; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of human-readable status/result lines.
    """
    from urllib.parse import urljoin, urlsplit

    result = []
    try:
        response = requests.get(tistory_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Resolve relative and protocol-relative sources against the page URL.
        img_urls = [
            urljoin(tistory_url, img['src'])
            for img in soup.find_all('img')
            if img.get('src')
        ]

        os.makedirs(output_dir, exist_ok=True)

        # Image processing
        for idx, img_url in enumerate(img_urls, start=1):
            # Take the file name from the URL path only, so a '/' inside the
            # query string cannot be mistaken for a path separator.
            base_name = os.path.basename(urlsplit(img_url).path)
            if base_name.startswith('new_ico'):
                continue  # ignore icons whose name starts with new_ico
            try:
                img_response = requests.get(img_url, timeout=timeout)
                img_response.raise_for_status()
                input_img = Image.open(io.BytesIO(img_response.content)).convert('RGBA')
                # Re-encode as PNG bytes before handing the image to rembg.
                input_bytes = io.BytesIO()
                input_img.save(input_bytes, format='PNG')
                removed = remove(input_bytes.getvalue())
                img_no_bg = Image.open(io.BytesIO(removed)).convert('RGBA')
                final_img = add_watermark(img_no_bg)
                name = os.path.splitext(base_name)[0] or "image"
                save_path = os.path.join(output_dir, f"{name}_{idx}_processed.png")
                final_img.save(save_path)
                result.append(f"✔️ 저장 완료: {save_path}")
            except Exception as e:
                # One bad image must not abort the rest of the post.
                result.append(f"⚠️ 이미지 처리 실패: {img_url} / 에러: {e}")

        # Text extraction
        article_text = soup.get_text()
        info_block = extract_info_block(article_text)
        item_name, search_url = extract_product_info(article_text)

        # Formatted output
        if info_block:
            result.append("\n===== 프로그램 정보 블록 =====")
            result.append(info_block)
            result.append("================================\n")
        if item_name and search_url:
            result.append(f"🛍️ 아이템 : {item_name}")
            result.append(f"🔗 쿠팡 링크: {search_url}")
    except Exception as e:
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
    return result
+# 여러 URL 처리
def process_multiple_urls(urls_text, output_dir):
    """Process every URL listed in *urls_text*, one per line.

    Blank lines are ignored and surrounding whitespace is trimmed. The result
    lines of each URL are followed by a 50-dash separator, and everything is
    returned as one newline-joined string.
    """
    separator = "-" * 50
    lines = []
    for raw_line in urls_text.strip().splitlines():
        url = raw_line.strip()
        if not url:
            continue
        lines += process_url(url, output_dir)
        lines.append(separator)
    return "\n".join(lines)
+# Gradio UI 구성
with gr.Blocks() as app:
    # Header describing what the tool does.
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 프로그램 정보 블록 저장"
    )

    # Input: one post URL per line.
    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )

    # Input: directory where processed images are written.
    with gr.Row():
        output_folder = gr.Textbox(
            label="저장할 폴더 경로",
            value=DEFAULT_DOWNLOAD_DIR,
        )

    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)

    # Run the batch processor on click and show its text report.
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text],
    )

app.launch()