import io
import os
import re
import shutil
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
from rembg import remove
import gradio as gr

# Default download directory.
DEFAULT_DOWNLOAD_DIR = "./downloads"


def clean_text(text):
    """Strip every character that is not a word char, whitespace or Hangul."""
    cleaned = re.sub(r'[^\w\s가-힣]', '', text)
    return cleaned


def select_language(text):
    """Return the Hangul words of *text* joined by spaces.

    Falls back to the Latin-alphabet words when no Hangul is present.
    """
    hangul = re.findall(r'[가-힣]+', text)
    if hangul:
        return ' '.join(hangul)
    english = re.findall(r'[A-Za-z]+', text)
    return ' '.join(english)


def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Composite a faint, centered text watermark onto *image*.

    Returns a new RGBA image; *image* itself is not modified.
    """
    watermark = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(watermark)
    # Scale the font to ~4% of the image width.
    font_size = int(image.size[0] * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except OSError:
        # Font file not installed -> use Pillow's built-in bitmap font.
        font = ImageFont.load_default()
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    x = (image.size[0] - text_width) // 2
    y = (image.size[1] - text_height) // 2
    # Alpha 31/255 keeps the watermark subtle.
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))
    combined = Image.alpha_composite(image.convert('RGBA'), watermark)
    return combined


def extract_info_block(article_text):
    """Return the first 'KBS2 ... <모델명> ...' block of *article_text*.

    Returns None when the pattern is absent.
    """
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    match = re.search(pattern, article_text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None


# Product-name extraction (handles multiple model names per article).
def extract_product_info(article_text):
    """Extract (model, Coupang search URL) pairs from *article_text*.

    The brand is taken from the first <브랜드> tag (first two Latin words
    only); every <모델명> tag yields one search link. Returns an empty
    list when either piece is missing.
    """
    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
    result = []
    if brand and model_matches:
        brand_words = re.findall(r'[A-Za-z]+', brand)
        selected_brand = ' '.join(brand_words[:2])
        for model in model_matches:
            model = model.strip()
            search_query = f"{selected_brand} {model}"
            # quote_plus percent-encodes non-ASCII (e.g. Hangul) query text
            # safely; spaces still become '+', matching the old behaviour.
            search_url = (
                "https://www.coupang.com/np/search?component=&q="
                + quote_plus(search_query)
            )
            result.append((model, search_url))
    return result


# process_url: handles multiple model names, returns image paths too.
def process_url(tistory_url, output_dir):
    """Fetch one Tistory post; return (result messages, saved image paths)."""
    result = []
    image_paths = []
    try:
        # Timeout prevents one unresponsive host from hanging the batch.
        response = requests.get(tistory_url, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
        # (omitted: image processing and text-extraction logic continues here)
    except Exception as e:
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
    return result, image_paths


# Process several URLs and persist the combined log.
def process_multiple_urls(urls_text, output_dir):
    """Process every URL (one per line) and write a result log file.

    Returns (log text, log file path, list of image paths) for Gradio.
    """
    urls = [url.strip() for url in urls_text.strip().splitlines() if url.strip()]
    all_results = []
    all_images = []
    for url in urls:
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        all_results.append("-" * 50)
    final_text = "\n".join(all_results)
    # Persist the log next to the downloaded images.
    os.makedirs(output_dir, exist_ok=True)
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)
    return final_text, result_file_path, all_images


# Gradio UI wiring.
with gr.Blocks() as app:
    gr.Markdown("# ✨ 티스토리 자동 처리기 ✨\n- 이미지 배경 제거 + 워터마크 삽입\n- 제품명 추출 후 쿠팡 검색 링크 생성\n- 다운로드 기능 추가!")
    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images],
    )

if __name__ == "__main__":
    # Launch only when run as a script, keeping the module importable.
    app.launch()