# CPS / app.py — Hugging Face Space "CPS" (ZORYE), revision b11fe8e
import io
import os
import re
import shutil
import urllib.parse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
from rembg import remove
# ▼ Default download directory
DEFAULT_DOWNLOAD_DIR = "./downloads"
# Text preprocessing helper
def clean_text(text):
    """Strip every character that is not a word char, whitespace, or Hangul syllable.

    NOTE: the checked-in file had the Hangul range mojibake-corrupted
    (UTF-8 decoded as cp874), producing an invalid reversed character range
    that raised re.error at runtime; restored to the intended 가-힣 block.
    """
    return re.sub(r'[^\w\s가-힣]', '', text)
def select_language(text):
    """Return the Korean words of *text* joined by spaces; fall back to English words.

    If the text contains any Hangul-syllable runs, those are returned;
    otherwise all latin-letter runs are returned. Empty string when neither exists.

    NOTE: the Hangul range was mojibake-corrupted in the checked-in file
    (invalid reversed range raising re.error); restored to 가-힣.
    """
    hangul_runs = re.findall(r'[가-힣]+', text)
    if hangul_runs:
        return ' '.join(hangul_runs)
    english_runs = re.findall(r'[A-Za-z]+', text)
    return ' '.join(english_runs)
# ์›Œํ„ฐ๋งˆํฌ ์‚ฝ์ž… ํ•จ์ˆ˜
def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
    """Overlay a faint, centered text watermark onto *image*.

    Args:
        image: PIL.Image to watermark (any mode; it is converted to RGBA).
        text: watermark string; default repeats the site name.

    Returns:
        A new RGBA PIL.Image with the translucent watermark composited on top.
        (Caller must convert before saving to formats without alpha, e.g. JPEG.)
    """
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    # Font scales with image width so the mark stays proportional to the image.
    font_size = int(image.size[0] * 0.04)
    try:
        font = ImageFont.truetype("ariali.ttf", font_size)
    except OSError:
        # Font file missing/unreadable -> PIL's built-in bitmap font.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        font = ImageFont.load_default()
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    # Center the text block on the image.
    x = (image.size[0] - text_width) // 2
    y = (image.size[1] - text_height) // 2
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 31))  # alpha 31 ≈ 12% opacity
    return Image.alpha_composite(image.convert('RGBA'), overlay)
def extract_info_block(article_text):
    """Return the first block starting at 'KBS2' and running through a '<모델명>' tag.

    The match ends at the first blank line (or end of text); the stripped
    block is returned, or None if no such block exists.

    NOTE: the '<모델명>' literal was mojibake-corrupted in the checked-in file
    (UTF-8 decoded as cp874); restored here.
    """
    pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
    match = re.search(pattern, article_text, re.DOTALL)
    return match.group(1).strip() if match else None
# Product-name extraction (handles multiple model names)
def extract_product_info(article_text):
    """Extract (model, coupang_search_url) pairs from an article's tag lines.

    Looks for one '<브랜드>' (brand) line and any number of '<모델명>' (model)
    lines. The search query is the first two latin words of the brand plus the
    model string, URL-encoded for the Coupang search endpoint.

    Returns:
        list of (model, search_url) tuples; empty when brand or models are missing.
    """
    brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
    brand = brand_match.group(1).strip() if brand_match else None
    model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
    result = []
    if brand and model_matches:
        # Only the first two latin words of the brand go into the query.
        brand_words = re.findall(r'[A-Za-z]+', brand)
        selected_brand = ' '.join(brand_words[:2])
        for model in model_matches:
            model = model.strip()
            search_query = f"{selected_brand} {model}"
            # quote_plus percent-encodes non-ASCII model names and turns spaces
            # into '+' (the old str.replace(' ', '+') left Korean raw in the URL,
            # producing RFC-invalid URLs; output is identical for ASCII queries).
            encoded_query = urllib.parse.quote_plus(search_query)
            search_url = f"https://www.coupang.com/np/search?component=&q={encoded_query}"
            result.append((model, search_url))
    return result
# process_url: handles multiple model names per post, returns image paths
def process_url(tistory_url, output_dir):
    """Fetch one Tistory post and collect its image URLs for processing.

    Args:
        tistory_url: post URL to scrape.
        output_dir: directory where processed images are written
            (unused in this truncated version — TODO confirm downstream logic).

    Returns:
        (result, image_paths): human-readable log lines and saved image paths.
        On any failure a single error log line is returned instead of raising,
        so a bad URL does not abort the batch.
    """
    result = []
    image_paths = []
    try:
        # Timeout added: without it a hung server blocked the whole batch forever.
        response = requests.get(tistory_url, timeout=15)
        response.raise_for_status()  # surface HTTP errors instead of parsing an error page
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
        # (omitted in source: image download/processing and text-extraction logic)
    except Exception as e:
        # Best-effort batch processing: record the failure and keep going.
        # (Korean message restored from mojibake: "URL processing failed / error".)
        result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
    return result, image_paths
# Process multiple URLs and save the results to a log file
def process_multiple_urls(urls_text, output_dir):
    """Process every URL (one per line of *urls_text*) and persist a run log.

    Blank lines are skipped. After each URL's results a dashed separator line
    is appended. The joined log is written to ``result_log.txt`` in
    *output_dir* (created if missing) with a UTF-8 BOM.

    Returns:
        (final_text, result_file_path, all_images): the full log text, the
        path of the written log file, and every downloaded image path.
    """
    all_results = []
    all_images = []
    for raw_line in urls_text.strip().splitlines():
        url = raw_line.strip()
        if not url:
            continue
        results, image_paths = process_url(url, output_dir)
        all_results.extend(results)
        all_images.extend(image_paths)
        all_results.append("-" * 50)
    final_text = "\n".join(all_results)
    # Persist the run log; utf-8-sig adds a BOM so Excel/Notepad detect UTF-8.
    os.makedirs(output_dir, exist_ok=True)
    result_file_path = os.path.join(output_dir, "result_log.txt")
    with open(result_file_path, 'w', encoding='utf-8-sig') as f:
        f.write(final_text)
    return final_text, result_file_path, all_images
# ---- Gradio UI -----------------------------------------------------------
# Korean labels below were mojibake-corrupted in the checked-in file
# (UTF-8 decoded as cp874); restored here.
# NOTE(review): original indentation was lost in the paste; the Row/widget
# grouping is reconstructed — verify the intended layout.
with gr.Blocks() as app:
    gr.Markdown(
        "# ✨ 티스토리 자동 처리기 ✨\n"
        "- 이미지 배경 제거 + 워터마크 삽입\n"
        "- 제품명 추출 후 쿠팡 검색 링크 생성\n"
        "- 다운로드 기능 추가!"
    )
    with gr.Row():
        urls_input = gr.Textbox(
            label="티스토리 게시글 URL 여러 개 (줄바꿈해서 입력)",
            lines=5,
            placeholder="https://example1.com\nhttps://example2.com",
        )
    with gr.Row():
        output_folder = gr.Textbox(label="저장할 폴더 경로", value=DEFAULT_DOWNLOAD_DIR)
    process_button = gr.Button("처리 시작 🚀")
    output_text = gr.Textbox(label="결과", lines=20)
    download_log = gr.File(label="결과 로그 다운로드")
    download_images = gr.Files(label="다운로드한 이미지들", file_types=['.png'], interactive=True)
    process_button.click(
        fn=process_multiple_urls,
        inputs=[urls_input, output_folder],
        outputs=[output_text, download_log, download_images],
    )

app.launch()