# G3YGSDE / info_blog.py
# ssboost's picture
# Update info_blog.py
# 161ac86 verified
import os
import random
import re
import requests
import logging
from bs4 import BeautifulSoup
import html
import markdown2
from dotenv import load_dotenv # ์ถ”๊ฐ€๋œ ๋ถ€๋ถ„
# Load environment variables
load_dotenv()  # added: load environment variables before the settings below are read
# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Constants
TARGET_CHAR_LENGTH = 4000  # minimum character count of an informational blog post
MIN_SECTION_LENGTH = 600  # minimum character count under each subheading
MAX_TOKENS = 15000  # maximum number of tokens for the Gemini API
TEMPERATURE = 0.85  # Gemini API temperature value
TOP_P = 0.9  # Gemini API top_p value
# Settings for the reference-search API (read once, at import time)
API_BASE_URL = os.getenv("API_BASE_URL", "").rstrip('/')
API_KEY = os.getenv("API_KEY", "")
API_HEADERS = {
    "x-api-key": API_KEY,
    "content-type": "application/json"
}
# Gemini API key configuration
def load_gemini_api_keys():
    """Collect the Gemini API keys configured through environment variables.

    GEMINI_API_KEY_1 .. GEMINI_API_KEY_5 are read in order and the non-empty
    values are kept. Only when none of them is set is GEMINI_API_KEY used,
    as the single key.

    Returns:
        list[str]: the usable keys, in slot order.

    Raises:
        ValueError: if no key is configured at all.
    """
    # Unset slots read as "" and are filtered out as falsy values
    numbered_values = [os.getenv(f"GEMINI_API_KEY_{slot}", "") for slot in range(1, 6)]
    api_keys = list(filter(None, numbered_values))
    if not api_keys:
        # No numbered key present: fall back to the plain GEMINI_API_KEY variable
        default_key = os.getenv("GEMINI_API_KEY")
        if default_key:
            api_keys = [default_key]
    if not api_keys:
        raise ValueError("API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. .env ํŒŒ์ผ์— GEMINI_API_KEY ๋˜๋Š” GEMINI_API_KEY_1~5๋ฅผ ์ถ”๊ฐ€ํ•˜์„ธ์š”.")
    logging.info(f"์ด {len(api_keys)}๊ฐœ์˜ API ํ‚ค๊ฐ€ ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    return api_keys
# Picks an API key at random on every call
def get_random_gemini_client():
    """Create a Gemini client authenticated with a randomly chosen API key.

    The keys come from load_gemini_api_keys(); the 1-based position of the
    chosen key (never the key itself) is written to the log.

    Returns:
        A new ``genai.Client`` built with the selected key.

    Raises:
        ValueError: if no API key is available.
    """
    # The SDK is imported at call time, as in the original code.
    from google import genai
    from google.genai import types  # noqa: F401  (imported but not used here)

    api_keys = load_gemini_api_keys()
    if not api_keys:
        raise ValueError("์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ API ํ‚ค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

    # Draw a position instead of a key so that the position can be logged
    random_index = random.randrange(len(api_keys))
    logging.info(f"๋žœ๋ค API ํ‚ค ์„ ํƒ: ์ธ๋ฑ์Šค {random_index + 1}")

    return genai.Client(api_key=api_keys[random_index])
# --- Google Gemini SDK initialization (revised) ---
# Previous module-level setup, left commented out:
# from google import genai
# from google.genai import types
# client = genai.Client(api_key=gemini_api_key)
# Replaced by obtaining a client through a function call.
def fetch_references(topic, timeout=10):
    """Fetch three reference blog texts for ``topic`` from the search API.

    Args:
        topic: search keyword. ``None``, empty and whitespace-only values are
            rejected without calling the API.
        timeout: seconds to wait for the HTTP request (default 10). Passed to
            ``requests.get`` so a stalled server cannot block the caller forever.

    Returns:
        list[str]: always three strings. On success these are the
        ``reference1``..``reference3`` fields of the JSON response (a
        placeholder message is used for a missing field). On any failure the
        same explanatory message is repeated three times.
    """
    try:
        if not topic or not topic.strip():
            return ["๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."] * 3
        encoded_keyword = requests.utils.quote(topic.strip())
        url = f"{API_BASE_URL}/search/{encoded_keyword}"
        logging.info(f"API ํ˜ธ์ถœ URL: {url}")
        # Never write the API key to the log: mask it before logging the headers.
        loggable_headers = {**API_HEADERS, "x-api-key": "***"}
        logging.info(f"API ํ—ค๋”: {loggable_headers}")
        response = requests.get(url, headers=API_HEADERS, timeout=timeout)
        logging.info(f"API ์‘๋‹ต ์ƒํƒœ: {response.status_code}")
        logging.info(f"API ์‘๋‹ต ๋‚ด์šฉ: {response.text}")
        if response.ok:
            result = response.json()
            return [
                result.get("reference1", "์ฐธ๊ณ ๊ธ€1์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."),
                result.get("reference2", "์ฐธ๊ณ ๊ธ€2์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."),
                result.get("reference3", "์ฐธ๊ณ ๊ธ€3์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            ]
        else:
            return [f"API ์˜ค๋ฅ˜: {response.text}"] * 3
    except Exception as e:
        # Deliberate best effort: callers always receive three strings, never an exception.
        return [f"์ฐธ๊ณ ๊ธ€ ์ˆ˜์ง‘ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"] * 3
def fetch_crawl_results(query):
    """Return the three reference texts for ``query`` as a 3-tuple.

    Thin wrapper around fetch_references() that exposes the first three
    entries of its result as separate values.
    """
    found = fetch_references(query)
    return tuple(found[position] for position in (0, 1, 2))
def get_style_prompt(style="์นœ๊ทผํ•œ"):
    """Return the writing-style guide prompt for ``style``.

    The guides are stored in a dict keyed by the style label; a label that is
    not a key falls back to the entry of the default label.

    NOTE(review): the labels and the guide texts are Korean text that is
    mis-encoded in this copy of the file. The three labels appear to mean
    "friendly" (the default), "general" and "professional" - confirm against
    a correctly encoded copy. The literals are left untouched here.
    """
    prompts = {
        "์นœ๊ทผํ•œ": """
[์นœ๊ทผํ•œ ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ์Šคํƒ€์ผ ๊ฐ€์ด๋“œ]
1. ํ†ค๊ณผ ์–ด์กฐ
- ๋Œ€ํ™”ํ•˜๋“ฏ ํŽธ์•ˆํ•˜๊ณ  ์นœ๊ทผํ•œ ๋งํˆฌ ์‚ฌ์šฉ
- ์ฃผ์ œ์— ๋Œ€ํ•œ ๊ด€์‹ฌ๊ณผ ํ˜ธ๊ธฐ์‹ฌ์„ ๋‹ด์€ ํ‘œํ˜„ ์‚ฌ์šฉ
2. ๋ฌธ์žฅ ๋ฐ ์–ดํˆฌ
- ๋ฐ˜๋“œ์‹œ 'ํ•ด์š”์ฒด'๋กœ ์ž‘์„ฑ, ์ ˆ๋Œ€ '์Šต๋‹ˆ๋‹ค'์ฒด๋ฅผ ์‚ฌ์šฉํ•˜์ง€ ๋ง ๊ฒƒ
- '~์š”'๋กœ ๋๋‚˜๋„๋ก ์ž‘์„ฑ, '~๋‹ค'๋กœ ๋๋‚˜์ง€ ์•Š๊ฒŒ ํ•˜๋ผ
- ๊ตฌ์–ด์ฒด ํ‘œํ˜„ ์‚ฌ์šฉ (์˜ˆ: "~ํ–ˆ์–ด์š”", "~์ธ ๊ฒƒ ๊ฐ™์•„์š”")
- ์ด๋ชจํ‹ฐ์ฝ˜์€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”
3. ์šฉ์–ด ๋ฐ ์„ค๋ช… ๋ฐฉ์‹
- ์ „๋ฌธ ์šฉ์–ด๋Š” ์‰ฌ์šด ๋‹จ์–ด๋กœ ํ’€์–ด์„œ ์„ค๋ช…
- ๋น„์œ ๋‚˜ ์€์œ ๋ฅผ ํ™œ์šฉํ•˜์—ฌ ๋ณต์žกํ•œ ๊ฐœ๋… ์„ค๋ช…
- ์ˆ˜์‚ฌ์˜๋ฌธ๋ฌธ ํ™œ์šฉํ•˜์—ฌ ๋…์ž์™€ ์†Œํ†ตํ•˜๋Š” ๋А๋‚Œ ์ฃผ๊ธฐ (์˜ˆ: "์—ฌ๋Ÿฌ๋ถ„๋„ ์ด๋Ÿฐ ์ƒ๊ฐ ํ•ด๋ณด์…จ๋‚˜์š”?")
- ๊ตฌ์ฒด์  ์‚ฌ๋ก€์™€ ์˜ˆ์‹œ๋ฅผ ํ†ตํ•œ ๊ฐœ๋… ์„ค๋ช…
4. ์ •๋ณด ์ „๋‹ฌ ๋ฐฉ์‹
- ๊ฐœ์ธ์ ์ธ ๊ด€์ ์— ๋…น์—ฌ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ •๋ณด ์ „๋‹ฌ
- ๋ณต์žกํ•œ ๊ฐœ๋…์„ ๋‹จ๊ณ„์ ์œผ๋กœ ์„ค๋ช…
- ๋…์ž๊ฐ€ ์‹ค์ œ๋กœ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ์‹ค์šฉ์  ์ •๋ณด ์ œ๊ณต
5. ๋…์ž์™€์˜ ์ƒํ˜ธ์ž‘์šฉ
- ๋…์ž์˜ ์˜๊ฒฌ์„ ๋ฌผ์–ด๋ณด๋Š” ์งˆ๋ฌธ ํฌํ•จ
- ์‹ค์ƒํ™œ์— ์ ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ํŒ์ด๋‚˜ ์กฐ์–ธ ์ œ๊ณต
์ฃผ์˜์‚ฌํ•ญ: ๋„ˆ๋ฌด ๊ฐ€๋ฒผ์šด ํ†ค์€ ์ง€์–‘ํ•˜๊ณ , ์ฃผ์ œ์˜ ์ „๋ฌธ์„ฑ๊ณผ ์‹ ๋ขฐ์„ฑ์„ ํ•ด์น˜์ง€ ์•Š๋Š” ์„ ์—์„œ ์นœ๊ทผํ•จ ์œ ์ง€
""",
        "์ผ๋ฐ˜์ ์ธ": """
#์ผ๋ฐ˜์ ์ธ ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ์Šคํƒ€์ผ ๊ฐ€์ด๋“œ
1. ํ†ค๊ณผ ์–ด์กฐ
- ์ค‘๋ฆฝ์ ์ด๊ณ  ๊ฐ๊ด€์ ์ธ ํ†ค ์œ ์ง€
- ์ ์ ˆํ•œ ์กด๋Œ“๋ง ์‚ฌ์šฉ (์˜ˆ: "~ํ•ฉ๋‹ˆ๋‹ค", "~์ž…๋‹ˆ๋‹ค")
- ์ •๋ณด ์ „๋‹ฌ ์ค‘์‹ฌ์˜ ๋ช…ํ™•ํ•œ ์–ดํˆฌ
2. ๋‚ด์šฉ ๊ตฌ์กฐ ๋ฐ ์ „๊ฐœ
- ๋ช…ํ™•ํ•œ ์ฃผ์ œ ์†Œ๊ฐœ๋กœ ์‹œ์ž‘
- ๋…ผ๋ฆฌ์ ์ธ ์ˆœ์„œ๋กœ ์ •๋ณด ์ „๊ฐœ (๋ฐฐ๊ฒฝ โ†’ ์ฃผ์š” ๊ฐœ๋… โ†’ ๋ถ„์„ โ†’ ์ ์šฉ ๋“ฑ)
- ํ•ต์‹ฌ ํฌ์ธํŠธ๋ฅผ ๊ฐ•์กฐํ•˜๋Š” ์†Œ์ œ๋ชฉ ํ™œ์šฉ
- ์ ์ ˆํ•œ ๊ธธ์ด์˜ ๋‹จ๋ฝ์œผ๋กœ ๊ตฌ์„ฑ
3. ์šฉ์–ด ๋ฐ ์„ค๋ช… ๋ฐฉ์‹
- ์ผ๋ฐ˜์ ์œผ๋กœ ์ดํ•ดํ•˜๊ธฐ ์‰ฌ์šด ์šฉ์–ด ์„ ํƒ
- ํ•„์š”์‹œ ์ „๋ฌธ ์šฉ์–ด์— ๊ฐ„๋‹จํ•œ ์„ค๋ช… ์ถ”๊ฐ€
- ๊ฐ๊ด€์ ์ธ ์ •๋ณด ์ œ๊ณต์— ์ค‘์ 
- ๊ท ํ˜• ์žกํžŒ ์‹œ๊ฐ์—์„œ ๋‹ค์–‘ํ•œ ๊ด€์  ์ œ์‹œ
4. ์ •๋ณด ์ „๋‹ฌ ๋ฐฉ์‹
- ์ฃผ์ œ์˜ ๊ธฐ๋ณธ ๊ฐœ๋…๊ณผ ์›๋ฆฌ ๋ช…ํ™•ํ•˜๊ฒŒ ์ œ๊ณต
- ๊ตฌ์ฒด์ ์ธ ์˜ˆ์‹œ์™€ ์‚ฌ๋ก€ ํฌํ•จ
- ์‹œ๊ฐ์  ์š”์†Œ(๊ทธ๋ž˜ํ”„, ํ‘œ ๋“ฑ) ์„ค๋ช… ๋ฐฉ์‹
- ์ตœ์‹  ์—ฐ๊ตฌ๋‚˜ ๋™ํ–ฅ ์ฐธ๊ณ 
5. ๋…์ž ์ƒํ˜ธ์ž‘์šฉ
- ์ ์ ˆํžˆ ๋…์ž์˜ ์ƒ๊ฐ์„ ๋ฌป๋Š” ์งˆ๋ฌธ ํฌํ•จ
- ์ถ”๊ฐ€ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์žˆ๋Š” ํ‚ค์›Œ๋“œ ์ œ์‹œ
- ์‹ค์šฉ์ ์ธ ์ ์šฉ ๋ฐฉ์•ˆ ์ œ๊ณต
6. ๋งˆ๋ฌด๋ฆฌ
- ์ฃผ์š” ๋‚ด์šฉ ๊ฐ„๋‹จํžˆ ์š”์•ฝ
- ์ฃผ์ œ์˜ ์ค‘์š”์„ฑ์ด๋‚˜ ์˜์˜ ๊ฐ•์กฐ
- ํ–ฅํ›„ ์ „๋ง์ด๋‚˜ ์ถ”๊ฐ€ ๊ณ ๋ ค์‚ฌํ•ญ ์ œ์‹œ
์ฃผ์˜์‚ฌํ•ญ: ๋„ˆ๋ฌด ๋”ฑ๋”ฑํ•˜๊ฑฐ๋‚˜ ์ง€๋ฃจํ•˜์ง€ ์•Š๋„๋ก ๋‹ค์–‘ํ•œ ์˜ˆ์‹œ์™€ ๊ฐ๊ด€์  ์ •๋ณด ์‚ฌ์ด์˜ ๊ท ํ˜• ์œ ์ง€
""",
        "์ „๋ฌธ์ ์ธ": """
#์ „๋ฌธ์ ์ธ ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ์Šคํƒ€์ผ ๊ฐ€์ด๋“œ
1. ํ†ค๊ณผ ๊ตฌ์กฐ
- ๊ณต์‹์ ์ด๊ณ  ์ „๋ฌธ์ ์ธ ํ†ค ์‚ฌ์šฉ
- ๊ฐ๊ด€์ ์ด๊ณ  ๋ถ„์„์ ์ธ ์ ‘๊ทผ ์œ ์ง€
- ๋ช…ํ™•ํ•œ ์„œ๋ก (๊ฐœ์š”), ๋ณธ๋ก (์ƒ์„ธ ๋ถ„์„), ๊ฒฐ๋ก (์ข…ํ•ฉ ํ‰๊ฐ€) ๊ตฌ์กฐ
- ์ฒด๊ณ„์ ์ธ ์ •๋ณด ์ „๊ฐœ
- ์„ธ๋ถ€ ์„น์…˜์„ ์œ„ํ•œ ๋ช…ํ™•ํ•œ ์†Œ์ œ๋ชฉ ์‚ฌ์šฉ
2. ๋‚ด์šฉ ๊ตฌ์„ฑ ๋ฐ ์ „๊ฐœ
- ์ฃผ์ œ์˜ ์—ญ์‚ฌ์  ๋ฐฐ๊ฒฝ, ์ด๋ก ์  ๊ธฐ๋ฐ˜, ํ˜„์žฌ ๋™ํ–ฅ ๋“ฑ ์‹ฌ์ธต์  ์ •๋ณด ํฌํ•จ
- ๋…ผ๋ฆฌ์  ์—ฐ๊ฒฐ์„ ์œ„ํ•œ ์ „ํ™˜์–ด ํ™œ์šฉ
- ์ „๋ฌธ ์šฉ์–ด ์ ์ ˆํžˆ ํ™œ์šฉ (ํ•„์š”์‹œ ๊ฐ„๋žตํ•œ ์„ค๋ช… ์ œ๊ณต)
- ์‹ฌ์ธต์ ์ธ ๋ถ„์„๊ณผ ๋น„ํŒ์  ํ‰๊ฐ€ ์ œ๊ณต
- ๋‹ค์–‘ํ•œ ๊ด€์ ๊ณผ ์ด๋ก ์  ํ”„๋ ˆ์ž„์›Œํฌ ์ œ์‹œ
3. ๋ฐ์ดํ„ฐ ๋ฐ ๊ทผ๊ฑฐ ํ™œ์šฉ
- ํ†ต๊ณ„, ์—ฐ๊ตฌ ๊ฒฐ๊ณผ, ์‚ฌ๋ก€ ์—ฐ๊ตฌ ๋“ฑ ๊ฐ๊ด€์  ๋ฐ์ดํ„ฐ ํ™œ์šฉ
- ์ฃผ์ œ ๋ถ„์„์„ ์œ„ํ•œ ์ฒด๊ณ„์ ์ธ ํ”„๋ ˆ์ž„์›Œํฌ ์ œ์‹œ
- ์ˆ˜์น˜ ๋ฐ์ดํ„ฐ๋Š” ๋ช…ํ™•ํžˆ ์„ค๋ช… (์ถ”์„ธ, ์ƒ๊ด€๊ด€๊ณ„, ์ธ๊ณผ๊ด€๊ณ„ ๋“ฑ)
- ํ•™์ˆ ์  ๊ทผ๊ฑฐ์™€ ํ˜„์‹ค ์ ์šฉ์˜ ๊ท ํ˜•
4. ์ „๋ฌธ์  ์ •๋ณด ์ œ๊ณต
- ์ตœ์‹  ์—ฐ๊ตฌ ๋™ํ–ฅ ๋ฐ ๋ฐœ์ „ ๋ฐฉํ–ฅ ๋ถ„์„
- ์ด๋ก ๊ณผ ์‹ค์ œ ์ ์šฉ ์‚ฌ์ด์˜ ๊ฐ„๊ทน ๋ถ„์„
- ์ฃผ์ œ ๊ด€๋ จ ์Ÿ์ ๊ณผ ๋…ผ์Ÿ์  ์†Œ๊ฐœ
- ์ฒด๊ณ„์ ์ธ ๋ฌธ์ œ ํ•ด๊ฒฐ ์ ‘๊ทผ๋ฒ• ์ œ์‹œ
5. ๋งˆ๋ฌด๋ฆฌ
- ํ•ต์‹ฌ ์ •๋ณด ์š”์•ฝ ๋ฐ ์ข…ํ•ฉ ํ‰๊ฐ€
- ์ฃผ์ œ์˜ ํ•™๋ฌธ์ , ์‹ค์šฉ์  ์˜์˜ ๋ถ„์„
- ํ–ฅํ›„ ์—ฐ๊ตฌ ๋ฐฉํ–ฅ์ด๋‚˜ ๋ฐœ์ „ ๊ฐ€๋Šฅ์„ฑ ์ œ์‹œ
์ฃผ์˜์‚ฌํ•ญ: ์ „๋ฌธ์„ฑ์„ ์œ ์ง€ํ•˜๋˜, ๋ถˆํ•„์š”ํ•˜๊ฒŒ ์–ด๋ ค์šด ์šฉ์–ด๋Š” ์ง€์–‘ํ•˜๊ณ  ๋ช…ํ™•ํ•œ ์„ค๋ช… ์ค‘์‹ฌ์œผ๋กœ ๊ตฌ์„ฑ
"""
    }
    # Unknown labels fall back to the default style's guide.
    return prompts.get(style, prompts["์นœ๊ทผํ•œ"])
def get_informational_blog_prompt():
    """Return the formatting-rules prompt for an informational blog post.

    The prompt is picked with ``random.choice`` from a list that currently
    holds a single entry, so the same text is returned on every call.

    NOTE(review): the prompt text is mis-encoded Korean in this copy of the
    file and is left untouched here.
    """
    prompts = [
        """
[์ค‘์š”: ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ํ•„์ˆ˜ ๊ทœ์น™]
์ด ๊ทœ์น™์„ ๋ฐ˜๋“œ์‹œ ๋”ฐ๋ฅด์„ธ์š”. ์–ด๋–ค ์ƒํ™ฉ์—์„œ๋„ ์˜ˆ์™ธ๋Š” ์—†์Šต๋‹ˆ๋‹ค:
1. ๋งˆํฌ๋‹ค์šด ๋ฌธ๋ฒ•(**, *, #, -, 1., 2., 3.)์„ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.
2. ๋ชจ๋“  ์†Œ์ œ๋ชฉ์€ ๋ฒˆํ˜ธ ์—†์ด ์ผ๋ฐ˜ ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”.
3. ๋ชจ๋“  ๋ชฉ๋ก์€ ๋ถˆ๋ฆฟ์ด๋‚˜ ๋ฒˆํ˜ธ ์—†์ด ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฌธ์žฅ์œผ๋กœ ์„œ์ˆ ํ•˜์„ธ์š”.
4. "์ฐธ๊ณ ๊ธ€", "์ฐธ๊ณ ๊ธ€์— ๋”ฐ๋ฅด๋ฉด" ๋“ฑ์˜ ํ‘œํ˜„์„ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.
5. ์ฃผ์ œ์— ๊ด€ํ•œ ์ •ํ™•ํ•˜๊ณ  ์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๋งŒ ์ž‘์„ฑํ•˜์„ธ์š”.
6. ๊ธ€์˜ ์ฃผ์ œ๋Š” ๋ฐ˜๋“œ์‹œ ์ฃผ์–ด์ง„ ์ฐธ๊ณ ๊ธ€์˜ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”.
7. ๊ธ€์˜ ๊ตฌ์„ฑ์€ ๋‹ค์Œ์„ ํฌํ•จํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค:
- ์ฃผ์ œ์— ๋Œ€ํ•œ ๋ช…ํ™•ํ•œ ์†Œ๊ฐœ์™€ ๋ฐฐ๊ฒฝ
- ์ฃผ์ œ์˜ ์—ญ์‚ฌ์ /์ด๋ก ์  ๋งฅ๋ฝ
- ํ˜„์žฌ ๋™ํ–ฅ์ด๋‚˜ ์ตœ์‹  ์ •๋ณด
- ์ฃผ์š” ๊ฐœ๋…์ด๋‚˜ ์›๋ฆฌ์— ๋Œ€ํ•œ ์„ค๋ช…
- ์‹ค์šฉ์ ์ธ ์ ์šฉ ๋ฐฉ๋ฒ•์ด๋‚˜ ์‚ฌ๋ก€
- ๋‹ค์–‘ํ•œ ๊ด€์ ์ด๋‚˜ ๋น„๊ต ๋ถ„์„
- ํ–ฅํ›„ ์ „๋ง์ด๋‚˜ ๊ฒฐ๋ก 
"""
    ]
    return random.choice(prompts)
def remove_unwanted_phrases(text):
    """Drop every word of ``text`` that contains one of the unwanted phrases.

    The text is split into whitespace-free words and newline tokens; a word
    is discarded when any unwanted phrase is a substring of it. The remaining
    tokens are joined with single spaces and the spaces next to newlines are
    removed, so runs of spaces/tabs collapse to one space while line breaks
    are kept.

    NOTE(review): words never contain whitespace, so the list entries that
    contain a space cannot match anything.
    """
    unwanted_phrases = [
        '์—ฌ๋Ÿฌ๋ถ„', '์ตœ๊ทผ', '๋งˆ์ง€๋ง‰์œผ๋กœ', '๊ฒฐ๋ก ์ ์œผ๋กœ', '๊ฒฐ๊ตญ',
        '์ข…ํ•ฉ์ ์œผ๋กœ', '๋”ฐ๋ผ์„œ', '๋งˆ๋ฌด๋ฆฌ', '๋์œผ๋กœ', '์š”์•ฝ',
        'ํ•œ ์ค„ ์š”์•ฝ', '์ •๋ฆฌํ•˜์ž๋ฉด', '์ด์ •๋ฆฌ', '๊ธ€์„ ๋งˆ์น˜๋ฉฐ',
        '์ด์ƒ์œผ๋กœ', '์ถ”์ฒœ๋“œ๋ฆฝ๋‹ˆ๋‹ค', '์ฐธ๊ณ ํ•˜์„ธ์š”', '๋„์›€์ด ๋˜์…จ๊ธธ',
        '์ข‹์€ ํ•˜๋ฃจ ๋˜์„ธ์š”', '๋‹ค์Œ ๊ธ€์—์„œ', '๋„์›€์ด ๋˜์—ˆ๊ธธ',
        '์ฆ๊ฑฐ์šด ํ•˜๋ฃจ ๋˜์„ธ์š”', '๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค'
    ]

    def is_clean(token):
        # A token survives only if none of the phrases occurs inside it
        for phrase in unwanted_phrases:
            if phrase in token:
                return False
        return True

    kept_tokens = filter(is_clean, re.findall(r'\S+|\n', text))
    joined = ' '.join(kept_tokens)
    # Joining put spaces around the newline tokens; strip them again
    for padded_newline in (' \n ', ' \n', '\n '):
        joined = joined.replace(padded_newline, '\n')
    return joined
def post_process_blog(blog_content, style="์นœ๊ทผํ•œ"):
    """Clean up a generated blog post and ask Gemini to rewrite it more naturally.

    Steps, in order:
      1. strip numbered-list, bullet and heading markers at line starts;
      2. for the default style only, rewrite a fixed set of sentence endings;
      3. replace a fixed list of exaggerated expressions with milder wording
         and remove wording that refers to the reference posts;
      4. send the result to the "gemini-2.0-flash" model with a rewrite prompt.

    Args:
        blog_content: the generated blog text.
        style: style label; it is compared with the default label and is
            also quoted in the rewrite prompt.

    Returns:
        str: the stripped text of the model reply. If anything raises along
        the way the error is logged and the text as cleaned up so far is
        returned instead.

    NOTE(review): the Korean literals in this function are mis-encoded in this
    copy of the file and are reproduced unchanged.
    """
    try:
        # Remove numbered-list, bullet and heading markers
        blog_content = re.sub(r'^\d+\.\s+', '', blog_content, flags=re.MULTILINE)
        blog_content = re.sub(r'^[\*\-\โ€ข]\s+', '', blog_content, flags=re.MULTILINE)
        blog_content = re.sub(r'^#+\s+', '', blog_content, flags=re.MULTILINE)
        if style == "์นœ๊ทผํ•œ":
            # The Hangul syllable range is written with \u escapes. The literal
            # characters found in this file are mis-encoded and form an invalid
            # range, which made re.sub raise re.error here and sent the whole
            # function into the except branch below.
            blog_content = re.sub(r'([\uAC00-\uD7A3]+)๊ณ ์š”', r'\1๊ตฌ์š”', blog_content)
            friendly_endings = [
                (r'๋‹ต๋‹ˆ๋‹ค', '์–ด์š”'),
                (r'์˜€๋‹ต๋‹ˆ๋‹ค', '์˜€์–ด์š”'),
                (r'ํ–ˆ๋‹ต๋‹ˆ๋‹ค', 'ํ–ˆ์–ด์š”'),
                (r'์Šต๋‹ˆ๋‹ค', '์š”'),
                (r'ํ•ฉ๋‹ˆ๋‹ค', 'ํ•ด์š”'),
                (r'๋ฉ๋‹ˆ๋‹ค', '๋ผ์š”'),
                (r'์ž…๋‹ˆ๋‹ค', '์ด์—์š”'),
            ]
            # Applied strictly in this order, exactly as the original sequence of calls
            for pattern, replacement in friendly_endings:
                blog_content = re.sub(pattern, replacement, blog_content)
        # Tone down exaggerated expressions
        exaggerated_expressions = [
            (r'ํ•„์ˆ˜์ ์ธ', r'์ค‘์š”ํ•œ'),
            (r'ํ˜๋ช…์ ์ธ', r'์ค‘์š”ํ•œ'),
            (r'๋†€๋ผ์šด', r'์ฃผ๋ชฉํ•  ๋งŒํ•œ'),
            (r'๊ธฐ์ ์˜', r'ํšจ๊ณผ์ ์ธ'),
            (r'์ตœ๊ณ ์˜', r'์ข‹์€'),
            (r'์„ธ๊ณ„์ ์ธ', r'์œ ๋ช…ํ•œ'),
            (r'์™„๋ฒฝํ•œ', r'์šฐ์ˆ˜ํ•œ'),
            (r'๊ทน์ ์ธ', r'์ƒ๋‹นํ•œ'),
            (r'๋ฌดํ•œํ•œ', r'๋งŽ์€'),
            (r'์ ˆ๋Œ€์ ์ธ', r'์ƒ๋‹นํ•œ'),
            (r'ํ˜์‹ ์ ์ธ', r'์ƒˆ๋กœ์šด'),
            (r'ํ™˜์ƒ์ ์ธ', r'์ข‹์€'),
            (r'๊ทผ๋ณธ์ ์ธ', r'๊ธฐ๋ณธ์ ์ธ'),
            (r'ํš๊ธฐ์ ์ธ', r'์ค‘์š”ํ•œ'),
            (r'์ „๋ก€์—†๋Š”', r'ํŠน๋ณ„ํ•œ'),
            (r'์••๋„์ ์ธ', r'์ฃผ๋ชฉํ•  ๋งŒํ•œ'),
            (r'ํ™ฉํ™€ํ•œ', r'์ข‹์€'),
            (r'์ฒœ์ƒ์˜', r'์šฐ์ˆ˜ํ•œ'),
            (r'๊ธฐ๊ฐ€ ๋ง‰ํžŒ', r'ํšจ๊ณผ์ ์ธ'),
            (r'๋ํŒ์™•', r'์ตœ์ƒ์œ„'),
            (r'๊ทธ ์ž์ฒด', r''),
            (r'์ด (.{1,10}) ๊ทธ ์ž์ฒด์˜€์–ด์š”', r'์ด \1์˜€์–ด์š”'),
            (r'๊ฐ€ (.{1,10}) ๊ทธ ์ž์ฒด์˜€์–ด์š”', r'๊ฐ€ \1์˜€์–ด์š”'),
            (r'์••๋„์ ์ธ', r'์ค‘์š”ํ•œ'),
            (r'์ฒœ๊ตญ', r'์ข‹์€ ๊ณณ'),
            (r'ํ™ฉํ™€ํ–ˆ์–ด์š”', r'์ข‹์•˜์–ด์š”'),
            (r'ํ™˜์ƒ์˜', r'์ข‹์€')
        ]
        for pattern, replacement in exaggerated_expressions:
            blog_content = re.sub(pattern, replacement, blog_content, flags=re.IGNORECASE)
        blog_content = re.sub(r'์ฐธ๊ณ ๊ธ€์— ๋”ฐ๋ฅด๋ฉด', r'์•Œ๋ ค์ง„ ๋ฐ”๋กœ๋Š”', blog_content)
        blog_content = re.sub(r'์ฐธ๊ณ ๊ธ€', r'๊ด€๋ จ ์ •๋ณด', blog_content)
        # Prompt for the additional Gemini pass (meant to polish the text)
        prompt = f"""
๋‹ค์Œ ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ๊ธ€์„ ๋” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ˜•ํƒœ๋กœ ๋ณ€๊ฒฝํ•ด์ฃผ์„ธ์š”:
์›๋ณธ ๊ธ€:
{blog_content}
๋ณ€๊ฒฝ ์š”๊ตฌ์‚ฌํ•ญ:
1. ๋งˆํฌ๋‹ค์šด ํ˜•์‹ ๋ฐ ๋ฒˆํ˜ธ ๋ชฉ๋ก, ๋ถˆ๋ฆฟ ํ‘œํ˜„ ์ œ๊ฑฐ
2. ์†Œ์ œ๋ชฉ์€ 5๊ฐœ ์ดํ•˜๋กœ, ๊ฐ ์†Œ์ œ๋ชฉ ์•„๋ž˜ ๋‚ด์šฉ์€ ์ตœ์†Œ {MIN_SECTION_LENGTH}์ž ์ด์ƒ์œผ๋กœ ์ƒ์„ธํ•˜๊ฒŒ ์„œ์ˆ 
3. "์ฐธ๊ณ ๊ธ€" ๊ด€๋ จ ํ‘œํ˜„ ์ œ๊ฑฐ
4. ๊ตฌ์ฒด์ ์ธ ์˜ˆ์‹œ์™€ ์‹ค์šฉ์ ์ธ ์ •๋ณด ํฌํ•จ
5. ์Šคํƒ€์ผ: {style} (์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฌธ์ฒด ์‚ฌ์šฉ)
"""
        # A randomly keyed client is created for every call
        from google.genai import types
        client = get_random_gemini_client()
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt],
            config=types.GenerateContentConfig(
                max_output_tokens=MAX_TOKENS,
                temperature=0.7,
                top_p=0.9
            )
        )
        return response.text.strip()
    except Exception as e:
        # Best effort: keep whatever cleanup succeeded instead of failing the caller
        logging.error(f"๋ธ”๋กœ๊ทธ ๊ธ€ ํ›„์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return blog_content
def format_blog_post(blog_post, query=""):
    """Convert plain blog text into simple HTML (h1 title, h2 subtitles, paragraphs).

    Leading heading, numbered-list and bullet markers are stripped first.
    The first non-empty line longer than five characters becomes the ``<h1>``
    title (shortened by ``optimize_title``). Later lines become ``<h2>`` when
    they match a subtitle pattern or stand alone between blank lines and are
    shorter than 80 characters; all other lines are grouped into ``<p>``
    blocks. Blank lines emit ``<br>``. ``**text**`` is rendered as
    ``<strong>``. All text is HTML-escaped.

    Args:
        blog_post: the blog text, one logical line per text line.
        query: search keyword, used to build a fallback title.

    Returns:
        str: the HTML fragment, one element or text line per output line.

    NOTE(review): the Korean literals in this function are mis-encoded in this
    copy of the file and are reproduced unchanged, except for the Hangul
    character class (see below).
    """
    blog_post = re.sub(r'^#+\s+', '', blog_post, flags=re.MULTILINE)
    blog_post = re.sub(r'^\d+\.\s+', '', blog_post, flags=re.MULTILINE)
    blog_post = re.sub(r'^[\*\-]\s+', '', blog_post, flags=re.MULTILINE)
    lines = blog_post.split('\n')
    formatted_lines = []
    in_paragraph = False
    first_line = True
    title_found = False
    # Remembered for the fallback title used when no line qualifies as a title
    first_non_empty_line = ""
    for line in lines:
        if line.strip():
            first_non_empty_line = line.strip()
            break
    # The Hangul syllable range is written with \u escapes. The literal
    # characters found in this file are mis-encoded and form an invalid range,
    # which made re.match raise re.error as soon as the third pattern was tried.
    subtitle_patterns = [
        r'^.{10,100}\?$',
        r'^.{10,100}:$',
        r'^.{5,50}์˜ [\uAC00-\uD7A3\s]+$',
        r'^[\uAC00-\uD7A3\s]{5,50} [\uAC00-\uD7A3\s]+$'
    ]
    previous_line_empty = True

    def optimize_title(title, max_length=60):
        """Shorten ``title`` step by step until it fits ``max_length`` characters."""
        if ': ' in title and len(title) > max_length:
            title = title.split(': ')[0]
        if len(title) > max_length:
            # Drop parenthesised asides
            title = re.sub(r'\s*\([^)]*\)', '', title)
        if len(title) > max_length and ',' in title:
            title = title.split(',')[0]
        if len(title) > max_length:
            # Keep whole words while they still fit
            words = title.split()
            shortened_title = []
            current_length = 0
            for word in words:
                if current_length + len(word) + 1 <= max_length:
                    shortened_title.append(word)
                    current_length += len(word) + 1
                else:
                    break
            title = ' '.join(shortened_title)
            # NOTE(review): indentation was lost in this copy of the file; this
            # trimming of a dangling last word is assumed to belong to the
            # truncation branch - confirm against the original.
            endings_to_remove = ['๊ทธ', '์ด', '์€', '๋Š”', '์ด๋‚˜', '์™€', '๊ณผ', '๋˜๋Š”', '๊ทธ๋ฆฌ๊ณ ']
            for ending in endings_to_remove:
                if title.endswith(f" {ending}"):
                    title = title[:-len(ending)-1]
        if len(title) < 20 and query:
            # Very short titles are replaced by a keyword-based one
            title = f"{query} ์ •๋ณด ๊ฐ€์ด๋“œ"
        return title

    for i, line in enumerate(lines):
        line = line.strip()
        next_line_empty = (i+1 >= len(lines)) or not lines[i+1].strip()
        if not line:
            if in_paragraph:
                formatted_lines.append("</p>")
                in_paragraph = False
            formatted_lines.append("<br>")
            previous_line_empty = True
        else:
            if first_line and len(line) > 5:
                optimized_title = optimize_title(line)
                formatted_lines.append(f'<h1 style="font-size: 1.8em; margin-bottom: 20px; font-weight: bold; color: #222;">{html.escape(optimized_title)}</h1>')
                first_line = False
                title_found = True
                previous_line_empty = False
            else:
                is_subtitle = False
                if any(re.match(pattern, line) for pattern in subtitle_patterns):
                    is_subtitle = True
                elif previous_line_empty and next_line_empty and len(line) < 80:
                    # A short line standing alone between blank lines
                    is_subtitle = True
                if is_subtitle:
                    if in_paragraph:
                        formatted_lines.append("</p>")
                        in_paragraph = False
                    formatted_lines.append(f'<h2 style="font-size: 1.3em; margin-top: 25px; margin-bottom: 15px; font-weight: bold; color: #333;">{html.escape(line)}</h2>')
                else:
                    if not in_paragraph:
                        formatted_lines.append("<p>")
                        in_paragraph = True
                    # Escape first, then turn **bold** into <strong>
                    content = html.escape(line)
                    bold_content = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
                    formatted_lines.append(bold_content)
                previous_line_empty = False
    if in_paragraph:
        formatted_lines.append("</p>")
    if not title_found:
        default_title = f"{query} ์ •๋ณด ๊ฐ€์ด๋“œ" if query else "์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ"
        if first_non_empty_line:
            default_title = optimize_title(first_non_empty_line)
        formatted_lines.insert(0, f'<h1 style="font-size: 1.8em; margin-bottom: 20px; font-weight: bold; color: #222;">{html.escape(default_title)}</h1>')
    return '\n'.join(formatted_lines)
# Helper for calling the Gemini API (keeps the request code in one place)
def call_gemini_api(prompt):
    """Send ``prompt`` to the "gemini-2.0-flash" model and return the reply text.

    A client with a randomly selected API key is obtained for every call via
    get_random_gemini_client(). Generation uses the module-level MAX_TOKENS,
    TEMPERATURE and TOP_P settings.

    Returns:
        str: the reply text with surrounding whitespace stripped.
    """
    from google.genai import types

    # Fetch a fresh, randomly keyed client each time
    gemini = get_random_gemini_client()
    generation_config = types.GenerateContentConfig(
        max_output_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    )
    reply = gemini.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
        config=generation_config,
    )
    return reply.text.strip()
def generate_blog_post(query, style="์นœ๊ทผํ•œ"):
    """Generate an informational blog post about ``query`` as HTML.

    Runs up to three Gemini calls:
      1. an initial draft built from the three fetched reference texts;
      2. a revision, only when the cleaned draft is shorter than
         TARGET_CHAR_LENGTH characters;
      3. an expansion, only when the revised post (measured on the text of
         the formatted HTML) is still below 80% of TARGET_CHAR_LENGTH.
    Each accepted text goes through post_process_blog() and
    format_blog_post().

    Args:
        query: topic / search keyword.
        style: style label; selects the style guide and the extra
            style-specific instructions.

    Returns:
        tuple: ``(html, ref1, ref2, ref3, char_length)``. When the first
        draft is long enough, ``char_length`` is the length of the cleaned
        draft text; otherwise it is the length of the text extracted from
        the final HTML. On any exception the error is logged and
        ``(error_html, "", "", "", 0)`` is returned.

    NOTE(review): the Korean prompt texts are mis-encoded in this copy of the
    file and are left untouched here.
    """
    try:
        # Collect the reference posts
        ref1, ref2, ref3 = fetch_crawl_results(query)
        style_prompt = get_style_prompt(style)
        format_prompt = get_informational_blog_prompt()
        # Detailed instructions per style; stays empty for an unknown style label
        style_specific_instructions = ""
        if style == "์นœ๊ทผํ•œ":
            style_specific_instructions = """
์ด ๋ธ”๋กœ๊ทธ๋Š” ๋ฐ˜๋“œ์‹œ ์นœ๊ทผํ•œ ๋Œ€ํ™”์ฒด๋กœ ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
- 'ํ•ด์š”์ฒด' ์‚ฌ์šฉ: "~ํ–ˆ์–ด์š”", "~์ธ ๊ฒƒ ๊ฐ™์•„์š”", "~ํ•˜๋„ค์š”"
- ๊ฒฉ์‹์ฒด(์˜ˆ: "~ํ•ฉ๋‹ˆ๋‹ค", "~์ž…๋‹ˆ๋‹ค") ์‚ฌ์šฉ ๊ธˆ์ง€
- ๋Œ€ํ™”ํ•˜๋“ฏ ํŽธ์•ˆํ•˜๊ฒŒ ์ž‘์„ฑ
- ์งง๊ณ  ๊ฐ„๊ฒฐํ•œ ๋ฌธ์žฅ ์‚ฌ์šฉ
- ๋ณต์žกํ•œ ๊ฐœ๋…์€ ์ผ์ƒ์ ์ธ ๋น„์œ ๋กœ ์„ค๋ช…
- ์‹ค์ œ ์‚ฌ๋ก€์™€ ์˜ˆ์‹œ๋ฅผ ํ†ตํ•œ ์„ค๋ช…
- ์ˆ˜์‚ฌ์˜๋ฌธ๋ฌธ์„ ํ™œ์šฉํ•œ ๋…์ž์™€์˜ ์†Œํ†ต
"""
        elif style == "์ผ๋ฐ˜์ ์ธ":
            style_specific_instructions = """
์ด ๋ธ”๋กœ๊ทธ๋Š” ๋ฐ˜๋“œ์‹œ ์ผ๋ฐ˜์ ์ธ ์กด๋Œ“๋ง์ฒด๋กœ ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
- '์Šต๋‹ˆ๋‹ค์ฒด' ์‚ฌ์šฉ: "~ํ–ˆ์Šต๋‹ˆ๋‹ค", "~์ž…๋‹ˆ๋‹ค", "~ํ•˜์˜€์Šต๋‹ˆ๋‹ค"
- ๊ฒฉ์‹์ ์ด์ง€ ์•Š์€ ๋ช…ํ™•ํ•œ ํ‘œํ˜„ ์‚ฌ์šฉ
- ๊ฐ„๊ฒฐํ•˜๊ณ  ๋ช…๋ฃŒํ•œ ๋ฌธ์žฅ ์‚ฌ์šฉ
- ๊ฐ๊ด€์ ์ธ ์‚ฌ์‹ค๊ณผ ๋ถ„์„ ์ค‘์‹ฌ์œผ๋กœ ์„œ์ˆ 
- ๊ท ํ˜• ์žกํžŒ ์‹œ๊ฐ์—์„œ ๋‹ค์–‘ํ•œ ๊ด€์  ์ œ์‹œ
"""
        elif style == "์ „๋ฌธ์ ์ธ":
            style_specific_instructions = """
์ด ๋ธ”๋กœ๊ทธ๋Š” ๋ฐ˜๋“œ์‹œ ์ „๋ฌธ์ ์ด๊ณ  ๋ถ„์„์ ์ธ ์–ดํˆฌ๋กœ ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
- '์Šต๋‹ˆ๋‹ค์ฒด' ์‚ฌ์šฉ: "~ํ–ˆ์Šต๋‹ˆ๋‹ค", "~์ž…๋‹ˆ๋‹ค", "~ํ•˜์˜€์Šต๋‹ˆ๋‹ค"
- ์—ญ์‚ฌ, ์ด๋ก , ์—ฐ๊ตฌ ๊ฒฐ๊ณผ ๋“ฑ ์‹ฌ์ธต ์ •๋ณด ํฌํ•จ
- ๊ฐ๊ด€์ ์ด๊ณ  ๋…ผ๋ฆฌ์ ์ธ ๋ถ„์„ ์ค‘์‹ฌ ์ž‘์„ฑ
- ๊ตฌ์ฒด์ ์ธ ๋ฐ์ดํ„ฐ ๋ฐ ์ˆ˜์น˜ ํฌํ•จํ•˜์—ฌ ์„ค๋ช…
- ๋‹ค์–‘ํ•œ ๊ด€์ ๊ณผ ์ด๋ก ์  ํ”„๋ ˆ์ž„์›Œํฌ ์ œ์‹œ
"""
        # Phase 1: initial generation
        initial_prompt = f"""
์ฃผ์ œ: {query} ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ
์ฐธ๊ณ ๊ธ€ 1: {ref1}
์ฐธ๊ณ ๊ธ€ 2: {ref2}
์ฐธ๊ณ ๊ธ€ 3: {ref3}
๋ชฉํ‘œ ๊ธ€์ž์ˆ˜: {TARGET_CHAR_LENGTH}
{format_prompt}
์Šคํƒ€์ผ ๊ฐ€์ด๋“œ:
{style_prompt}
์Šคํƒ€์ผ ์„ธ๋ถ€ ์ง€์‹œ์‚ฌํ•ญ:
{style_specific_instructions}
ํŠน๋ณ„ ์ง€์‹œ์‚ฌํ•ญ:
1. ๋ฐ˜๋“œ์‹œ ๊ธ€์˜ ์ฒ˜์Œ์— ๋งค๋ ฅ์ ์ด๊ณ  ๋ช…ํ™•ํ•œ ์ œ๋ชฉ ํฌํ•จ (์ฃผ์ œ์™€ ํ•ต์‹ฌ ํฌ์ธํŠธ๋ฅผ ๋‹ด์€ ์ œ๋ชฉ).
2. ๋งˆํฌ๋‹ค์šด ๋ฌธ๋ฒ•(#, *, -, 1., 2., ๋“ฑ) ์‚ฌ์šฉ ๊ธˆ์ง€.
3. ์†Œ์ œ๋ชฉ์€ ๋ฒˆํ˜ธ ์—†์ด ์ž‘์„ฑํ•˜๊ณ , 5๊ฐœ ์ดํ•˜๋กœ ์ œํ•œ.
4. ๋ชจ๋“  ๋ชฉ๋ก์€ ๋ถˆ๋ฆฟ์ด๋‚˜ ๋ฒˆํ˜ธ ์—†์ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์„œ์ˆ .
5. "์ฐธ๊ณ ๊ธ€" ๊ด€๋ จ ํ‘œํ˜„ ์‚ฌ์šฉ ๊ธˆ์ง€.
6. ์ฃผ์ œ์— ๊ด€ํ•œ ์ •ํ™•ํ•˜๊ณ  ์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๋งŒ ์ž‘์„ฑ.
7. ๊ตฌ์ฒด์ ์ธ ์ •๋ณด(์—ญ์‚ฌ์  ๋ฐฐ๊ฒฝ, ์ฃผ์š” ๊ฐœ๋…, ์ด๋ก , ์‹ค์šฉ์  ์ ์šฉ, ์ตœ์‹  ๋™ํ–ฅ ๋“ฑ) ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํฌํ•จ.
8. ๊ธ€์ž์ˆ˜๊ฐ€ ์ตœ์†Œ {TARGET_CHAR_LENGTH}์ž ์ด์ƒ์ด์–ด์•ผ ํ•จ.
9. ๊ฐ ์†Œ์ œ๋ชฉ ์•„๋ž˜ ๋‚ด์šฉ์€ ์ตœ์†Œ {MIN_SECTION_LENGTH}์ž ์ด์ƒ ์„œ์ˆ .
10. ๋ฐ˜๋“œ์‹œ ๋‹ค์Œ ๊ตฌ์„ฑ ์š”์†Œ๋ฅผ ํฌํ•จํ•  ๊ฒƒ:
- ์ฃผ์ œ์— ๋Œ€ํ•œ ๋ช…ํ™•ํ•œ ์†Œ๊ฐœ์™€ ๋ฐฐ๊ฒฝ
- ์ฃผ์ œ์˜ ์—ญ์‚ฌ์ /์ด๋ก ์  ๋งฅ๋ฝ
- ํ˜„์žฌ ๋™ํ–ฅ์ด๋‚˜ ์ตœ์‹  ์ •๋ณด
- ์ฃผ์š” ๊ฐœ๋…์ด๋‚˜ ์›๋ฆฌ์— ๋Œ€ํ•œ ์„ค๋ช…
- ์‹ค์šฉ์ ์ธ ์ ์šฉ ๋ฐฉ๋ฒ•์ด๋‚˜ ์‚ฌ๋ก€
- ๋‹ค์–‘ํ•œ ๊ด€์ ์ด๋‚˜ ๋น„๊ต ๋ถ„์„
- ํ–ฅํ›„ ์ „๋ง์ด๋‚˜ ๊ฒฐ๋ก 
"""
        first_attempt = call_gemini_api(initial_prompt)
        first_attempt_cleaned = remove_unwanted_phrases(first_attempt)
        first_attempt_length = len(first_attempt_cleaned)
        # Long enough already: post-process, format and return without revising
        if first_attempt_length >= TARGET_CHAR_LENGTH:
            final_post = post_process_blog(first_attempt_cleaned, style)
            final_html = format_blog_post(final_post, query)
            return final_html, ref1, ref2, ref3, first_attempt_length
        # Phase 2: revision attempt (only the longest reference text is passed along)
        longest_ref = max([ref1, ref2, ref3], key=len)
        revision_prompt = f"""
์ด์ „ ๊ธ€:
{first_attempt_cleaned}
์ฐธ๊ณ ๊ธ€: {longest_ref}
ํฌ์ŠคํŒ… ์Šคํƒ€์ผ:
{style_prompt}
์Šคํƒ€์ผ ์„ธ๋ถ€ ์ง€์‹œ์‚ฌํ•ญ:
{style_specific_instructions}
๋ฌธ์ œ์ :
์ด์ „์— ์ž‘์„ฑ๋œ ๊ธ€์€ ๋ชฉํ‘œ ๊ธ€์ž์ˆ˜์ธ {TARGET_CHAR_LENGTH}์ž์— ๋ฏธ์น˜์ง€ ๋ชปํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ ๊ธ€์ž์ˆ˜๋Š” ์•ฝ {first_attempt_length}์ž์ž…๋‹ˆ๋‹ค.
๋˜ํ•œ, ๊ฐ ์†Œ์ œ๋ชฉ ์•„๋ž˜ ๋‚ด์šฉ์ด ๋„ˆ๋ฌด ์งง๊ณ  ๋ถ€์‹คํ•ฉ๋‹ˆ๋‹ค.
์ค‘์š” ์š”๊ตฌ์‚ฌํ•ญ:
1. ๊ธ€์˜ ์ฒ˜์Œ์— ๋งค๋ ฅ์ ์ธ ์ œ๋ชฉ(์ฃผ์ œ์™€ ํ•ต์‹ฌ ํฌ์ธํŠธ ํฌํ•จ) ์ถ”๊ฐ€.
2. ๊ธ€์ž์ˆ˜๋ฅผ ์ตœ์†Œ {TARGET_CHAR_LENGTH}์ž ์ด์ƒ์œผ๋กœ ๋Š˜๋ฆฌ๊ณ , ๊ฐ ์„น์…˜์„ ์ƒ์„ธํžˆ ์„œ์ˆ .
3. ๋งˆํฌ๋‹ค์šด ํ˜•์‹(#, *, -, 1., 2., ๋“ฑ) ์‚ฌ์šฉ ๊ธˆ์ง€.
4. ์†Œ์ œ๋ชฉ์€ ๋ฒˆํ˜ธ ์—†์ด ์ž‘์„ฑ.
5. ๋ชจ๋“  ๋ชฉ๋ก์€ ๋ถˆ๋ฆฟ์ด๋‚˜ ๋ฒˆํ˜ธ ์—†์ด ์„œ์ˆ .
6. "์ฐธ๊ณ ๊ธ€" ๊ด€๋ จ ํ‘œํ˜„ ์‚ฌ์šฉ ๊ธˆ์ง€.
7. ์ฃผ์ œ์— ๊ด€ํ•œ ์ •ํ™•ํ•˜๊ณ  ์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๋งŒ ์ž‘์„ฑ.
8. ๊ฐ ์†Œ์ œ๋ชฉ ์•„๋ž˜ ๋‚ด์šฉ์„ ์ตœ์†Œ {MIN_SECTION_LENGTH}์ž ์ด์ƒ ์„œ์ˆ .
9. ์†Œ์ œ๋ชฉ ์ˆ˜๋Š” 5๊ฐœ ์ดํ•˜๋กœ ์ œํ•œ.
์ƒ์„ธ ๋ณด์™„:
- ์ฃผ์ œ์˜ ๋ฐฐ๊ฒฝ ๋ฐ ๋งฅ๋ฝ์— ๋Œ€ํ•œ ๋” ๊นŠ์€ ์„ค๋ช… ์ถ”๊ฐ€.
- ๊ตฌ์ฒด์ ์ธ ์‚ฌ๋ก€, ์—ฐ๊ตฌ ๊ฒฐ๊ณผ, ํ†ต๊ณ„ ๋“ฑ ์‹ค์ฆ์  ๋ฐ์ดํ„ฐ ์ถ”๊ฐ€.
- ์—ญ์‚ฌ์  ๋ฐœ์ „ ๊ณผ์ •, ์ฃผ์š” ์ด๋ก ์  ํ”„๋ ˆ์ž„์›Œํฌ ๋“ฑ ์‹ฌ์ธต ์ •๋ณด ์ถ”๊ฐ€.
- ์‹ค์šฉ์ ์ธ ์ ์šฉ ๋ฐฉ๋ฒ•, ํŒ, ๊ถŒ์žฅ์‚ฌํ•ญ ๋“ฑ ์‹คํ–‰ ๊ฐ€๋Šฅํ•œ ์ •๋ณด ์ถ”๊ฐ€.
- ๋‹ค์–‘ํ•œ ๊ด€์ , ์Ÿ์ , ๋…ผ์Ÿ์  ๋“ฑ ๊ท ํ˜• ์žกํžŒ ์‹œ๊ฐ ์ œ๊ณต.
"""
        revised_attempt = call_gemini_api(revision_prompt)
        revised_cleaned = remove_unwanted_phrases(revised_attempt)
        final_post = post_process_blog(revised_cleaned, style)
        final_html = format_blog_post(final_post, query)
        # Measure the visible text of the HTML, not the markup
        soup = BeautifulSoup(final_html, 'html.parser')
        actual_char_length = len(soup.get_text())
        # Phase 3: expansion attempt (when the character count is still too low)
        if actual_char_length < TARGET_CHAR_LENGTH * 0.8:
            expansion_prompt = f"""
๋‹ค์Œ ์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ๊ธ€์˜ ๋‚ด์šฉ์„ ํฌ๊ฒŒ ํ™•์žฅํ•ด์ฃผ์„ธ์š”:
์›๋ณธ ๊ธ€:
{final_post}
๋ฌธ์ œ์ :
์ด ๊ธ€์€ ๋ชฉํ‘œ ๊ธ€์ž์ˆ˜์ธ {TARGET_CHAR_LENGTH}์ž์— ๋ฏธ์น˜์ง€ ๋ชปํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ ๊ธ€์ž์ˆ˜๋Š” ์•ฝ {actual_char_length}์ž์ด๋ฉฐ, ๋‚ด์šฉ์ด ๋ถ€์‹คํ•ฉ๋‹ˆ๋‹ค.
์Šคํƒ€์ผ ๊ฐ€์ด๋“œ:
{style_prompt}
์Šคํƒ€์ผ ์„ธ๋ถ€ ์ง€์‹œ์‚ฌํ•ญ:
{style_specific_instructions}
์š”๊ตฌ์‚ฌํ•ญ:
1. ๊ฐ ์†Œ์ œ๋ชฉ ์•„๋ž˜์˜ ๋‚ด์šฉ์„ ์ตœ์†Œ {MIN_SECTION_LENGTH}์ž ์ด์ƒ ๋Œ€ํญ ํ™•์žฅ.
2. ๊ตฌ์ฒด์ ์ธ ์‚ฌ๋ก€, ์—ฐ๊ตฌ ๊ฒฐ๊ณผ, ํ†ต๊ณ„, ์˜ˆ์‹œ ๋“ฑ์„ ์ถ”๊ฐ€.
3. ๋งˆํฌ๋‹ค์šด ํ˜•์‹(#, *, -, 1., 2., ๋“ฑ) ์‚ฌ์šฉ ๊ธˆ์ง€.
4. {style} ์Šคํƒ€์ผ์— ๋งž์ถฐ ์ผ๊ด€๋˜๊ฒŒ ์ž‘์„ฑ.
5. ์†Œ์ œ๋ชฉ ๊ตฌ์กฐ๋Š” ์œ ์ง€ํ•˜๋˜, ๊ฐ ์„น์…˜ ๋‚ด์šฉ์„ 3๋ฐฐ ์ด์ƒ ํ’๋ถ€ํ•˜๊ฒŒ ํ™•์žฅ.
6. ์ฃผ์ œ์˜ ์—ญ์‚ฌ์  ๋ฐœ์ „, ์ด๋ก ์  ๋ฐฐ๊ฒฝ, ์ฃผ์š” ๊ฐœ๋… ๋“ฑ์„ ๋” ์ƒ์„ธํžˆ ์„œ์ˆ .
7. ์ „์ฒด ๊ธ€์ž์ˆ˜๋ฅผ ์ตœ์†Œ {TARGET_CHAR_LENGTH}์ž ์ด์ƒ ๋‹ฌ์„ฑ.
8. ์†Œ์ œ๋ชฉ ์ˆ˜๋Š” ์ตœ๋Œ€ 5๊ฐœ๋กœ ์ œํ•œ.
9. ์‹ค์šฉ์  ์ •๋ณด(์ ์šฉ ๋ฐฉ๋ฒ•, ํŒ, ์ฃผ์˜์‚ฌํ•ญ ๋“ฑ) ์ถ”๊ฐ€.
10. ๋‹ค์–‘ํ•œ ๊ด€์ ๊ณผ ์‹œ๊ฐ์—์„œ์˜ ๋ถ„์„ ์ถ”๊ฐ€.
11. ์ฃผ์ œ์™€ ๊ด€๋ จ๋œ ์ตœ์‹  ๋™ํ–ฅ์ด๋‚˜ ๋ฐœ์ „ ๋ฐฉํ–ฅ ์ถ”๊ฐ€.
12. ๊ฒฐ๋ก  ๋ถ€๋ถ„์—์„œ ์ฃผ์ œ์˜ ์˜์˜์™€ ์ค‘์š”์„ฑ ๊ฐ•์กฐ.
"""
            # NOTE(review): unlike phases 1 and 2, this reply is not passed
            # through remove_unwanted_phrases() - confirm whether that is intended.
            expanded_attempt = call_gemini_api(expansion_prompt)
            final_post = post_process_blog(expanded_attempt, style)
            final_html = format_blog_post(final_post, query)
            soup = BeautifulSoup(final_html, 'html.parser')
            actual_char_length = len(soup.get_text())
        return final_html, ref1, ref2, ref3, actual_char_length
    except Exception as e:
        # Top-level boundary: log and hand the caller an error paragraph instead of raising
        logging.error(f"๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return f"<p>๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}</p>", "", "", "", 0