import gradio as gr
import random
import feedparser
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
import tempfile
import google.generativeai as genai
import os

# --- Settings ---
# The API key is read from the environment (Hugging Face Secrets or a system
# environment variable) so that it never has to be hard-coded in the source.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Abort at import time when the key is missing or empty.
if not GEMINI_API_KEY:
    raise ValueError(
        "API 키가 설정되지 않았습니다. 허깅페이스 Settings -> Secrets에서 GEMINI_API_KEY를 추가해주세요."
    )

genai.configure(api_key=GEMINI_API_KEY)

# RSS/Atom feeds used as sources for CSAT-style passages.
# Maps a display name for the site to the URL of its feed.
RSS_FEEDS = {
    "Aeon": "https://aeon.co/feed.rss",
    "Psyche": "https://psyche.co/feed",
    "The Conversation": "https://theconversation.com/global/articles.atom",
    "Scientific American": "https://rss.sciam.com/ScientificAmerican-Global",
    "Nautilus": "https://nautil.us/feed/",
}

def fetch_random_article():
    """Pick a random article from a randomly chosen feed in ``RSS_FEEDS``.

    One feed is drawn at random and parsed with ``feedparser``; one entry
    that carries a link is then drawn at random from that feed.

    Returns:
        A ``(title, link, site_name)`` tuple. ``title`` and ``link`` are both
        ``None`` when the chosen feed yields no entry with a link;
        ``site_name`` is always the ``RSS_FEEDS`` key of the chosen feed.
    """
    site_name, feed_url = random.choice(list(RSS_FEEDS.items()))
    feed = feedparser.parse(feed_url)

    # Feed entries are dict-like and any element may be absent. Attribute
    # access on a missing element raises AttributeError, so look fields up
    # with .get() and only consider entries that can actually be scraped.
    linked_entries = [entry for entry in feed.entries if entry.get("link")]
    if not linked_entries:
        return None, None, site_name

    entry = random.choice(linked_entries)
    link = entry["link"]
    # Fall back to the URL so callers never receive an empty title.
    title = entry.get("title") or link
    return title, link, site_name

def scrape_article_text(url):
    """Download a page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page.

    Returns:
        The paragraph texts joined by single spaces and truncated to the
        first 4000 characters, or an empty string when the page could not be
        fetched or parsed. An error message is never returned as if it were
        article text.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx responses as failures instead of scraping the
        # error page as though it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Site layouts differ, so collect every <p> element rather than a
        # site-specific container.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
    except Exception:
        # Best-effort scrape: report any failure as "no text" so the caller's
        # length check rejects it. Returning str(e) here could slip a long
        # error message past that check.
        return ""

    # Keep only the beginning: bounds the prompt size and is already more
    # than enough material for a passage of CSAT length.
    return text[:4000]

def generate_tts(text):
    """Synthesize English speech for ``text`` and save it as an mp3 file.

    Args:
        text: English text to be read aloud.

    Returns:
        Path of the generated mp3 file, or ``None`` when synthesis fails.
        The file is created with ``delete=False`` so it outlives this call;
        on success this function does not remove it.
    """
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # Only the unique path is needed. Closing the handle right away
        # avoids leaking a file descriptor and lets gTTS reopen the file by
        # name on platforms that forbid opening it twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
    except Exception:
        return None

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty or partial mp3 behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        return None
    return audio_path

def process_suneung_material():
    """Run the full pipeline: fetch article -> ask Gemini -> synthesize audio.

    Returns:
        A 3-tuple ``(content, source_info, audio_path)``, one value per
        Gradio output component wired to this handler. Every path, including
        the failure paths, returns exactly three values:

        * ``content``: the full model response, or a Korean error message
          when a step fails.
        * ``source_info``: Markdown line linking to the source article, or
          ``""`` on failure.
        * ``audio_path``: path of the mp3 produced by ``generate_tts``, or
          ``None`` when no audio is available.
    """
    title, link, site_name = fetch_random_article()

    if not link:
        return "기사를 가져오는데 실패했습니다.", "", None

    raw_text = scrape_article_text(link)

    # Very short text means the scrape failed or the site blocked it.
    if len(raw_text) < 200:
        return "기사 본문이 너무 짧거나 크롤링이 차단된 사이트입니다. 다시 시도해주세요.", "", None

    # Prompt written from the point of view of a CSAT English item writer.
    prompt = f"""
    You are an expert in creating English questions for the Korean CSAT (수능 영어 출제위원).
    Read the following article excerpt and perform the tasks below.
    
    [Source Material]
    {raw_text}
    
    [Tasks]
    1. **CSAT Passage (수능형 지문)**: Rewrite and summarize the core idea into a logically coherent passage of exactly 150-180 words. The vocabulary and sentence structure must match the high difficulty level of the Korean CSAT English section. DO NOT include the blank or altered sequence here; just provide the perfect base passage.
    2. **Vocabulary (주요 어휘)**: Extract 7-10 high-level vocabulary words from your generated passage. Format strictly as: "English Word | Korean Meaning | Synonym".
    3. **Fill-in-the-blank Question (빈칸 추론 문제)**: Create a CSAT-style fill-in-the-blank question using the passage. Replace a crucial thematic phrase with "________". Provide 5 multiple-choice options (1 correct, 4 highly attractive distractors).
    4. **Sentence Ordering Question (글의 순서 문제)**: Take the same passage, keep the first sentence as the given box, and divide the rest into three parts (A), (B), and (C) in a scrambled order. Provide the correct order.

    Format the output cleanly with the following headers:
    [수능형 지문]
    [주요 어휘]
    [빈칸 추론 문제]
    [글의 순서 문제]
    """

    try:
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        result_text = response.text
    except Exception as e:
        # Top-level UI boundary: show the failure in the page rather than
        # letting the click handler raise.
        return f"AI 처리 중 오류가 발생했습니다: {str(e)}", "", None

    # Only the base passage is read aloud: everything before the vocabulary
    # header, with the passage header itself removed.
    if "[수능형 지문]" in result_text and "[주요 어휘]" in result_text:
        passage_for_tts = (
            result_text.split("[주요 어휘]")[0].replace("[수능형 지문]", "").strip()
        )
    else:
        passage_for_tts = "Could not parse passage for audio."

    audio_path = generate_tts(passage_for_tts)
    source_info = f"**출처:** {site_name} - [{title}]({link})"

    return result_text, source_info, audio_path

# --- Gradio UI layout ---
# Components are placed in the order they are created inside the nested
# context managers, so the statement order below defines the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page title and a one-line description of the article sources.
    gr.Markdown("# 🎓 수능 영어 모의 문항 자동 생성기")
    gr.Markdown("Nautilus, Scientific American, The Conversation, Psyche, Aeon에서 랜덤으로 기사를 발췌하여 수능형 지문과 문항을 제작합니다.")
    
    # Single button that triggers the whole pipeline.
    with gr.Row():
        generate_btn = gr.Button("랜덤 지문 및 문제 생성하기 (Click)", variant="primary")
    
    with gr.Row():
        # Wider left column: generated content and the source line.
        with gr.Column(scale=2):
            output_content = gr.Markdown(label="출제 내용")
            source_display = gr.Markdown(label="지문 출처")
        
        # Narrower right column: read-only player fed with a file path.
        with gr.Column(scale=1):
            audio_player = gr.Audio(label="지문 듣기 (TTS)", type="filepath", interactive=False)
            
    # Wire the button click to the pipeline function. It takes no inputs and
    # its return values fill the three components below, in this order.
    # (The original note says the click also shows a loading animation;
    # nothing here configures one explicitly.)
    generate_btn.click(
        fn=process_suneung_material,
        inputs=[],
        outputs=[output_content, source_display, audio_player]
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()