File size: 6,297 Bytes
3de3af7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import logging
import os
import random
import tempfile

import feedparser
import google.generativeai as genai
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gtts import gTTS

# --- Configuration ---
# Load the API key from Hugging Face Secrets or the process environment so
# that it never has to be written into the source.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Fail at import time: nothing in the app can work without the key.
    raise ValueError("API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ—ˆ๊น…ํŽ˜์ด์Šค Settings -> Secrets์—์„œ GEMINI_API_KEY๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”.")

# RSS/Atom feeds that serve as source material for the CSAT-style passages.
# Maps the display name of each site to its feed URL.
RSS_FEEDS = {
    "Aeon": "https://aeon.co/feed.rss",
    "Psyche": "https://psyche.co/feed",
    "The Conversation": "https://theconversation.com/global/articles.atom",
    "Scientific American": "https://rss.sciam.com/ScientificAmerican-Global",
    "Nautilus": "https://nautil.us/feed/",
}

def fetch_random_article():
    """Pick a random feed from RSS_FEEDS and a random linked entry from it.

    Returns:
        A ``(title, link, site_name)`` tuple. ``title`` and ``link`` are both
        ``None`` when the chosen feed has no entry carrying a link;
        ``site_name`` is always the RSS_FEEDS key of the feed that was chosen.
    """
    site_name, feed_url = random.choice(list(RSS_FEEDS.items()))
    feed = feedparser.parse(feed_url)

    # Feed items are not guaranteed to carry a link, and reading a missing
    # attribute from an entry raises AttributeError, so keep only the entries
    # that can actually be scraped.
    linked_entries = [entry for entry in feed.entries if entry.get("link")]
    if not linked_entries:
        return None, None, site_name

    entry = random.choice(linked_entries)
    link = entry["link"]
    # Fall back to the URL so the source credit always has visible link text.
    title = entry.get("title") or link
    return title, link, site_name

def scrape_article_text(url):
    """Download *url* and return up to 4000 characters of its paragraph text.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The text of every ``<p>`` element joined with single spaces and cut
        to the first 4000 characters, or ``""`` when the page could not be
        fetched or parsed.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of scraping an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Generic extraction based on <p> tags, because the page structure
        # differs from site to site.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))

        # Keep only the beginning: this bounds the prompt size and is enough
        # material for a CSAT-length passage.
        return text[:4000]
    except Exception:
        # Best-effort boundary. Return an empty string rather than the
        # exception message, which a caller would otherwise mistake for
        # article text; the failure is logged instead.
        logging.getLogger(__name__).warning(
            "Failed to scrape %s", url, exc_info=True
        )
        return ""

def generate_tts(text):
    """Convert an English passage to speech and store it as an MP3 file.

    Args:
        text: The text to synthesise.

    Returns:
        Path of a newly created ``.mp3`` temporary file, or ``None`` when
        synthesis fails. The file is created with ``delete=False``, so
        removing it after use is left to the caller.
    """
    audio_path = None
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # The temporary file is only used to reserve a unique path. Close the
        # handle right away so no descriptor leaks and the file is not held
        # open while gTTS writes to it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # Best-effort: audio is optional, so report the failure and carry on.
        logging.getLogger(__name__).warning("TTS generation failed", exc_info=True)
        if audio_path is not None:
            # Do not leave an empty or partially written file behind.
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None

def process_suneung_material():
    """Run the whole pipeline: fetch article -> prompt Gemini -> build TTS.

    Returns:
        A 3-tuple ``(content, source_info, audio_path)``:

        * ``content`` - the generated passage and questions, or a
          user-facing error message when a step fails.
        * ``source_info`` - a Markdown line crediting the source article,
          or ``""`` on failure.
        * ``audio_path`` - path of the MP3 produced for the passage, or
          ``None`` when no audio is available.

        Every path returns exactly these three values so that they line up
        with the three output components this handler is wired to.
    """
    title, link, site_name = fetch_random_article()

    if not link:
        return "๊ธฐ์‚ฌ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š”๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.", "", None

    raw_text = scrape_article_text(link)

    # Too little text means the article is a stub or the site blocked scraping.
    if len(raw_text) < 200:
        return "๊ธฐ์‚ฌ ๋ณธ๋ฌธ์ด ๋„ˆ๋ฌด ์งง๊ฑฐ๋‚˜ ํฌ๋กค๋ง์ด ์ฐจ๋‹จ๋œ ์‚ฌ์ดํŠธ์ž…๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.", "", None

    # Gemini prompt written from the point of view of a CSAT question setter.
    prompt = f"""
    You are an expert in creating English questions for the Korean CSAT (์ˆ˜๋Šฅ ์˜์–ด ์ถœ์ œ์œ„์›).
    Read the following article excerpt and perform the tasks below.
    
    [Source Material]
    {raw_text}
    
    [Tasks]
    1. **CSAT Passage (์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ)**: Rewrite and summarize the core idea into a logically coherent passage of exactly 150-180 words. The vocabulary and sentence structure must match the high difficulty level of the Korean CSAT English section. DO NOT include the blank or altered sequence here; just provide the perfect base passage.
    2. **Vocabulary (์ฃผ์š” ์–ดํœ˜)**: Extract 7-10 high-level vocabulary words from your generated passage. Format strictly as: "English Word | Korean Meaning | Synonym".
    3. **Fill-in-the-blank Question (๋นˆ์นธ ์ถ”๋ก  ๋ฌธ์ œ)**: Create a CSAT-style fill-in-the-blank question using the passage. Replace a crucial thematic phrase with "________". Provide 5 multiple-choice options (1 correct, 4 highly attractive distractors).
    4. **Sentence Ordering Question (๊ธ€์˜ ์ˆœ์„œ ๋ฌธ์ œ)**: Take the same passage, keep the first sentence as the given box, and divide the rest into three parts (A), (B), and (C) in a scrambled order. Provide the correct order.

    Format the output cleanly with the following headers:
    [์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ]
    [์ฃผ์š” ์–ดํœ˜]
    [๋นˆ์นธ ์ถ”๋ก  ๋ฌธ์ œ]
    [๊ธ€์˜ ์ˆœ์„œ ๋ฌธ์ œ]
    """

    try:
        # Gemini 1.5 Flash or Pro is the recommended model.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        result_text = response.text

        # Only the passage section is read aloud: take everything before the
        # vocabulary header and drop the passage header itself.
        if "[์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ]" in result_text and "[์ฃผ์š” ์–ดํœ˜]" in result_text:
            passage_for_tts = result_text.split("[์ฃผ์š” ์–ดํœ˜]")[0].replace("[์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ]", "").strip()
        else:
            passage_for_tts = "Could not parse passage for audio."

        audio_path = generate_tts(passage_for_tts)

        source_info = f"**์ถœ์ฒ˜:** {site_name} - [{title}]({link})"

        return result_text, source_info, audio_path

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"AI ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}", "", None

# --- Gradio UI layout ---
# Components are created in display order inside the Blocks context, so the
# order of the statements below defines the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page title followed by a one-line description naming the source sites.
    gr.Markdown("# ๐ŸŽ“ ์ˆ˜๋Šฅ ์˜์–ด ๋ชจ์˜ ๋ฌธํ•ญ ์ž๋™ ์ƒ์„ฑ๊ธฐ")
    gr.Markdown("Nautilus, Scientific American, The Conversation, Psyche, Aeon์—์„œ ๋žœ๋ค์œผ๋กœ ๊ธฐ์‚ฌ๋ฅผ ๋ฐœ์ทŒํ•˜์—ฌ ์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ๊ณผ ๋ฌธํ•ญ์„ ์ œ์ž‘ํ•ฉ๋‹ˆ๋‹ค.")
    
    # Single action button that triggers the whole pipeline.
    with gr.Row():
        generate_btn = gr.Button("๋žœ๋ค ์ง€๋ฌธ ๋ฐ ๋ฌธ์ œ ์ƒ์„ฑํ•˜๊ธฐ (Click)", variant="primary")
    
    with gr.Row():
        # Wider left column: generated content and the source credit.
        with gr.Column(scale=2):
            output_content = gr.Markdown(label="์ถœ์ œ ๋‚ด์šฉ")
            source_display = gr.Markdown(label="์ง€๋ฌธ ์ถœ์ฒ˜")
        
        # Narrower right column: read-only player for the TTS audio file.
        with gr.Column(scale=1):
            audio_player = gr.Audio(label="์ง€๋ฌธ ๋“ฃ๊ธฐ (TTS)", type="filepath", interactive=False)
            
    # Connect the handler to the button click (includes the loading animation).
    # The handler takes no inputs; its returned values are assigned to the
    # three outputs in the order listed here.
    generate_btn.click(
        fn=process_suneung_material,
        inputs=[],
        outputs=[output_content, source_display, audio_player]
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()