Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import random | |
| import feedparser | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from gtts import gTTS | |
| import tempfile | |
| import google.generativeai as genai | |
| import os | |
# --- Configuration ---
# The Gemini API key is read from the process environment (for example a
# Hugging Face Space secret) so it never has to be stored in the source.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    # Fail fast at import time: nothing below can work without a key.
    raise ValueError("API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊น ํ์ด์ค Settings -> Secrets์์ GEMINI_API_KEY๋ฅผ ์ถ๊ฐํด์ฃผ์ธ์.")
genai.configure(api_key=GEMINI_API_KEY)
# Feeds that supply source articles for the generated passages.
# Maps the site name (used in the source attribution) to its RSS/Atom URL.
RSS_FEEDS = {
    "Aeon": "https://aeon.co/feed.rss",
    "Psyche": "https://psyche.co/feed",
    "The Conversation": "https://theconversation.com/global/articles.atom",
    "Scientific American": "https://rss.sciam.com/ScientificAmerican-Global",
    "Nautilus": "https://nautil.us/feed/",
}
def fetch_random_article():
    """Pick a random article from one of the configured RSS feeds.

    Feeds are tried in random order until one yields at least one entry
    that carries a link, so a single empty or unreachable feed does not
    fail the whole request.

    Returns:
        A ``(title, link, site_name)`` tuple. When no feed produced a usable
        entry, ``title`` and ``link`` are ``None`` and ``site_name`` is the
        last feed that was tried (``None`` if no feeds are configured).
    """
    feeds = list(RSS_FEEDS.items())
    random.shuffle(feeds)

    site_name = None
    for site_name, feed_url in feeds:
        feed = feedparser.parse(feed_url)
        # Entries without a link cannot be scraped later, so skip them here
        # instead of failing on a missing attribute.
        usable_entries = [entry for entry in feed.entries if entry.get("link")]
        if not usable_entries:
            continue
        chosen = random.choice(usable_entries)
        link = chosen.get("link")
        # Fall back to the link when the feed entry has no title.
        title = chosen.get("title") or link
        return title, link, site_name

    return None, None, site_name
def scrape_article_text(url):
    """Download *url* and return the text of its ``<p>`` elements.

    Extraction is deliberately generic (all paragraph tags) because every
    source site has a different page structure.

    Args:
        url: Address of the article page.

    Returns:
        Up to 4000 characters of paragraph text joined by single spaces, or
        an empty string when the page could not be fetched (network error,
        timeout, invalid URL, or an HTTP error status). An empty string lets
        the caller's minimum-length check reject the article instead of
        treating an error message as article text.
    """
    request_headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=request_headers, timeout=10)
        # A 4xx/5xx body is an error page, not an article.
        response.raise_for_status()
    except requests.RequestException:
        return ""

    soup = BeautifulSoup(response.content, 'html.parser')
    text = ' '.join(p.get_text() for p in soup.find_all('p'))
    # Keep only the leading part: bounds the prompt size and is plenty of
    # material for a short passage.
    return text[:4000]
def generate_tts(text):
    """Convert an English passage to speech and return the path of the mp3.

    Args:
        text: The text to read aloud.

    Returns:
        Path of a newly created ``.mp3`` temporary file, or ``None`` when
        *text* is empty/blank or speech synthesis fails. The caller owns the
        returned file; it is not deleted automatically.
    """
    if not text or not text.strip():
        # Nothing to speak; skip creating a file altogether.
        return None

    audio_path = None
    try:
        # Only the unique path is needed. Close the handle right away so no
        # descriptor leaks and gTTS can reopen the file for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
            audio_path = handle.name
        gTTS(text=text, lang='en', slow=False).save(audio_path)
        return audio_path
    except Exception:
        # Audio is best-effort: report failure as None, but do not leave an
        # empty or partial file behind.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None
def process_suneung_material():
    """Run the full pipeline: fetch article -> ask Gemini -> build TTS audio.

    Returns:
        A 3-tuple ``(content, source_info, audio_path)``:

        * ``content`` - the generated material, or a user-facing error
          message when a step failed.
        * ``source_info`` - Markdown attribution of the source article, or
          ``""`` on failure.
        * ``audio_path`` - path of the mp3 for the passage, or ``None``.

        Every path returns exactly three values because the Gradio click
        handler that calls this function is wired to three output
        components; returning a different count makes Gradio raise instead
        of displaying the message.
    """
    title, link, site_name = fetch_random_article()
    if not link:
        return "๊ธฐ์ฌ๋ฅผ ๊ฐ์ ธ์ค๋๋ฐ ์คํจํ์ต๋๋ค.", "", None

    raw_text = scrape_article_text(link)
    if len(raw_text) < 200:
        return "๊ธฐ์ฌ ๋ณธ๋ฌธ์ด ๋๋ฌด ์งง๊ฑฐ๋ ํฌ๋กค๋ง์ด ์ฐจ๋จ๋ ์ฌ์ดํธ์ ๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์.", "", None

    # Gemini prompt written from the point of view of a CSAT question setter.
    prompt = f"""
You are an expert in creating English questions for the Korean CSAT (์๋ฅ ์์ด ์ถ์ ์์).
Read the following article excerpt and perform the tasks below.
[Source Material]
{raw_text}
[Tasks]
1. **CSAT Passage (์๋ฅํ ์ง๋ฌธ)**: Rewrite and summarize the core idea into a logically coherent passage of exactly 150-180 words. The vocabulary and sentence structure must match the high difficulty level of the Korean CSAT English section. DO NOT include the blank or altered sequence here; just provide the perfect base passage.
2. **Vocabulary (์ฃผ์ ์ดํ)**: Extract 7-10 high-level vocabulary words from your generated passage. Format strictly as: "English Word | Korean Meaning | Synonym".
3. **Fill-in-the-blank Question (๋น์นธ ์ถ๋ก ๋ฌธ์ )**: Create a CSAT-style fill-in-the-blank question using the passage. Replace a crucial thematic phrase with "________". Provide 5 multiple-choice options (1 correct, 4 highly attractive distractors).
4. **Sentence Ordering Question (๊ธ์ ์์ ๋ฌธ์ )**: Take the same passage, keep the first sentence as the given box, and divide the rest into three parts (A), (B), and (C) in a scrambled order. Provide the correct order.
Format the output cleanly with the following headers:
[์๋ฅํ ์ง๋ฌธ]
[์ฃผ์ ์ดํ]
[๋น์นธ ์ถ๋ก ๋ฌธ์ ]
[๊ธ์ ์์ ๋ฌธ์ ]
"""

    try:
        # A Gemini 1.5 Flash or Pro model is recommended here.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        result_text = response.text
    except Exception as e:
        return f"AI ์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}", "", None

    # Only the passage section (everything before the vocabulary header,
    # minus the passage header itself) is read aloud.
    if "[์๋ฅํ ์ง๋ฌธ]" in result_text and "[์ฃผ์ ์ดํ]" in result_text:
        passage_for_tts = result_text.split("[์ฃผ์ ์ดํ]")[0].replace("[์๋ฅํ ์ง๋ฌธ]", "").strip()
    else:
        passage_for_tts = "Could not parse passage for audio."

    audio_path = generate_tts(passage_for_tts)
    source_info = f"**์ถ์ฒ:** {site_name} - [{title}]({link})"
    return result_text, source_info, audio_path
# --- Gradio UI layout ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐ ์๋ฅ ์์ด ๋ชจ์ ๋ฌธํญ ์๋ ์์ฑ๊ธฐ")
    gr.Markdown("Nautilus, Scientific American, The Conversation, Psyche, Aeon์์ ๋๋ค์ผ๋ก ๊ธฐ์ฌ๋ฅผ ๋ฐ์ทํ์ฌ ์๋ฅํ ์ง๋ฌธ๊ณผ ๋ฌธํญ์ ์ ์ํฉ๋๋ค.")

    with gr.Row():
        generate_btn = gr.Button(
            "๋๋ค ์ง๋ฌธ ๋ฐ ๋ฌธ์ ์์ฑํ๊ธฐ (Click)",
            variant="primary",
        )

    with gr.Row():
        # Wide column: generated material followed by the source attribution.
        with gr.Column(scale=2):
            output_content = gr.Markdown(label="์ถ์ ๋ด์ฉ")
            source_display = gr.Markdown(label="์ง๋ฌธ ์ถ์ฒ")
        # Narrow column: read-only player for the synthesized passage.
        with gr.Column(scale=1):
            audio_player = gr.Audio(
                label="์ง๋ฌธ ๋ฃ๊ธฐ (TTS)",
                type="filepath",
                interactive=False,
            )

    # Hook the button up to the pipeline; the handler's three return values
    # fill the three components below, in this order.
    generate_btn.click(
        fn=process_suneung_material,
        inputs=[],
        outputs=[output_content, source_display, audio_player],
    )

if __name__ == "__main__":
    demo.launch()