# englissi's picture
# Create app.py
# 3de3af7 verified
import gradio as gr
import random
import feedparser
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
import tempfile
import google.generativeai as genai
import os
# --- Configuration ---
# Load the API key from Hugging Face Secrets or a system environment variable
# so that it never has to be written into the source.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    # Fail at import time: the key is required to configure the Gemini client.
    raise ValueError(
        "API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ—ˆ๊น…ํŽ˜์ด์Šค Settings -> Secrets์—์„œ GEMINI_API_KEY๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”."
    )

genai.configure(api_key=GEMINI_API_KEY)
# RSS feeds used as source material for CSAT-style passages.
# Maps the display name of each site to its feed URL.
RSS_FEEDS = {
    "Aeon": "https://aeon.co/feed.rss",
    "Psyche": "https://psyche.co/feed",
    "The Conversation": "https://theconversation.com/global/articles.atom",
    "Scientific American": "https://rss.sciam.com/ScientificAmerican-Global",
    "Nautilus": "https://nautil.us/feed/",
}
def fetch_random_article():
    """Pick a random article from the configured RSS feeds.

    Feeds are tried in random order until one yields an entry that carries a
    link, so a single empty or unreachable feed does not fail the request.

    Returns:
        A ``(title, link, site_name)`` tuple. When no feed yields a usable
        entry, ``title`` and ``link`` are ``None`` and ``site_name`` is the
        name of the last feed that was tried (``None`` if no feed is
        configured).
    """
    feeds = list(RSS_FEEDS.items())
    random.shuffle(feeds)

    site_name = None
    for site_name, feed_url in feeds:
        feed = feedparser.parse(feed_url)
        # Entries are dict-like. Use .get() because attribute access raises
        # AttributeError when a feed entry has no link or title.
        candidates = [entry for entry in feed.entries if entry.get("link")]
        if not candidates:
            continue
        entry = random.choice(candidates)
        link = entry["link"]
        # Fall back to the link so the source line always has a label.
        return entry.get("title") or link, link, site_name

    return None, None, site_name
def scrape_article_text(url, max_chars=4000):
    """Download *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page.
        max_chars: Upper bound on the length of the returned text. Only the
            leading part of the article is kept, to respect the API token
            limit and the length of a CSAT passage.

    Returns:
        The paragraph texts joined with single spaces and truncated to
        ``max_chars`` characters, or an empty string when the page cannot be
        fetched or parsed.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page
        # as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Site layouts differ, so use the generic approach of collecting every
        # <p> element.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return text[:max_chars]
    except Exception:
        # Deliberate best-effort boundary. Return "" rather than the exception
        # text: a long error message would otherwise pass the caller's
        # minimum-length check and be used as article content.
        return ""
def generate_tts(text):
    """Convert an English passage to speech and save it as an mp3 file.

    Args:
        text: The English text to synthesize.

    Returns:
        Path of the generated mp3 file, or ``None`` when *text* is blank or
        synthesis fails. The file is created with ``delete=False``, so it is
        not removed automatically after a successful call.
    """
    if not text or not text.strip():
        return None

    audio_path = None
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # Only the unique path is needed. Close the handle immediately so the
        # file descriptor is not leaked; the audio is written by name below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # Best-effort: audio is optional, so report failure as None. Remove
        # the temp file so a failed synthesis does not leave it behind.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None
def process_suneung_material():
    """Run the full pipeline: fetch article -> ask Gemini -> synthesize TTS.

    Returns:
        A 3-tuple ``(result_text, source_info, audio_path)``, one value per
        Gradio output component this callback is wired to. On failure
        ``result_text`` carries the user-facing error message,
        ``source_info`` is an empty string and ``audio_path`` is ``None``.
    """
    title, link, site_name = fetch_random_article()
    if not link:
        # Every return path must yield exactly three values; a different
        # count does not match the three outputs of the click handler.
        return "๊ธฐ์‚ฌ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š”๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.", "", None

    raw_text = scrape_article_text(link)
    if len(raw_text) < 200:
        # Too little text: the article is very short or scraping was blocked.
        return "๊ธฐ์‚ฌ ๋ณธ๋ฌธ์ด ๋„ˆ๋ฌด ์งง๊ฑฐ๋‚˜ ํฌ๋กค๋ง์ด ์ฐจ๋‹จ๋œ ์‚ฌ์ดํŠธ์ž…๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.", "", None

    # Section headers requested from the model; the same literals are used
    # below to cut the passage out of the answer.
    passage_header = "[์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ]"
    vocab_header = "[์ฃผ์š” ์–ดํœ˜]"

    # Gemini prompt written from the point of view of a CSAT question writer.
    prompt = f"""
You are an expert in creating English questions for the Korean CSAT (์ˆ˜๋Šฅ ์˜์–ด ์ถœ์ œ์œ„์›).
Read the following article excerpt and perform the tasks below.
[Source Material]
{raw_text}
[Tasks]
1. **CSAT Passage (์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ)**: Rewrite and summarize the core idea into a logically coherent passage of exactly 150-180 words. The vocabulary and sentence structure must match the high difficulty level of the Korean CSAT English section. DO NOT include the blank or altered sequence here; just provide the perfect base passage.
2. **Vocabulary (์ฃผ์š” ์–ดํœ˜)**: Extract 7-10 high-level vocabulary words from your generated passage. Format strictly as: "English Word | Korean Meaning | Synonym".
3. **Fill-in-the-blank Question (๋นˆ์นธ ์ถ”๋ก  ๋ฌธ์ œ)**: Create a CSAT-style fill-in-the-blank question using the passage. Replace a crucial thematic phrase with "________". Provide 5 multiple-choice options (1 correct, 4 highly attractive distractors).
4. **Sentence Ordering Question (๊ธ€์˜ ์ˆœ์„œ ๋ฌธ์ œ)**: Take the same passage, keep the first sentence as the given box, and divide the rest into three parts (A), (B), and (C) in a scrambled order. Provide the correct order.
Format the output cleanly with the following headers:
[์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ]
[์ฃผ์š” ์–ดํœ˜]
[๋นˆ์นธ ์ถ”๋ก  ๋ฌธ์ œ]
[๊ธ€์˜ ์ˆœ์„œ ๋ฌธ์ œ]
"""
    try:
        # Gemini 1.5 Flash or Pro is the recommended model here.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        result_text = response.text

        # Use only the passage section (everything before the vocabulary
        # header, minus the passage header itself) as input for the TTS.
        if passage_header in result_text and vocab_header in result_text:
            passage_for_tts = (
                result_text.split(vocab_header)[0]
                .replace(passage_header, "")
                .strip()
            )
        else:
            passage_for_tts = "Could not parse passage for audio."

        audio_path = generate_tts(passage_for_tts)
        source_info = f"**์ถœ์ฒ˜:** {site_name} - [{title}]({link})"
        return result_text, source_info, audio_path
    except Exception as e:
        # UI boundary: show the failure in the output pane instead of raising.
        return f"AI ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", "", None
# --- Gradio UI layout ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page title and a short description of the article sources.
    gr.Markdown("# ๐ŸŽ“ ์ˆ˜๋Šฅ ์˜์–ด ๋ชจ์˜ ๋ฌธํ•ญ ์ž๋™ ์ƒ์„ฑ๊ธฐ")
    gr.Markdown(
        "Nautilus, Scientific American, The Conversation, Psyche, Aeon์—์„œ ๋žœ๋ค์œผ๋กœ ๊ธฐ์‚ฌ๋ฅผ ๋ฐœ์ทŒํ•˜์—ฌ ์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ๊ณผ ๋ฌธํ•ญ์„ ์ œ์ž‘ํ•ฉ๋‹ˆ๋‹ค."
    )

    # First row: the single trigger button.
    with gr.Row():
        generate_btn = gr.Button(
            "๋žœ๋ค ์ง€๋ฌธ ๋ฐ ๋ฌธ์ œ ์ƒ์„ฑํ•˜๊ธฐ (Click)",
            variant="primary",
        )

    # Second row: generated content and source on the left (wider column),
    # audio player on the right.
    with gr.Row():
        with gr.Column(scale=2):
            output_content = gr.Markdown(label="์ถœ์ œ ๋‚ด์šฉ")
            source_display = gr.Markdown(label="์ง€๋ฌธ ์ถœ์ฒ˜")
        with gr.Column(scale=1):
            audio_player = gr.Audio(
                label="์ง€๋ฌธ ๋“ฃ๊ธฐ (TTS)",
                type="filepath",
                interactive=False,
            )

    # Connect the button click to the pipeline function (includes the loading
    # animation). The function takes no inputs and fills the three outputs.
    generate_btn.click(
        fn=process_suneung_material,
        inputs=[],
        outputs=[output_content, source_display, audio_player],
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()