englissi committed on
Commit
3de3af7
·
verified ·
1 Parent(s): 77fbacd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ import feedparser
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from gtts import gTTS
7
+ import tempfile
8
+ import google.generativeai as genai
9
+ import os
10
+
11
+ # --- ์„ค์ • ์‚ฌํ•ญ ---
12
+ # ํ—ˆ๊น…ํŽ˜์ด์Šค Secrets ๋˜๋Š” ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ API ํ‚ค๋ฅผ ์•ˆ์ „ํ•˜๊ฒŒ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.
13
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
14
+
15
+ if not GEMINI_API_KEY:
16
+ raise ValueError("API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ—ˆ๊น…ํŽ˜์ด์Šค Settings -> Secrets์—์„œ GEMINI_API_KEY๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”.")
17
+
18
+ genai.configure(api_key=GEMINI_API_KEY)
19
+
20
# RSS/Atom feeds used as source material for CSAT-style passages,
# keyed by the publication name shown in the source attribution.
RSS_FEEDS = {
    "Aeon": "https://aeon.co/feed.rss",
    "Psyche": "https://psyche.co/feed",
    "The Conversation": "https://theconversation.com/global/articles.atom",
    "Scientific American": "https://rss.sciam.com/ScientificAmerican-Global",
    "Nautilus": "https://nautil.us/feed/",
}
28
+
29
def fetch_random_article():
    """Pick a random feed from RSS_FEEDS and a random entry from that feed.

    Returns:
        A ``(title, link, site_name)`` tuple. ``title`` and ``link`` are both
        ``None`` when the parsed feed has no entries; ``site_name`` is always
        the name of the feed that was chosen.
    """
    feed_choices = list(RSS_FEEDS.items())
    site_name, feed_url = random.choice(feed_choices)
    entries = feedparser.parse(feed_url).entries

    if entries:
        picked = random.choice(entries)
        return picked.title, picked.link, site_name

    return None, None, site_name
39
+
40
def scrape_article_text(url, max_chars=4000):
    """Download a page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article to fetch.
        max_chars: Maximum number of characters to return. The text is cut to
            this length to keep the prompt small and the passage source short.

    Returns:
        The paragraph texts joined by single spaces and truncated to
        ``max_chars``, or an empty string when the page could not be fetched
        or parsed.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx responses as failures instead of scraping the error
        # page as if it were the article.
        response.raise_for_status()

        # Sites differ in structure, so use the generic approach of
        # collecting every <p> tag.
        soup = BeautifulSoup(response.content, 'html.parser')
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return text[:max_chars]
    except Exception:
        # Best-effort boundary: report failure as "no text". Returning the
        # exception message here would let a long error string pass the
        # caller's minimum-length check and be used as article content.
        return ""
55
+
56
def generate_tts(text):
    """Convert an English passage to speech and save it as an mp3 file.

    Args:
        text: The passage to read aloud.

    Returns:
        Path of the generated ``.mp3`` temporary file, or ``None`` when there
        is nothing to speak or synthesis fails. The caller owns the file; it
        is not deleted automatically.
    """
    # Nothing to synthesize; skip creating a temp file at all.
    if not text or not text.strip():
        return None

    audio_path = None
    try:
        # Reserve a unique .mp3 path, then close the handle immediately so no
        # file descriptor leaks and the file can be reopened by name for
        # writing (required on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        gTTS(text=text, lang='en', slow=False).save(audio_path)
        return audio_path
    except Exception:
        # Best-effort: audio is optional for the UI, so signal failure with
        # None, but do not leave an empty or partial temp file behind.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None
65
+
66
def _build_csat_prompt(raw_text):
    """Return the Gemini prompt that asks for a CSAT passage and questions from raw_text."""
    return f"""
You are an expert in creating English questions for the Korean CSAT (수능 영어 출제위원).
Read the following article excerpt and perform the tasks below.

[Source Material]
{raw_text}

[Tasks]
1. **CSAT Passage (수능형 지문)**: Rewrite and summarize the core idea into a logically coherent passage of exactly 150-180 words. The vocabulary and sentence structure must match the high difficulty level of the Korean CSAT English section. DO NOT include the blank or altered sequence here; just provide the perfect base passage.
2. **Vocabulary (주요 어휘)**: Extract 7-10 high-level vocabulary words from your generated passage. Format strictly as: "English Word | Korean Meaning | Synonym".
3. **Fill-in-the-blank Question (빈칸 추론 문제)**: Create a CSAT-style fill-in-the-blank question using the passage. Replace a crucial thematic phrase with "________". Provide 5 multiple-choice options (1 correct, 4 highly attractive distractors).
4. **Sentence Ordering Question (글의 순서 문제)**: Take the same passage, keep the first sentence as the given box, and divide the rest into three parts (A), (B), and (C) in a scrambled order. Provide the correct order.

Format the output cleanly with the following headers:
[수능형 지문]
[주요 어휘]
[빈칸 추론 문제]
[글의 순서 문제]
"""


def _extract_passage_for_tts(result_text):
    """Return the text under the passage header, or a fallback sentence if the headers are missing."""
    if "[수능형 지문]" in result_text and "[주요 어휘]" in result_text:
        # Everything before the vocabulary header is the passage section;
        # drop the passage header itself so it is not read aloud.
        passage_section = result_text.split("[주요 어휘]")[0]
        return passage_section.replace("[수능형 지문]", "").strip()
    return "Could not parse passage for audio."


def process_suneung_material():
    """Run the full pipeline: fetch article -> ask Gemini -> generate TTS -> return results.

    Returns:
        A 3-tuple ``(content, source_info, audio_path)`` matching the three
        Gradio output components wired to this function:
        ``content`` is the generated Markdown (or an error message),
        ``source_info`` is a Markdown source attribution (empty on failure),
        ``audio_path`` is the mp3 path for the passage or ``None``.
    """
    title, link, site_name = fetch_random_article()

    # Every return below yields exactly three values; the event handler is
    # bound to three output components.
    if not link:
        return "기사를 가져오는데 실패했습니다.", "", None

    raw_text = scrape_article_text(link)

    # Too little text means the scrape was blocked or the page had no body.
    if len(raw_text) < 200:
        return "기사 본문이 너무 짧거나 크롤링이 차단된 사이트입니다. 다시 시도해주세요.", "", None

    # Prompt written from the point of view of a CSAT question writer.
    prompt = _build_csat_prompt(raw_text)

    try:
        # Gemini 1.5 Flash or Pro is the recommended model.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        result_text = response.text

        # Only the passage section is read aloud.
        passage_for_tts = _extract_passage_for_tts(result_text)
        audio_path = generate_tts(passage_for_tts)

        source_info = f"**출처:** {site_name} - [{title}]({link})"

        return result_text, source_info, audio_path

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"AI 처리 중 오류가 발생했습니다: {str(e)}", "", None
120
+
121
+ # --- Gradio UI ๊ตฌ์„ฑ ---
122
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
123
+ gr.Markdown("# ๐ŸŽ“ ์ˆ˜๋Šฅ ์˜์–ด ๋ชจ์˜ ๋ฌธํ•ญ ์ž๋™ ์ƒ์„ฑ๊ธฐ")
124
+ gr.Markdown("Nautilus, Scientific American, The Conversation, Psyche, Aeon์—์„œ ๋žœ๋ค์œผ๋กœ ๊ธฐ์‚ฌ๋ฅผ ๋ฐœ์ทŒํ•˜์—ฌ ์ˆ˜๋Šฅํ˜• ์ง€๋ฌธ๊ณผ ๋ฌธํ•ญ์„ ์ œ์ž‘ํ•ฉ๋‹ˆ๋‹ค.")
125
+
126
+ with gr.Row():
127
+ generate_btn = gr.Button("๋žœ๋ค ์ง€๋ฌธ ๋ฐ ๋ฌธ์ œ ์ƒ์„ฑํ•˜๊ธฐ (Click)", variant="primary")
128
+
129
+ with gr.Row():
130
+ with gr.Column(scale=2):
131
+ output_content = gr.Markdown(label="์ถœ์ œ ๋‚ด์šฉ")
132
+ source_display = gr.Markdown(label="์ง€๋ฌธ ์ถœ์ฒ˜")
133
+
134
+ with gr.Column(scale=1):
135
+ audio_player = gr.Audio(label="์ง€๋ฌธ ๋“ฃ๊ธฐ (TTS)", type="filepath", interactive=False)
136
+
137
+ # ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ ํ•จ์ˆ˜ ์—ฐ๊ฒฐ (๋กœ๋”ฉ ์• ๋‹ˆ๋ฉ”์ด์…˜ ํฌํ•จ)
138
+ generate_btn.click(
139
+ fn=process_suneung_material,
140
+ inputs=[],
141
+ outputs=[output_content, source_display, audio_player]
142
+ )
143
+
144
+ if __name__ == "__main__":
145
+ demo.launch()