lingscope_app / app.py
Jey813's picture
Update app.py
fb7b386 verified
import os
import json
import tempfile
import gradio as gr
import openai
from transformers import pipeline
# =========================
# Configuration
# =========================
# The OpenAI key is read from the environment; it is None when the variable
# is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")


def _load_translator(model_name):
    """Build a Hugging Face ``translation`` pipeline for *model_name*."""
    return pipeline("translation", model=model_name)


# Hugging Face translation pipelines (both directions).
# NOTE(review): confirm that the en-ko and de-ko checkpoints are actually
# published under these names; loading happens at import time.
translator_ko_to_en = _load_translator("Helsinki-NLP/opus-mt-ko-en")
translator_ko_to_de = _load_translator("Helsinki-NLP/opus-mt-ko-de")
translator_en_to_ko = _load_translator("Helsinki-NLP/opus-mt-en-ko")
translator_de_to_ko = _load_translator("Helsinki-NLP/opus-mt-de-ko")
# =========================
# Utility: OpenAI call
# =========================
def gpt(messages, temperature=0.7, model="gpt-4"):
    """Send a single chat-completion request and return the reply text.

    Args:
        messages: Chat messages, forwarded unchanged as ``messages``.
        temperature: Sampling temperature forwarded to the API.
        model: Model name forwarded to the API.

    Returns:
        The ``content`` of the first choice's message with surrounding
        whitespace stripped.
    """
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"].strip()
# =========================
# Core logic
# =========================
def make_variants(input_text, source_lang, target_lang, direct_translation):
    """Return the direct translation plus up to two natural-sounding variants.

    The model is asked for two one-sentence alternatives to
    *direct_translation*. Reply lines that start with ``1)`` / ``2)`` or with
    the variant labels are collected; for such a line the text after the first
    colon is used, or the whole line when it has no colon.

    Returns:
        A list of exactly three strings: *direct_translation* first, then the
        parsed variants, padded with empty strings when fewer were found.
    """
    system_prompt = "You are a bilingual translator who produces concise, natural alternatives."
    user_prompt = f"""
[์›๋ฌธ] ({source_lang}): {input_text}
[์ง์—ญ] ({target_lang}): {direct_translation}
์œ„ ์ง์—ญ์„ ๊ธฐ์ค€์œผ๋กœ, {target_lang} ์›์–ด๋ฏผ์ด ์‹ค์ œ๋กœ ๋งŽ์ด ์“ฐ๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ณ€ํ˜• 2๊ฐ€์ง€๋ฅผ ๋งŒ๋“ค์–ด์ค˜.
- ๋งฅ๋ฝ: ์ผ์ƒ ๋Œ€ํ™” ๊ธฐ์ค€
- ๊ฐ ๋ณ€ํ˜•์€ 1๋ฌธ์žฅ
- ๊ณผ์žฅ/์Šฌ๋žญ์€ ๊ณผํ•˜์ง€ ์•Š๊ฒŒ
- ์ถœ๋ ฅ ํ˜•์‹:
1) ๋ณ€ํ˜•A: ...
2) ๋ณ€ํ˜•B: ...
"""
    reply = gpt(
        [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.6,
    )

    # Simple line-based parsing of the reply.
    numbered_prefixes = ("1)", "2)")
    label_prefixes = ("๋ณ€ํ˜•a", "๋ณ€ํ˜•b")
    collected = [direct_translation]
    for raw_line in reply.splitlines():
        candidate = raw_line.strip()
        is_variant = candidate.startswith(numbered_prefixes) or candidate.lower().startswith(label_prefixes)
        if not is_variant:
            continue
        _, colon, after_colon = candidate.partition(":")
        collected.append(after_colon.strip() if colon else candidate)

    # Pad to three entries, then truncate; this also covers the case where
    # three or more entries were already collected.
    return (collected + ["", ""])[:3]
def back_translate_list(variants, source_lang, target_lang):
    """Back-translate each variant to build data for the comparison table.

    Falsy variants map to an empty string. For language pairs other than the
    two handled below, a fixed "not supported" placeholder is returned instead
    of a translation.

    Returns:
        A list with one entry per item in *variants*, in the same order.
    """
    unsupported = "(์—ญ๋ฒˆ์—ญ ๋ฏธ์ง€์›)"

    def _reverse(text):
        # Translators are looked up only inside the matching branch, so an
        # unsupported pair never touches them.
        if source_lang == "ํ•œ๊ตญ์–ด":
            if target_lang == "์˜์–ด":
                return translator_en_to_ko(text)[0]["translation_text"]
            if target_lang == "๋…์ผ์–ด":
                return translator_de_to_ko(text)[0]["translation_text"]
        return unsupported

    return [_reverse(item) if item else "" for item in variants]
def build_explanations(input_text, variants, source_lang, target_lang):
    """Ask the model for a sectioned markdown explanation of the expression.

    The first variant (or an empty string when *variants* is empty) is
    presented as the representative translation and the remaining variants are
    listed after it. The prompt prescribes four ``##`` section headings.

    Returns:
        The model's reply as returned by ``gpt``.
    """
    if variants:
        best = variants[0]
    else:
        best = ""
    system_prompt = "You are a concise yet friendly language tutor who explains in Korean with clear headings and bullet points."
    user_prompt = f"""
๋‹ค์Œ ํ‘œํ˜„์— ๋Œ€ํ•ด ํ•œ๊ตญ์–ด๋กœ ์„ค๋ช…ํ•ด์ค˜. ๊ฐ„๊ฒฐํ•˜์ง€๋งŒ ํ•ต์‹ฌ์€ ๋น ์ง์—†์ด.
[์›๋ฌธ] ({source_lang}): {input_text}
[๋Œ€ํ‘œ ๋ฒˆ์—ญ] ({target_lang}): {best}
[๋‹ค๋ฅธ ๋ณ€ํ˜• ๋ฒˆ์—ญ๋“ค]: {variants[1:]}
์•„๋ž˜ ์„น์…˜ ์ œ๋ชฉ์„ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•ด:
## ํ‘œํ˜„ ์„ค๋ช…
- ์–ด๋–ค ์ƒํ™ฉ/๊ด€๊ณ„์—์„œ ์“ฐ๋Š”์ง€, ๋‰˜์•™์Šค(๊ฒฉ์‹/์นœ๊ทผ๊ฐ)
## ๋ฌธ๋ฒ• ํฌ์ธํŠธ
- ํ•ต์‹ฌ ๋ฌธ๋ฒ• ์š”์†Œ 2~3๊ฐœ (์กฐ์‚ฌ/์ „์น˜์‚ฌ, ์‹œ์ œ, ์–ด์ˆœ ๋“ฑ)
- ๊ฐ„๋‹จ ์˜ˆ๋ฌธ ๊ฐ 1๊ฐœ
## ๋‹จ์–ด/ํ‘œํ˜„ ์„ค๋ช…
- ์–ด๋ ค์šธ ์ˆ˜ ์žˆ๋Š” ๋‹จ์–ด/๊ตฌ์ ˆ 3๊ฐœ: ์˜๋ฏธ + ์งง์€ ์˜ˆ๋ฌธ
## ๋ฌธํ™”์  ์ฐจ์ด
- ํ•œ๊ตญ์–ด์™€ ๋Œ€์ƒ ์–ธ์–ด ์‚ฌ์ด์˜ ๊ธฐ๋Œ€/์˜ˆ์˜/๊ด€์Šต ์ฐจ์ด 2~3๊ฐ€์ง€
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return gpt(conversation, temperature=0.5)
def build_pronunciation(input_text, variants, source_lang, target_lang):
    """Ask the model for a short textual pronunciation guide.

    The guide covers the original sentence and the first variant (an empty
    string when *variants* is empty); the system prompt asks for IPA-like
    notation, stress and rhythm.

    Returns:
        The model's reply as returned by ``gpt``.
    """
    if variants:
        best = variants[0]
    else:
        best = ""
    system_prompt = "You provide compact pronunciation guides (IPA-ish, stress, rhythm)."
    user_prompt = f"""
๋‹ค์Œ ๋‘ ๋ฌธ์žฅ์— ๋Œ€ํ•œ ๋ฐœ์Œ ๊ฐ€์ด๋“œ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๊ฐ„๋‹จํžˆ ์ ์–ด์ค˜.
[์›๋ฌธ] ({source_lang}): {input_text}
[๋Œ€ํ‘œ ๋ฒˆ์—ญ] ({target_lang}): {best}
ํ˜•์‹:
- ์›๋ฌธ: (๊ฐ€๋Šฅํ•˜๋ฉด ๊ฐ„๋‹จ IPA/ํ•œ๊ธ€ํ‘œ๊ธฐ) + ๊ฐ•์„ธ/๋ฆฌ๋“ฌ ํฌ์ธํŠธ
- ๋ฒˆ์—ญ: (IPA/๊ฐ•์„ธ) + ์ž์—ฐ์Šค๋Ÿฌ์šด ์–ต์–‘ ํŒ
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return gpt(conversation, temperature=0.4)
def build_roleplay(input_text, variants, target_lang):
    """Ask the model for two short role-play dialogues using the expression.

    Only the first variant (an empty string when *variants* is empty) and
    *target_lang* are inserted into the prompt; *input_text* is accepted but
    not used.

    Returns:
        The model's reply as returned by ``gpt``.
    """
    if variants:
        best = variants[0]
    else:
        best = ""
    system_prompt = "You create short, practical role-play dialogues for language learners."
    user_prompt = f"""
๋‹ค์Œ ํ‘œํ˜„์„ ํ™œ์šฉํ•œ ์งง์€ ๋Œ€ํ™” 2๊ฐ€์ง€๋ฅผ ๋งŒ๋“ค์–ด์ค˜. ๊ฐ ๋Œ€ํ™”๋Š” 6~8 ํ„ด.
- ํ†ค1: ๊ฒฉ์‹(์ง์žฅ/๊ณต์ ์ธ ์ƒํ™ฉ)
- ํ†ค2: ์นœ๊ทผ(์นœ๊ตฌ/๊ฐ€๋ฒผ์šด ์ƒํ™ฉ)
- ๋Œ€์ƒ ์–ธ์–ด: {target_lang}
- ๋Œ€ํ™” ํ›„ ํ•œ๊ตญ์–ด ์š”์•ฝ ํ•œ ์ค„
ํ‘œํ˜„: "{best}"
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return gpt(conversation, temperature=0.7)
def suggest_resources(input_text, target_lang):
    """Suggest learning resources as a markdown list of YouTube search links.

    The model is asked for search keywords, one per line. Each non-empty
    keyword becomes a ``- [keyword](url)`` bullet whose URL is a YouTube
    search for that keyword.

    Args:
        input_text: The expression to find usage examples for.
        target_lang: Language name inserted into the prompt.

    Returns:
        The bullets joined with newlines; an empty string when the reply
        contains no usable keyword.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    sys_msg = "You suggest search keywords for YouTube and web to find usage contexts."
    user_msg = f"""
์•„๋ž˜ ํ‘œํ˜„์„ ์‹ค์ œ ๋งฅ๋ฝ์—์„œ ๋ณผ ์ˆ˜ ์žˆ๋Š” ์ž๋ฃŒ๋ฅผ ์ฐพ๊ธฐ ์œ„ํ•œ ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๋ฅผ ์ œ์•ˆํ•ด์ค˜.
- ์–ธ์–ด: {target_lang}
- 5~7๊ฐœ ํ‚ค์›Œ๋“œ, ๋”ฐ์˜ดํ‘œ ์—†์ด, ํ•œ ์ค„์— ํ•˜๋‚˜
ํ‘œํ˜„: {input_text}
"""
    out = gpt([{"role": "system", "content": sys_msg}, {"role": "user", "content": user_msg}], temperature=0.5)

    # Build clickable search URLs.
    base = "https://www.youtube.com/results?search_query="
    md_lines = []
    for raw in out.splitlines():
        keyword = raw.strip("-โ€ข ").strip()
        if not keyword:
            # Blank lines, or lines that held only list markers, would
            # otherwise turn into an empty "[]()" link.
            continue
        # quote_plus percent-encodes non-ASCII text and reserved characters
        # (&, #, parentheses, ...) that would otherwise corrupt the query or
        # terminate the markdown link early; spaces still become "+".
        md_lines.append(f"- [{keyword}]({base}{quote_plus(keyword)})")
    return "\n".join(md_lines)
# =========================
# Main entry point (wired to Gradio)
# =========================
def run_pipeline(input_text, source_lang, target_lang, favorites_state):
    """Translate *input_text* and build the content for every result panel.

    Args:
        input_text: Sentence entered by the user; ``None`` or blank input
            short-circuits to an empty result.
        source_lang: Source-language label selected in the UI.
        target_lang: Target-language label selected in the UI.
        favorites_state: Current favorites list; passed through unchanged.

    Returns:
        A 9-tuple: original text, highlighted variants as ``(text, label)``
        pairs, back-translation rows as ``[variant, back_translation]``
        lists, explanation markdown, pronunciation markdown, role-play
        markdown, *favorites_state*, a visibility update, resources markdown.
    """
    if not input_text or not input_text.strip():
        return (
            "", [], None, "", "", "", favorites_state, gr.update(visible=False), None
        )

    # 1) Direct translation
    if source_lang == "ํ•œ๊ตญ์–ด" and target_lang == "์˜์–ด":
        direct = translator_ko_to_en(input_text)[0]["translation_text"]
    elif source_lang == "ํ•œ๊ตญ์–ด" and target_lang == "๋…์ผ์–ด":
        direct = translator_ko_to_de(input_text)[0]["translation_text"]
    else:
        # Unsupported language pair: report it in the variants and explanation
        # slots. HighlightedText values are (text, label) pairs.
        return (
            input_text, [("(์ง€์›๋˜์ง€ ์•Š๋Š” ์–ธ์–ด์Œ์ž…๋‹ˆ๋‹ค.)", None)], None, "(์ง€์›๋˜์ง€ ์•Š๋Š” ์–ธ์–ด์Œ)", "", "", favorites_state, gr.update(visible=False), None
        )

    # 2) Three variants (direct translation + two alternatives)
    variants = make_variants(input_text, source_lang, target_lang, direct)

    # 3) Back-translation table data, one [variant, back-translation] row per
    #    variant; the column headers are defined on the Dataframe component.
    backs = back_translate_list(variants, source_lang, target_lang)
    back_rows = [[variant, back] for variant, back in zip(variants, backs)]

    # 4) Explanation section
    explanations_md = build_explanations(input_text, variants, source_lang, target_lang)
    # 5) Pronunciation guide
    pron_md = build_pronunciation(input_text, variants, source_lang, target_lang)
    # 6) Role play
    roleplay_md = build_roleplay(input_text, variants, target_lang)
    # 7) Resource suggestions
    resources_md = suggest_resources(input_text, target_lang)

    # HighlightedText expects (text, label) pairs. Distinct labels keep the
    # variants from being merged by combine_adjacent; empty padding entries
    # are dropped.
    highlighted = [
        (text, str(rank)) for rank, text in enumerate(variants, start=1) if text
    ]

    return (
        input_text,
        highlighted,
        back_rows,
        explanations_md,
        pron_md,
        roleplay_md,
        favorites_state,
        gr.update(visible=True),
        resources_md,
    )
def _to_plain_rows(table):
    """Return *table* as nested lists when it is DataFrame- or array-like."""
    values = getattr(table, "values", None)
    if hasattr(table, "columns") and hasattr(values, "tolist"):
        # pandas DataFrame (what a Gradio Dataframe input delivers by default).
        return values.tolist()
    if hasattr(table, "tolist"):
        # numpy array or similar.
        return table.tolist()
    return table


def save_to_favorites(input_text, variants, backs, explanations_md, pron_md, roleplay_md, favorites_state):
    """Append the current result to the favorites list.

    Args:
        input_text: Original sentence.
        variants: Variant translations as currently displayed.
        backs: Back-translation table. DataFrame- or array-like values are
            converted to nested lists so the stored entry can be serialised
            with ``json`` later.
        explanations_md: Explanation markdown.
        pron_md: Pronunciation markdown.
        roleplay_md: Role-play markdown.
        favorites_state: Existing favorites list, or ``None`` to start a new
            one. The list is extended in place.

    Returns:
        ``(favorites_state, status_message)`` where the message reports the
        new number of saved entries.
    """
    if favorites_state is None:
        favorites_state = []
    entry = {
        "์›๋ฌธ": input_text,
        "๋ณ€ํ˜•๋ฒˆ์—ญ": variants,
        "์—ญ๋ฒˆ์—ญ": _to_plain_rows(backs),
        "์„ค๋ช…": explanations_md,
        "๋ฐœ์Œ": pron_md,
        "role_play": roleplay_md
    }
    favorites_state.append(entry)
    return favorites_state, f"์ €์žฅ ์™„๋ฃŒ! (์ด {len(favorites_state)}๊ฑด)"
def export_favorites(favorites_state):
    """Write the favorites to a temporary ``.json`` file and return its path.

    Returns ``None`` without creating a file when *favorites_state* is empty
    or ``None``. The file is UTF-8 encoded, keeps non-ASCII characters
    unescaped and is indented by two spaces.
    """
    if favorites_state:
        handle, json_path = tempfile.mkstemp(suffix=".json")
        # Opening the descriptor directly ties its lifetime to the file
        # object, so it is closed when the ``with`` block exits.
        with open(handle, "w", encoding="utf-8") as sink:
            json.dump(favorites_state, sink, ensure_ascii=False, indent=2)
        return json_path
    return None
def load_sample(sample_text):
    """Return a Gradio update that sets the target's value to *sample_text*."""
    value_update = gr.update(value=sample_text)
    return value_update
# =========================
# Gradio UI
# =========================
with gr.Blocks(title="๐ŸŒ ๋ฌธํ™” ๊ฐ„ ํ‘œํ˜„ ๋น„๊ต + ๋ฌธ๋ฒ• & ์–ดํœ˜ ๋„์šฐ๋ฏธ (ํ™•์žฅํŒ)") as demo:
    gr.Markdown("## ๐ŸŒ ๋ฌธํ™” ๊ฐ„ ํ‘œํ˜„ ๋น„๊ต + ๋ฌธ๋ฒ• & ์–ดํœ˜ ๋„์šฐ๋ฏธ\n์ž…๋ ฅํ•œ ํ‘œํ˜„์„ ๊ธฐ๋ฐ˜์œผ๋กœ **์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฒˆ์—ญ 3๊ฐ€์ง€, ์—ญ๋ฒˆ์—ญ ๋น„๊ต, ๋ฌธ๋ฒ•/๋ฌธํ™” ์„ค๋ช…, Role Play, ๋ฐœ์Œ ๊ฐ€์ด๋“œ**๊นŒ์ง€ ํ•œ ๋ฒˆ์—!")
    with gr.Row():
        # Left column: input, language selection, sample sentences, submit.
        with gr.Column(scale=5):
            input_text = gr.Textbox(label="๋น„๊ตํ•  ๋ฌธ์žฅ ์ž…๋ ฅ", placeholder="์˜ˆ: ๊ณ ์ƒํ–ˆ์–ด!", lines=2)
            with gr.Row():
                src_dd = gr.Dropdown(["ํ•œ๊ตญ์–ด"], label="๋ชจ๊ตญ์–ด ์„ ํƒ", value="ํ•œ๊ตญ์–ด")
                tgt_dd = gr.Dropdown(["์˜์–ด", "๋…์ผ์–ด"], label="๋น„๊ต ์–ธ์–ด ์„ ํƒ", value="์˜์–ด")
            with gr.Accordion("์ƒ˜ํ”Œ ๋ฌธ์žฅ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ", open=False):
                gr.Markdown("- ์ƒํ™ฉ๋ณ„๋กœ ๋ฐ”๋กœ ํ…Œ์ŠคํŠธํ•ด๋ณด์„ธ์š”.")
                with gr.Row():
                    b1 = gr.Button("์นœ๊ตฌ ์œ„๋กœ: ๊ณ ์ƒํ–ˆ์–ด!")
                    b2 = gr.Button("๊ฒฉ๋ ค: ์ˆ˜๊ณ  ๋งŽ์•˜์–ด, ์ •๋ง ๊ณ ๋งˆ์›Œ.")
                    b3 = gr.Button("์—…๋ฌด: ์˜ค๋Š˜ ์ผ์ • ํ™•์ธ ๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค.")
            submit = gr.Button("๐Ÿš€ Submit", variant="primary")
        # Right column: one tab per result panel.
        with gr.Column(scale=7):
            tabs = gr.Tabs()
            with tabs:
                with gr.Tab("๊ฒฐ๊ณผ ์š”์•ฝ"):
                    orig_out = gr.Textbox(label="์›๋ฌธ", interactive=False)
                    variants_out = gr.HighlightedText(
                        label="๋ฒˆ์—ญ 3๊ฐ€์ง€ (์ง์—ญ + ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ณ€ํ˜•)",
                        combine_adjacent=True
                    )
                    resources_md = gr.Markdown(visible=False)
                with gr.Tab("์—ญ๋ฐฉํ–ฅ ๋น„๊ต"):
                    back_table = gr.Dataframe(headers=["๋ฒˆ์—ญ(Variant)", "์—ญ๋ฒˆ์—ญ(๋ชจ๊ตญ์–ด)"], interactive=False)
                with gr.Tab("์„ค๋ช…"):
                    explain_out = gr.Markdown()
                with gr.Tab("๋ฐœ์Œ ๊ฐ€์ด๋“œ"):
                    pron_out = gr.Markdown()
                with gr.Tab("Role Play"):
                    role_out = gr.Markdown()
                with gr.Tab("์ฆ๊ฒจ์ฐพ๊ธฐ"):
                    fav_state = gr.State([])
                    save_btn = gr.Button("โญ ํ˜„์žฌ ๊ฒฐ๊ณผ ์ €์žฅ")
                    save_status = gr.Markdown("")
                    export_btn = gr.Button("โฌ‡๏ธ ์ฆ๊ฒจ์ฐพ๊ธฐ JSON ๋‚ด๋ณด๋‚ด๊ธฐ")
                    export_file = gr.File(label="๋‹ค์šด๋กœ๋“œ ํŒŒ์ผ")

    # ---------- Event bindings ----------
    # run_pipeline returns nine values. resources_md is listed twice on
    # purpose: the first slot receives the visibility update, the second the
    # markdown content.
    submit.click(
        fn=run_pipeline,
        inputs=[input_text, src_dd, tgt_dd, fav_state],
        outputs=[orig_out, variants_out, back_table, explain_out, pron_out, role_out, fav_state, resources_md, resources_md],
    )

    # Sample buttons. ``click`` has no ``kwargs`` parameter, so each sample
    # text is bound with a zero-argument lambda instead.
    b1.click(fn=lambda: load_sample("๊ณ ์ƒํ–ˆ์–ด!"), inputs=None, outputs=input_text)
    b2.click(fn=lambda: load_sample("์ˆ˜๊ณ  ๋งŽ์•˜์–ด, ์ •๋ง ๊ณ ๋งˆ์›Œ."), inputs=None, outputs=input_text)
    b3.click(fn=lambda: load_sample("์˜ค๋Š˜ ์ผ์ • ํ™•์ธ ๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค."), inputs=None, outputs=input_text)

    # Save the current result to favorites.
    save_btn.click(
        fn=save_to_favorites,
        inputs=[orig_out, variants_out, back_table, explain_out, pron_out, role_out, fav_state],
        outputs=[fav_state, save_status]
    )

    # Export favorites as a JSON file.
    export_btn.click(fn=export_favorites, inputs=[fav_state], outputs=[export_file])

demo.launch()