Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,17 +29,17 @@ try:
|
|
| 29 |
tts_model = VitsModel.from_pretrained("facebook/mms-tts-kaz").to(device)
|
| 30 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kaz")
|
| 31 |
|
| 32 |
-
# Перевод ru
|
| 33 |
translator = pipeline(
|
| 34 |
"translation",
|
| 35 |
model="facebook/nllb-200-distilled-600M",
|
| 36 |
device=0 if device == "cuda" else -1
|
| 37 |
)
|
| 38 |
|
| 39 |
-
# Генерация вопросов
|
| 40 |
qa_model = pipeline(
|
| 41 |
"text2text-generation",
|
| 42 |
-
model="
|
| 43 |
device=0 if device == "cuda" else -1
|
| 44 |
)
|
| 45 |
|
|
@@ -55,35 +55,33 @@ except Exception as e:
|
|
| 55 |
|
| 56 |
def generate_quiz(text: str):
|
| 57 |
prompt = (
|
| 58 |
-
"
|
| 59 |
-
"Верни ТОЛЬКО JSON
|
| 60 |
"{\n"
|
| 61 |
" \"question\": \"...\",\n"
|
| 62 |
" \"correct\": \"...\",\n"
|
| 63 |
" \"wrong\": \"...\"\n"
|
| 64 |
"}\n"
|
| 65 |
-
"Без комментариев, без пояснений.\n"
|
| 66 |
f"TEXT: {text}"
|
| 67 |
)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
out = qa_model(prompt, max_new_tokens=200)[0]["generated_text"].strip()
|
| 71 |
|
| 72 |
-
#
|
| 73 |
if not out:
|
| 74 |
out = qa_model(prompt, max_new_tokens=200)[0]["generated_text"].strip()
|
| 75 |
if not out:
|
| 76 |
raise ValueError("Модель дважды вернула пустой ответ.")
|
| 77 |
|
| 78 |
-
#
|
| 79 |
try:
|
| 80 |
json_str = out[out.index("{"): out.rindex("}") + 1]
|
| 81 |
except Exception:
|
| 82 |
-
# fallback
|
| 83 |
-
q = re.search(r'"?question"?\s*[:=]\s*[\'"](.+?)[\'"]', out
|
| 84 |
-
c = re.search(r'"?correct"?\s*[:=]\s*[\'"](.+?)[\'"]', out
|
| 85 |
-
w = re.search(r'"?wrong"?\s*[:=]\s*[\'"](.+?)[\'"]', out
|
| 86 |
-
|
| 87 |
if q and c and w:
|
| 88 |
json_str = json.dumps({
|
| 89 |
"question": q.group(1),
|
|
@@ -93,7 +91,6 @@ def generate_quiz(text: str):
|
|
| 93 |
else:
|
| 94 |
raise ValueError(f"Модель вывела неподходящий формат:\n{out}")
|
| 95 |
|
| 96 |
-
# чистка JSON
|
| 97 |
json_str = json_str.replace("\n", "")
|
| 98 |
|
| 99 |
try:
|
|
@@ -106,7 +103,7 @@ def generate_quiz(text: str):
|
|
| 106 |
wrong = data.get("wrong", "").strip()
|
| 107 |
|
| 108 |
if not (question and correct and wrong):
|
| 109 |
-
raise ValueError(
|
| 110 |
|
| 111 |
options = [correct, wrong]
|
| 112 |
random.shuffle(options)
|
|
@@ -115,7 +112,7 @@ def generate_quiz(text: str):
|
|
| 115 |
|
| 116 |
|
| 117 |
# =========================
|
| 118 |
-
# Синтез
|
| 119 |
# =========================
|
| 120 |
|
| 121 |
def synthesize_audio(text_ru: str):
|
|
@@ -128,7 +125,7 @@ def synthesize_audio(text_ru: str):
|
|
| 128 |
|
| 129 |
waveform = output.waveform.squeeze().cpu().numpy()
|
| 130 |
audio = (waveform * 32767).astype("int16")
|
| 131 |
-
sr = getattr(tts_model.config,
|
| 132 |
|
| 133 |
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 134 |
wavfile.write(tmp.name, sr, audio)
|
|
@@ -137,7 +134,7 @@ def synthesize_audio(text_ru: str):
|
|
| 137 |
|
| 138 |
|
| 139 |
# =========================
|
| 140 |
-
# Talking Head
|
| 141 |
# =========================
|
| 142 |
|
| 143 |
def make_talking_head(image_path: str, audio_path: str):
|
|
@@ -162,7 +159,7 @@ def start_lesson(image: Image.Image, text: str, state):
|
|
| 162 |
if image is None:
|
| 163 |
return None, "Загрузите фото", [], state
|
| 164 |
if not text:
|
| 165 |
-
return None, "Введите текст
|
| 166 |
if len(text) > 500:
|
| 167 |
return None, "Текст слишком длинный", [], state
|
| 168 |
|
|
@@ -194,15 +191,15 @@ def start_lesson(image: Image.Image, text: str, state):
|
|
| 194 |
|
| 195 |
|
| 196 |
# =========================
|
| 197 |
-
# Шаг 2 — реакция
|
| 198 |
# =========================
|
| 199 |
|
| 200 |
def answer_selected(selected_option: str, state):
|
| 201 |
if not state:
|
| 202 |
return None, "Ошибка: урок не запущен"
|
| 203 |
|
| 204 |
-
correct = state
|
| 205 |
-
image_path = state
|
| 206 |
|
| 207 |
if selected_option == correct:
|
| 208 |
reply_ru = "Молодец!"
|
|
@@ -218,22 +215,16 @@ def answer_selected(selected_option: str, state):
|
|
| 218 |
|
| 219 |
|
| 220 |
# =========================
|
| 221 |
-
#
|
| 222 |
# =========================
|
| 223 |
|
| 224 |
-
title = "🎓 Интерактивный бейне-лектор"
|
| 225 |
-
description = (
|
| 226 |
-
"Загрузите фото + текст лекции (рус.). Система задаст вопрос и предложит варианты.\n"
|
| 227 |
-
"Ответ — и лектор отреагирует (қазақша)."
|
| 228 |
-
)
|
| 229 |
-
|
| 230 |
with gr.Blocks() as demo:
|
| 231 |
-
gr.Markdown(
|
| 232 |
|
| 233 |
with gr.Row():
|
| 234 |
with gr.Column():
|
| 235 |
inp_image = gr.Image(type="pil", label="Фото лектора")
|
| 236 |
-
inp_text = gr.Textbox(lines=4, label="Текст лекции (
|
| 237 |
btn_start = gr.Button("Запустить урок")
|
| 238 |
|
| 239 |
with gr.Column():
|
|
@@ -248,8 +239,8 @@ with gr.Blocks() as demo:
|
|
| 248 |
|
| 249 |
btn_start.click(
|
| 250 |
start_lesson,
|
| 251 |
-
|
| 252 |
-
|
| 253 |
)
|
| 254 |
|
| 255 |
btn_opt1.click(answer_selected, [btn_opt1, state], [out_react, out_status])
|
|
|
|
| 29 |
tts_model = VitsModel.from_pretrained("facebook/mms-tts-kaz").to(device)
|
| 30 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kaz")
|
| 31 |
|
| 32 |
+
# Перевод ru -> kk
|
| 33 |
translator = pipeline(
|
| 34 |
"translation",
|
| 35 |
model="facebook/nllb-200-distilled-600M",
|
| 36 |
device=0 if device == "cuda" else -1
|
| 37 |
)
|
| 38 |
|
| 39 |
+
# Генерация учебных вопросов (стабильная CPU-модель)
|
| 40 |
qa_model = pipeline(
|
| 41 |
"text2text-generation",
|
| 42 |
+
model="t5-base", # <-- ВАЖНО: существующая стабильная модель!
|
| 43 |
device=0 if device == "cuda" else -1
|
| 44 |
)
|
| 45 |
|
|
|
|
| 55 |
|
| 56 |
def generate_quiz(text: str):
|
| 57 |
prompt = (
|
| 58 |
+
"Сгенерируй учебный вопрос по тексту и дай один правильный и один неправильный ответ. "
|
| 59 |
+
"Верни ТОЛЬКО JSON без комментариев:\n"
|
| 60 |
"{\n"
|
| 61 |
" \"question\": \"...\",\n"
|
| 62 |
" \"correct\": \"...\",\n"
|
| 63 |
" \"wrong\": \"...\"\n"
|
| 64 |
"}\n"
|
|
|
|
| 65 |
f"TEXT: {text}"
|
| 66 |
)
|
| 67 |
|
| 68 |
+
# 1. Генерация
|
| 69 |
out = qa_model(prompt, max_new_tokens=200)[0]["generated_text"].strip()
|
| 70 |
|
| 71 |
+
# 2. Повторная попытка при пустом выводе
|
| 72 |
if not out:
|
| 73 |
out = qa_model(prompt, max_new_tokens=200)[0]["generated_text"].strip()
|
| 74 |
if not out:
|
| 75 |
raise ValueError("Модель дважды вернула пустой ответ.")
|
| 76 |
|
| 77 |
+
# 3. Извлечение JSON
|
| 78 |
try:
|
| 79 |
json_str = out[out.index("{"): out.rindex("}") + 1]
|
| 80 |
except Exception:
|
| 81 |
+
# fallback
|
| 82 |
+
q = re.search(r'"?question"?\s*[:=]\s*[\'"](.+?)[\'"]', out)
|
| 83 |
+
c = re.search(r'"?correct"?\s*[:=]\s*[\'"](.+?)[\'"]', out)
|
| 84 |
+
w = re.search(r'"?wrong"?\s*[:=]\s*[\'"](.+?)[\'"]', out)
|
|
|
|
| 85 |
if q and c and w:
|
| 86 |
json_str = json.dumps({
|
| 87 |
"question": q.group(1),
|
|
|
|
| 91 |
else:
|
| 92 |
raise ValueError(f"Модель вывела неподходящий формат:\n{out}")
|
| 93 |
|
|
|
|
| 94 |
json_str = json_str.replace("\n", "")
|
| 95 |
|
| 96 |
try:
|
|
|
|
| 103 |
wrong = data.get("wrong", "").strip()
|
| 104 |
|
| 105 |
if not (question and correct and wrong):
|
| 106 |
+
raise ValueError("JSON не содержит нужных полей")
|
| 107 |
|
| 108 |
options = [correct, wrong]
|
| 109 |
random.shuffle(options)
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
# =========================
|
| 115 |
+
# Синтез речи
|
| 116 |
# =========================
|
| 117 |
|
| 118 |
def synthesize_audio(text_ru: str):
|
|
|
|
| 125 |
|
| 126 |
waveform = output.waveform.squeeze().cpu().numpy()
|
| 127 |
audio = (waveform * 32767).astype("int16")
|
| 128 |
+
sr = getattr(tts_model.config, "sampling_rate", 22050)
|
| 129 |
|
| 130 |
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 131 |
wavfile.write(tmp.name, sr, audio)
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
# =========================
|
| 137 |
+
# Talking Head
|
| 138 |
# =========================
|
| 139 |
|
| 140 |
def make_talking_head(image_path: str, audio_path: str):
|
|
|
|
| 159 |
if image is None:
|
| 160 |
return None, "Загрузите фото", [], state
|
| 161 |
if not text:
|
| 162 |
+
return None, "Введите текст", [], state
|
| 163 |
if len(text) > 500:
|
| 164 |
return None, "Текст слишком длинный", [], state
|
| 165 |
|
|
|
|
| 191 |
|
| 192 |
|
| 193 |
# =========================
|
| 194 |
+
# Шаг 2 — реакция
|
| 195 |
# =========================
|
| 196 |
|
| 197 |
def answer_selected(selected_option: str, state):
|
| 198 |
if not state:
|
| 199 |
return None, "Ошибка: урок не запущен"
|
| 200 |
|
| 201 |
+
correct = state["correct"]
|
| 202 |
+
image_path = state["image_path"]
|
| 203 |
|
| 204 |
if selected_option == correct:
|
| 205 |
reply_ru = "Молодец!"
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
# =========================
|
| 218 |
+
# Интерфейс
|
| 219 |
# =========================
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
with gr.Blocks() as demo:
|
| 222 |
+
gr.Markdown("# 🎓 Интерактивный бейне-лектор")
|
| 223 |
|
| 224 |
with gr.Row():
|
| 225 |
with gr.Column():
|
| 226 |
inp_image = gr.Image(type="pil", label="Фото лектора")
|
| 227 |
+
inp_text = gr.Textbox(lines=4, label="Текст лекции (рус.)")
|
| 228 |
btn_start = gr.Button("Запустить урок")
|
| 229 |
|
| 230 |
with gr.Column():
|
|
|
|
| 239 |
|
| 240 |
btn_start.click(
|
| 241 |
start_lesson,
|
| 242 |
+
[inp_image, inp_text, state],
|
| 243 |
+
[out_video, out_question, btn_opt1, btn_opt2, state]
|
| 244 |
)
|
| 245 |
|
| 246 |
btn_opt1.click(answer_selected, [btn_opt1, state], [out_react, out_status])
|