Spaces:
Sleeping
Sleeping
GitHub Actions committed on
Commit ·
f2532fa
0
Parent(s):
deploy from GitHub 2026-03-04_03:47:45
Browse files- README.md +31 -0
- app.py +550 -0
- lecture_processor.py +389 -0
- requirements.txt +11 -0
- transcribe.py +71 -0
README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Lecture Processor
|
| 3 |
+
emoji: "\U0001F393"
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.15.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
hardware: zero-a10g
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Lecture Processor
|
| 15 |
+
|
| 16 |
+
Transcribe, summarize, and generate quizzes from lecture recordings using **WhisperX** and a fine-tuned **Gemma 3 4B** model.
|
| 17 |
+
|
| 18 |
+
## How It Works
|
| 19 |
+
|
| 20 |
+
1. Paste a YouTube lecture URL
|
| 21 |
+
2. The pipeline automatically:
|
| 22 |
+
- **Transcribes** speech using WhisperX
|
| 23 |
+
- **Summarizes** the lecture with structured sections (Summary, Key Points, Action Points)
|
| 24 |
+
- **Generates quiz questions** (5 MCQ + 3 short answer)
|
| 25 |
+
|
| 26 |
+
## Tech Stack
|
| 27 |
+
|
| 28 |
+
- **WhisperX** - Speech-to-text transcription
|
| 29 |
+
- **Gemma 3 4B Instruct** - Fine-tuned with QLoRA for lecture summarization and quiz generation
|
| 30 |
+
- **LoRA Adapter** - [noufwithy/gemma-lecture-adapter](https://huggingface.co/noufwithy/gemma-lecture-adapter)
|
| 31 |
+
- **Gradio** - Web interface with ZeroGPU support
|
app.py
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import tempfile
|
| 4 |
+
import time
|
| 5 |
+
import traceback
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import httpx
|
| 9 |
+
import yt_dlp
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import spaces
|
| 13 |
+
except ImportError:
|
| 14 |
+
class spaces:
|
| 15 |
+
@staticmethod
|
| 16 |
+
def GPU(duration=60):
|
| 17 |
+
def decorator(fn):
|
| 18 |
+
return fn
|
| 19 |
+
return decorator
|
| 20 |
+
|
| 21 |
+
PROXY_BASE = os.environ.get("PROXY_BASE", "").rstrip("/")
|
| 22 |
+
PROXY_TOKEN = os.environ.get("PROXY_TOKEN", "")
|
| 23 |
+
|
| 24 |
+
from transcribe import transcribe_audio, unload_model as unload_whisper
|
| 25 |
+
from lecture_processor import summarize_lecture, generate_quiz
|
| 26 |
+
|
| 27 |
+
# LANGUAGES = {
|
| 28 |
+
# "Auto-detect": None,
|
| 29 |
+
# "English": "en",
|
| 30 |
+
# "Korean": "ko",
|
| 31 |
+
# "Japanese": "ja",
|
| 32 |
+
# "Chinese": "zh",
|
| 33 |
+
# "Spanish": "es",
|
| 34 |
+
# "French": "fr",
|
| 35 |
+
# "German": "de",
|
| 36 |
+
# "Italian": "it",
|
| 37 |
+
# "Portuguese": "pt",
|
| 38 |
+
# "Russian": "ru",
|
| 39 |
+
# "Arabic": "ar",
|
| 40 |
+
# "Hindi": "hi",
|
| 41 |
+
# }
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def get_youtube_video_id(url: str) -> str | None:
|
| 45 |
+
"""Extract video ID from various YouTube URL formats."""
|
| 46 |
+
patterns = [
|
| 47 |
+
r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
|
| 48 |
+
]
|
| 49 |
+
for pattern in patterns:
|
| 50 |
+
match = re.search(pattern, url)
|
| 51 |
+
if match:
|
| 52 |
+
return match.group(1)
|
| 53 |
+
return None
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def make_embed_html(video_id: str) -> str:
    """Return an HTML iframe snippet embedding the given YouTube video."""
    src = f"https://www.youtube.com/embed/{video_id}"
    return (
        f'<iframe width="100%" height="400" src="{src}" '
        'frameborder="0" allowfullscreen></iframe>'
    )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def download_youtube_audio(url: str) -> str:
    """Download the audio track of a YouTube video; return the wav file path."""
    work_dir = tempfile.mkdtemp()
    wav_path = f"{work_dir}/audio.wav"
    # yt-dlp's post-processor appends the ".wav" extension itself, so the
    # output template deliberately omits it.
    options = {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        "outtmpl": f"{work_dir}/audio",
        "quiet": True,
        "no_warnings": True,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    return wav_path
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
LANGUAGES = {
|
| 80 |
+
"English": "en",
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def make_status_html(step: int = 0, timing: str = "", error: str = "") -> str:
    """Render the pipeline progress indicator as an HTML snippet.

    Steps: 0=idle (empty output), 1=download, 2=transcribe, 3=summarize,
    4=quiz, 5=done. A non-empty *error* short-circuits to an error banner;
    *timing* is appended below the steps when provided.
    """
    if error:
        return f'<div class="status-bar error">{error}</div>'
    if step == 0:
        return ""

    step_labels = ["Download", "Transcribe", "Summarize", "Quiz"]
    rendered = []
    for idx, label in enumerate(step_labels, start=1):
        if idx < step or step == 5:
            css_class, marker = "done", "✓"
        elif idx == step:
            css_class, marker = "active", "↻"
        else:
            css_class, marker = "pending", str(idx)
        rendered.append(
            f'<div class="step {css_class}"><span class="num">{marker}</span>{label}</div>'
        )

    steps_html = '<div class="conn"></div>'.join(rendered)
    timing_html = f'<div class="timing">{timing}</div>' if timing else ""
    return f'<div class="status-bar"><div class="steps">{steps_html}</div>{timing_html}</div>'
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@spaces.GPU(duration=120)
def _run_pipeline(audio_path: str, language: str):
    """Run transcribe → summarize → quiz on one audio file, yielding partials.

    Yields (transcript, summary, quiz, step, timing) tuples where *step*
    drives the UI progress indicator (3=summarizing, 4=quizzing, 5=done;
    0 on early exit) and *timing* is a human-readable per-stage string.
    """
    # Map the UI language label to a Whisper language code.
    lang_code = LANGUAGES.get(language)
    timings = {}  # stage name -> elapsed seconds

    gr.Info("Transcribing audio with WhisperX...")
    try:
        t0 = time.time()
        raw_text = transcribe_audio(audio_path, language=lang_code)
        timings["Transcription"] = time.time() - t0
    except Exception as e:
        # Transcription failure is fatal for the whole pipeline: surface and stop.
        yield f"[Transcription error] {e}", "", "", 0, ""
        return

    if not raw_text:
        # Nothing to summarize or quiz on.
        yield "(no speech detected)", "", "", 0, ""
        return

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    # Show the transcript immediately while the LLM stages run.
    yield raw_text, "", "", 3, timing_str

    # Free Whisper's memory before the Gemma model is loaded.
    unload_whisper()

    gr.Info("Generating summary with Gemma...")
    try:
        t0 = time.time()
        summary = summarize_lecture(raw_text)
        timings["Summarization"] = time.time() - t0
    except Exception as e:
        # Non-fatal: report the error inline and continue to the quiz stage.
        print(f"[ERROR] Summarization failed: {e}")
        traceback.print_exc()
        summary = f"[Summarization error] {e}"

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    yield raw_text, summary, "", 4, timing_str

    gr.Info("Generating quiz with Gemma...")
    try:
        t0 = time.time()
        quiz = generate_quiz(raw_text)
        timings["Quiz Generation"] = time.time() - t0
    except Exception as e:
        # Non-fatal: report the error inline in the quiz output.
        print(f"[ERROR] Quiz generation failed: {e}")
        traceback.print_exc()
        quiz = f"[Quiz generation error] {e}"

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    total = sum(timings.values())
    timing_str += f" | Total: {total:.1f}s"

    yield raw_text, summary, quiz, 5, timing_str
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def fetch_audio_from_proxy(url: str) -> str:
    """Stream extracted audio from the proxy service into a temp file.

    Returns the local path of the downloaded wav file.
    """
    auth_headers = {"x-proxy-token": PROXY_TOKEN} if PROXY_TOKEN else {}
    with httpx.stream(
        "POST",
        f"{PROXY_BASE}/extract",
        json={"url": url, "audio_format": "best"},
        headers=auth_headers,
        timeout=600,
    ) as response:
        response.raise_for_status()
        out_path = f"{tempfile.mkdtemp()}/audio.wav"
        # Write the body in chunks to keep memory flat for long lectures.
        with open(out_path, "wb") as out_file:
            for piece in response.iter_bytes(chunk_size=8192):
                out_file.write(piece)
        return out_path
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def process_youtube(url: str, language: str):
    """Top-level handler: validate the URL, fetch audio, run the pipeline.

    Yields (embed_html, transcript, summary, quiz, status_html) tuples so the
    Gradio UI updates progressively as each stage completes.
    """
    if not url or not url.strip():
        # Empty input: clear all outputs.
        yield "", "", "", "", ""
        return

    url = url.strip()

    video_id = get_youtube_video_id(url)
    if not video_id:
        yield "", "", "", "", make_status_html(error="Please enter a valid YouTube URL")
        return

    # Show the embedded player right away while the download runs.
    embed_html = make_embed_html(video_id)
    yield embed_html, "", "", "", make_status_html(1)

    try:
        t0 = time.time()
        if PROXY_BASE:
            # A configured proxy takes precedence over direct yt-dlp download.
            audio_path = fetch_audio_from_proxy(url)
        else:
            gr.Info("Downloading audio from YouTube...")
            audio_path = download_youtube_audio(url)
        dl_time = time.time() - t0
    except Exception as e:
        yield embed_html, "", "", "", make_status_html(error=f"Download failed: {e}")
        return

    yield embed_html, "", "", "", make_status_html(2, f"Download: {dl_time:.1f}s")

    # Re-yield each partial result from the GPU pipeline, prepending the
    # download time to the per-stage timing string.
    for raw_text, summary, quiz, step, timing_str in _run_pipeline(audio_path, language):
        full_timing = f"Download: {dl_time:.1f}s | {timing_str}" if timing_str else ""
        yield embed_html, raw_text, summary, quiz, make_status_html(step, full_timing)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
EXAMPLES = {
|
| 221 |
+
"MIT OpenCourseWare": "https://www.youtube.com/watch?v=7Pq-S557XQU",
|
| 222 |
+
"Stanford CS229": "https://www.youtube.com/watch?v=jGwO_UgTS7I",
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
# ---------------------------------------------------------------------------
|
| 226 |
+
# ICL Gradio Theme
|
| 227 |
+
# ---------------------------------------------------------------------------
|
| 228 |
+
_icl_blue = gr.themes.Color(
|
| 229 |
+
c50="#F0F7FC",
|
| 230 |
+
c100="#D4EFFC",
|
| 231 |
+
c200="#A8DFFA",
|
| 232 |
+
c300="#5CC4F0",
|
| 233 |
+
c400="#00ACD7",
|
| 234 |
+
c500="#0091D4",
|
| 235 |
+
c600="#003E74",
|
| 236 |
+
c700="#002147",
|
| 237 |
+
c800="#001A38",
|
| 238 |
+
c900="#001029",
|
| 239 |
+
c950="#000A1A",
|
| 240 |
+
name="icl-blue",
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
_icl_tangerine = gr.themes.Color(
|
| 244 |
+
c50="#FFF5EB",
|
| 245 |
+
c100="#FFE6CC",
|
| 246 |
+
c200="#FFCC99",
|
| 247 |
+
c300="#FFB366",
|
| 248 |
+
c400="#FF9933",
|
| 249 |
+
c500="#EC7300",
|
| 250 |
+
c600="#CC6300",
|
| 251 |
+
c700="#A35000",
|
| 252 |
+
c800="#7A3C00",
|
| 253 |
+
c900="#522800",
|
| 254 |
+
c950="#331900",
|
| 255 |
+
name="icl-tangerine",
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
_icl_grey = gr.themes.Color(
|
| 259 |
+
c50="#F7F8F8",
|
| 260 |
+
c100="#EBEEEE",
|
| 261 |
+
c200="#D5D9D9",
|
| 262 |
+
c300="#B8BCBC",
|
| 263 |
+
c400="#9D9D9D",
|
| 264 |
+
c500="#7A7A7A",
|
| 265 |
+
c600="#5C5C5C",
|
| 266 |
+
c700="#4A4A4A",
|
| 267 |
+
c800="#373A36",
|
| 268 |
+
c900="#2A2D2A",
|
| 269 |
+
c950="#1A1C1A",
|
| 270 |
+
name="icl-grey",
|
| 271 |
+
)
|
| 272 |
+
|
| 273 |
+
ICL_THEME = gr.themes.Base(
|
| 274 |
+
primary_hue=_icl_blue,
|
| 275 |
+
secondary_hue=_icl_tangerine,
|
| 276 |
+
neutral_hue=_icl_grey,
|
| 277 |
+
font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"],
|
| 278 |
+
font_mono=[gr.themes.GoogleFont("Source Code Pro"), "monospace"],
|
| 279 |
+
).set(
|
| 280 |
+
# Primary buttons – Navy background
|
| 281 |
+
button_primary_background_fill="#002147",
|
| 282 |
+
button_primary_background_fill_dark="#003E74",
|
| 283 |
+
button_primary_background_fill_hover="#003E74",
|
| 284 |
+
button_primary_background_fill_hover_dark="#0091D4",
|
| 285 |
+
button_primary_border_color="#002147",
|
| 286 |
+
button_primary_border_color_dark="#003E74",
|
| 287 |
+
button_primary_border_color_hover="#003E74",
|
| 288 |
+
button_primary_text_color="white",
|
| 289 |
+
button_primary_text_color_dark="white",
|
| 290 |
+
# Secondary buttons – white bg, blue border/text
|
| 291 |
+
button_secondary_background_fill="white",
|
| 292 |
+
button_secondary_background_fill_dark="#1A1C1A",
|
| 293 |
+
button_secondary_background_fill_hover="#D4EFFC",
|
| 294 |
+
button_secondary_background_fill_hover_dark="#001A38",
|
| 295 |
+
button_secondary_border_color="#003E74",
|
| 296 |
+
button_secondary_border_color_dark="#0091D4",
|
| 297 |
+
button_secondary_border_color_hover="#002147",
|
| 298 |
+
button_secondary_text_color="#003E74",
|
| 299 |
+
button_secondary_text_color_dark="#D4EFFC",
|
| 300 |
+
button_secondary_text_color_hover="#002147",
|
| 301 |
+
# Focus & loader
|
| 302 |
+
input_border_color_focus="#00ACD7",
|
| 303 |
+
input_border_color_focus_dark="#00ACD7",
|
| 304 |
+
loader_color="#003E74",
|
| 305 |
+
loader_color_dark="#0091D4",
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
+
# ---------------------------------------------------------------------------
|
| 309 |
+
# CSS – custom properties + minimal overrides
|
| 310 |
+
# ---------------------------------------------------------------------------
|
| 311 |
+
CSS = """
|
| 312 |
+
:root {
|
| 313 |
+
--icl-navy: #002147;
|
| 314 |
+
--icl-blue: #003E74;
|
| 315 |
+
--icl-process-blue: #0091D4;
|
| 316 |
+
--icl-pool: #00ACD7;
|
| 317 |
+
--icl-light-blue: #D4EFFC;
|
| 318 |
+
--icl-tangerine: #EC7300;
|
| 319 |
+
--icl-violet: #653098;
|
| 320 |
+
--icl-green: #02893B;
|
| 321 |
+
--icl-lime: #BBCE00;
|
| 322 |
+
--icl-red: #B22234;
|
| 323 |
+
--icl-grey: #EBEEEE;
|
| 324 |
+
--icl-cool-grey: #9D9D9D;
|
| 325 |
+
--icl-dark-grey: #373A36;
|
| 326 |
+
--sp-1: 4px; --sp-2: 8px; --sp-3: 12px; --sp-4: 16px;
|
| 327 |
+
--sp-5: 24px; --sp-6: 32px; --sp-7: 48px; --sp-8: 64px;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
/* Header brand bar */
|
| 331 |
+
.icl-header {
|
| 332 |
+
text-align: center;
|
| 333 |
+
padding: var(--sp-5) var(--sp-4);
|
| 334 |
+
border-bottom: 3px solid var(--icl-navy);
|
| 335 |
+
margin-bottom: var(--sp-5);
|
| 336 |
+
}
|
| 337 |
+
.icl-header img { height: 60px; margin-bottom: var(--sp-2); }
|
| 338 |
+
.dark .icl-header { border-bottom-color: var(--icl-pool); }
|
| 339 |
+
|
| 340 |
+
/* Title & subtitle */
|
| 341 |
+
.main-title { text-align: center; color: var(--icl-navy); margin-bottom: 0 !important; }
|
| 342 |
+
.subtitle { text-align: center; color: var(--icl-blue); margin-top: 0 !important; }
|
| 343 |
+
.dark .main-title { color: var(--icl-light-blue); }
|
| 344 |
+
.dark .subtitle { color: var(--icl-pool); }
|
| 345 |
+
|
| 346 |
+
/* Tab selected override (Gradio tabs need !important) */
|
| 347 |
+
.tabs .tab-nav button.selected {
|
| 348 |
+
border-color: var(--icl-navy) !important;
|
| 349 |
+
color: var(--icl-navy) !important;
|
| 350 |
+
}
|
| 351 |
+
.dark .tabs .tab-nav button.selected {
|
| 352 |
+
border-color: var(--icl-pool) !important;
|
| 353 |
+
color: var(--icl-pool) !important;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
/* Focus & active states */
|
| 357 |
+
button:focus-visible, input:focus-visible, textarea:focus-visible, select:focus-visible {
|
| 358 |
+
outline: 3px solid var(--icl-pool);
|
| 359 |
+
outline-offset: 2px;
|
| 360 |
+
}
|
| 361 |
+
button:active { transform: scale(0.97); }
|
| 362 |
+
|
| 363 |
+
/* Example buttons – compact inside bordered card */
|
| 364 |
+
.examples-row {
|
| 365 |
+
justify-content: center !important;
|
| 366 |
+
gap: var(--sp-2);
|
| 367 |
+
border: 1px solid var(--icl-light-blue);
|
| 368 |
+
border-radius: 8px;
|
| 369 |
+
padding: var(--sp-3) var(--sp-4);
|
| 370 |
+
background: var(--icl-grey);
|
| 371 |
+
}
|
| 372 |
+
.examples-row > * { flex: 0 0 auto !important; max-width: fit-content !important; }
|
| 373 |
+
.dark .examples-row { background: #1f2937; border-color: var(--icl-blue); }
|
| 374 |
+
|
| 375 |
+
/* Step progress indicator */
|
| 376 |
+
.status-bar {
|
| 377 |
+
padding: var(--sp-3) var(--sp-4);
|
| 378 |
+
border-radius: 8px;
|
| 379 |
+
background: var(--icl-grey);
|
| 380 |
+
border: 1px solid var(--icl-light-blue);
|
| 381 |
+
}
|
| 382 |
+
.status-bar.error {
|
| 383 |
+
background: #f8d7da;
|
| 384 |
+
border-color: #f5c6cb;
|
| 385 |
+
color: #721c24;
|
| 386 |
+
text-align: center;
|
| 387 |
+
font-weight: 500;
|
| 388 |
+
}
|
| 389 |
+
.status-bar .steps {
|
| 390 |
+
display: flex;
|
| 391 |
+
align-items: center;
|
| 392 |
+
justify-content: center;
|
| 393 |
+
gap: 0;
|
| 394 |
+
}
|
| 395 |
+
.status-bar .step {
|
| 396 |
+
display: flex;
|
| 397 |
+
align-items: center;
|
| 398 |
+
gap: 6px;
|
| 399 |
+
padding: 6px 14px;
|
| 400 |
+
border-radius: 20px;
|
| 401 |
+
font-size: 14px;
|
| 402 |
+
font-weight: 500;
|
| 403 |
+
background: var(--icl-light-blue);
|
| 404 |
+
color: var(--icl-blue);
|
| 405 |
+
white-space: nowrap;
|
| 406 |
+
transition: all 0.3s ease;
|
| 407 |
+
}
|
| 408 |
+
.status-bar .step.active {
|
| 409 |
+
background: var(--icl-blue);
|
| 410 |
+
color: white;
|
| 411 |
+
animation: pulse 1.5s ease-in-out infinite;
|
| 412 |
+
}
|
| 413 |
+
.status-bar .step.done {
|
| 414 |
+
background: var(--icl-navy);
|
| 415 |
+
color: white;
|
| 416 |
+
}
|
| 417 |
+
.status-bar .step .num {
|
| 418 |
+
font-weight: 700;
|
| 419 |
+
min-width: 18px;
|
| 420 |
+
text-align: center;
|
| 421 |
+
}
|
| 422 |
+
.status-bar .conn {
|
| 423 |
+
width: 24px;
|
| 424 |
+
height: 2px;
|
| 425 |
+
background: var(--icl-light-blue);
|
| 426 |
+
flex-shrink: 0;
|
| 427 |
+
}
|
| 428 |
+
.status-bar .timing {
|
| 429 |
+
text-align: center;
|
| 430 |
+
margin-top: var(--sp-2);
|
| 431 |
+
font-size: 13px;
|
| 432 |
+
color: var(--icl-blue);
|
| 433 |
+
}
|
| 434 |
+
@keyframes pulse {
|
| 435 |
+
0%, 100% { opacity: 1; }
|
| 436 |
+
50% { opacity: 0.6; }
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
/* Dark mode – status bar */
|
| 440 |
+
.dark .status-bar { background: #1f2937; border-color: var(--icl-blue); }
|
| 441 |
+
.dark .status-bar.error { background: #7f1d1d; border-color: #991b1b; color: #fca5a5; }
|
| 442 |
+
.dark .status-bar .step { background: var(--icl-blue); color: var(--icl-light-blue); }
|
| 443 |
+
.dark .status-bar .step.active { background: var(--icl-tangerine); color: white; }
|
| 444 |
+
.dark .status-bar .step.done { background: var(--icl-navy); color: var(--icl-light-blue); }
|
| 445 |
+
.dark .status-bar .conn { background: var(--icl-blue); }
|
| 446 |
+
.dark .status-bar .timing { color: var(--icl-light-blue); }
|
| 447 |
+
|
| 448 |
+
/* Footer */
|
| 449 |
+
.footer {
|
| 450 |
+
text-align: center;
|
| 451 |
+
color: var(--icl-dark-grey);
|
| 452 |
+
font-size: 0.85em;
|
| 453 |
+
margin-top: var(--sp-4);
|
| 454 |
+
}
|
| 455 |
+
.dark .footer { color: var(--icl-cool-grey); }
|
| 456 |
+
|
| 457 |
+
/* Reduced motion */
|
| 458 |
+
@media (prefers-reduced-motion: reduce) {
|
| 459 |
+
*, *::before, *::after {
|
| 460 |
+
animation-duration: 0.01ms !important;
|
| 461 |
+
animation-iteration-count: 1 !important;
|
| 462 |
+
transition-duration: 0.01ms !important;
|
| 463 |
+
}
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
/* Responsive */
|
| 467 |
+
@media (max-width: 768px) {
|
| 468 |
+
.icl-header img { height: 40px; }
|
| 469 |
+
.status-bar .step { padding: 4px 10px; font-size: 12px; }
|
| 470 |
+
.status-bar .conn { width: 12px; }
|
| 471 |
+
}
|
| 472 |
+
@media (max-width: 480px) {
|
| 473 |
+
.icl-header img { height: 32px; }
|
| 474 |
+
.icl-header { padding: var(--sp-3) var(--sp-2); }
|
| 475 |
+
}
|
| 476 |
+
"""
|
| 477 |
+
|
| 478 |
+
with gr.Blocks(
|
| 479 |
+
title="Lecture Processor",
|
| 480 |
+
css=CSS,
|
| 481 |
+
theme=ICL_THEME,
|
| 482 |
+
) as demo:
|
| 483 |
+
gr.HTML("""
|
| 484 |
+
<div class="icl-header">
|
| 485 |
+
<img src="https://upload.wikimedia.org/wikipedia/commons/5/51/Imperial_College_London_crest.svg"
|
| 486 |
+
alt="ICL Crest"
|
| 487 |
+
onerror="this.style.display='none';">
|
| 488 |
+
</div>
|
| 489 |
+
""")
|
| 490 |
+
gr.Markdown("# Lecture Processor", elem_classes="main-title")
|
| 491 |
+
gr.Markdown(
|
| 492 |
+
"Transcribe, summarize, and generate quizzes from lectures",
|
| 493 |
+
elem_classes="subtitle",
|
| 494 |
+
)
|
| 495 |
+
|
| 496 |
+
with gr.Row():
|
| 497 |
+
youtube_input = gr.Textbox(
|
| 498 |
+
label="🔗 YouTube URL",
|
| 499 |
+
placeholder="https://www.youtube.com/watch?v=...",
|
| 500 |
+
scale=3,
|
| 501 |
+
)
|
| 502 |
+
language_dropdown = gr.Dropdown(
|
| 503 |
+
choices=list(LANGUAGES.keys()),
|
| 504 |
+
value="English",
|
| 505 |
+
label="Language",
|
| 506 |
+
scale=1,
|
| 507 |
+
)
|
| 508 |
+
|
| 509 |
+
youtube_btn = gr.Button("▶ Process Lecture", variant="primary", size="lg")
|
| 510 |
+
|
| 511 |
+
gr.Markdown("**Examples:**")
|
| 512 |
+
with gr.Row(elem_classes="examples-row"):
|
| 513 |
+
for name, url in EXAMPLES.items():
|
| 514 |
+
gr.Button(name, variant="secondary", size="sm", min_width=160).click(
|
| 515 |
+
fn=lambda u=url: u, outputs=[youtube_input]
|
| 516 |
+
)
|
| 517 |
+
|
| 518 |
+
status_output = gr.HTML()
|
| 519 |
+
video_embed = gr.HTML()
|
| 520 |
+
|
| 521 |
+
with gr.Tabs():
|
| 522 |
+
with gr.TabItem("Transcript"):
|
| 523 |
+
raw_output = gr.Textbox(
|
| 524 |
+
label="Raw Transcription", lines=12
|
| 525 |
+
)
|
| 526 |
+
with gr.TabItem("Summary"):
|
| 527 |
+
summary_output = gr.Textbox(label="Lecture Summary", lines=12)
|
| 528 |
+
with gr.TabItem("Quiz"):
|
| 529 |
+
quiz_output = gr.Textbox(label="Quiz Questions", lines=12)
|
| 530 |
+
|
| 531 |
+
gr.Markdown(
|
| 532 |
+
"Powered by **WhisperX** & **Gemma 3 4B** | Fine-tuned LoRA adapter",
|
| 533 |
+
elem_classes="footer",
|
| 534 |
+
)
|
| 535 |
+
|
| 536 |
+
outputs = [video_embed, raw_output, summary_output, quiz_output, status_output]
|
| 537 |
+
|
| 538 |
+
youtube_btn.click(
|
| 539 |
+
fn=process_youtube,
|
| 540 |
+
inputs=[youtube_input, language_dropdown],
|
| 541 |
+
outputs=outputs,
|
| 542 |
+
)
|
| 543 |
+
youtube_input.submit(
|
| 544 |
+
fn=process_youtube,
|
| 545 |
+
inputs=[youtube_input, language_dropdown],
|
| 546 |
+
outputs=outputs,
|
| 547 |
+
)
|
| 548 |
+
|
| 549 |
+
if __name__ == "__main__":
|
| 550 |
+
demo.launch(server_name="0.0.0.0", share=True)
|
lecture_processor.py
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import traceback
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 5 |
+
from peft import PeftModel
|
| 6 |
+
|
| 7 |
+
DEFAULT_MODEL = "google/gemma-3-4b-it"
|
| 8 |
+
ADAPTER_PATH = "./gemma-lecture-adapter"
|
| 9 |
+
HUB_ADAPTER_ID = "noufwithy/gemma-lecture-adapter"
|
| 10 |
+
|
| 11 |
+
SUMMARIZE_SYSTEM_PROMPT = """You are a lecture summarization assistant.
|
| 12 |
+
Summarize the following lecture transcription into a comprehensive, structured summary with these sections:
|
| 13 |
+
- **Summary**: A concise overview of what the lecture covered
|
| 14 |
+
- **Key Points**: The main concepts, definitions, and important details covered in the lecture (use bullet points)
|
| 15 |
+
- **Action Points**: Any tasks, assignments, or follow-up actions mentioned by the lecturer
|
| 16 |
+
|
| 17 |
+
Cover ALL topics discussed. Do not omit any major points.
|
| 18 |
+
Output ONLY the summary. No explanations or extra commentary."""
|
| 19 |
+
|
| 20 |
+
# Quiz prompts match the training data format exactly (one question per call)
|
| 21 |
+
MCQ_SYSTEM_PROMPT = """You are an educational quiz generator.
|
| 22 |
+
Based on the following lecture transcription, generate a multiple choice question
|
| 23 |
+
with 4 options labeled A-D and indicate the correct answer.
|
| 24 |
+
|
| 25 |
+
Format:
|
| 26 |
+
Q1. [Question]
|
| 27 |
+
A) [Option]
|
| 28 |
+
B) [Option]
|
| 29 |
+
C) [Option]
|
| 30 |
+
D) [Option]
|
| 31 |
+
Correct Answer: [Letter]
|
| 32 |
+
|
| 33 |
+
Output ONLY the question. No explanations or extra commentary."""
|
| 34 |
+
|
| 35 |
+
SHORT_ANSWER_SYSTEM_PROMPT = """You are an educational quiz generator.
|
| 36 |
+
Based on the following lecture transcription, generate a short answer question
|
| 37 |
+
with the expected answer.
|
| 38 |
+
|
| 39 |
+
Format:
|
| 40 |
+
Q1. [Question]
|
| 41 |
+
Expected Answer: [Brief answer]
|
| 42 |
+
|
| 43 |
+
Output ONLY the question. No explanations or extra commentary."""
|
| 44 |
+
|
| 45 |
+
NUM_MCQ = 5
|
| 46 |
+
NUM_SHORT_ANSWER = 3
|
| 47 |
+
|
| 48 |
+
_model = None
|
| 49 |
+
_tokenizer = None
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _load_model(model_id: str = DEFAULT_MODEL, adapter_path: str = ADAPTER_PATH):
    """Lazily load and cache the Gemma model and tokenizer.

    Adapter resolution order: local adapter directory, then the Hub adapter
    ID; if adapter loading fails for any reason, falls back to the plain
    base model. Returns (model, tokenizer); subsequent calls reuse the
    module-level cache.
    """
    global _model, _tokenizer
    if _model is not None:
        # Already loaded: reuse cached instances.
        return _model, _tokenizer

    _tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Try local adapter first, then HuggingFace Hub, then base model
    adapter_source = adapter_path if os.path.isdir(adapter_path) else HUB_ADAPTER_ID

    # Load in bfloat16 (bitsandbytes 4-bit/8-bit quantization broken with Gemma 3)
    try:
        print(f"Loading model with LoRA adapter from {adapter_source}...")
        base_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            dtype=torch.bfloat16,
            attn_implementation="eager",
        )
        _model = PeftModel.from_pretrained(base_model, adapter_source)
        _model.eval()
        print("LoRA adapter loaded successfully on bfloat16 base model.")
    except Exception as e:
        # Broad catch is deliberate: any adapter failure degrades to base model
        # rather than crashing the Space.
        print(f"LoRA adapter failed ({e}), falling back to base model...")
        traceback.print_exc()
        _model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", dtype=torch.bfloat16,
        )

    return _model, _tokenizer
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _generate(messages, max_new_tokens=2048, do_sample=False, temperature=0.7):
    """Run one chat completion through model.generate() and return the text.

    Args:
        messages: Chat messages as [{"role": ..., "content": ...}, ...].
        max_new_tokens: Budget for newly generated tokens.
        do_sample: Sample stochastically (True) or decode greedily (False).
        temperature: Sampling temperature; unused when do_sample is False.

    Returns:
        The decoded completion, stripped of surrounding whitespace.
    """
    model, tokenizer = _load_model()

    # Render the chat template to plain text first, then tokenize it
    # ourselves with add_special_tokens=False so nothing is added twice.
    rendered = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(rendered, return_tensors="pt", add_special_tokens=False)
    ids = encoded["input_ids"].to(model.device)
    mask = encoded["attention_mask"].to(model.device)

    print(f"[DEBUG] input length: {ids.shape[-1]} tokens")

    with torch.no_grad():
        generated = model.generate(
            input_ids=ids,
            attention_mask=mask,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            temperature=temperature if do_sample else None,
            top_p=0.9 if do_sample else None,
            repetition_penalty=1.3,
        )

    # Everything past the prompt length is the model's reply.
    completion_ids = generated[0][ids.shape[-1]:]
    print(f"[DEBUG] generated {len(completion_ids)} new tokens")

    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _is_good_summary(text: str, transcript: str = "") -> bool:
|
| 118 |
+
"""Check if a summary meets minimum quality: long enough, not repetitive, not parroting."""
|
| 119 |
+
if len(text) < 100:
|
| 120 |
+
return False
|
| 121 |
+
|
| 122 |
+
# Check for excessive repetition (same line or sentence repeated 2+ times)
|
| 123 |
+
from collections import Counter
|
| 124 |
+
for chunks in [
|
| 125 |
+
[s.strip() for s in text.split("\n") if s.strip()],
|
| 126 |
+
[s.strip() for s in text.split(".") if s.strip()],
|
| 127 |
+
]:
|
| 128 |
+
if chunks:
|
| 129 |
+
counts = Counter(chunks)
|
| 130 |
+
most_common_count = counts.most_common(1)[0][1]
|
| 131 |
+
if most_common_count >= 2:
|
| 132 |
+
print(f"[QUALITY] Repetitive output detected ({most_common_count} repeats)")
|
| 133 |
+
return False
|
| 134 |
+
|
| 135 |
+
# Check if summary is just parroting the transcript (high word overlap)
|
| 136 |
+
if transcript:
|
| 137 |
+
summary_words = set(text.lower().split())
|
| 138 |
+
transcript_words = set(transcript.lower().split())
|
| 139 |
+
if summary_words and transcript_words:
|
| 140 |
+
overlap = len(summary_words & transcript_words) / len(summary_words)
|
| 141 |
+
if overlap > 0.85:
|
| 142 |
+
print(f"[QUALITY] Summary too similar to transcript ({overlap:.0%} word overlap)")
|
| 143 |
+
return False
|
| 144 |
+
|
| 145 |
+
# Check if summary has enough key points (at least 3 bullet points)
|
| 146 |
+
bullet_count = text.count("- ")
|
| 147 |
+
has_key_points = "key points" in text.lower()
|
| 148 |
+
if has_key_points and bullet_count < 3:
|
| 149 |
+
print(f"[QUALITY] Summary has too few key points ({bullet_count})")
|
| 150 |
+
return False
|
| 151 |
+
|
| 152 |
+
# Check minimum unique content (summary should have substance)
|
| 153 |
+
unique_lines = set(s.strip() for s in text.split("\n") if s.strip() and len(s.strip()) > 10)
|
| 154 |
+
if len(unique_lines) < 5:
|
| 155 |
+
print(f"[QUALITY] Summary too shallow ({len(unique_lines)} unique lines)")
|
| 156 |
+
return False
|
| 157 |
+
|
| 158 |
+
return True
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _generate_with_base_fallback(messages, transcript="", **kwargs):
    """Generate with the adapter; retry once on the base model if quality fails.

    The retry temporarily disables the LoRA adapter layers and always
    re-enables them, even if the retry generation raises.
    """
    first_pass = _generate(messages, **kwargs)
    if _is_good_summary(first_pass, transcript=transcript):
        return first_pass

    model, _ = _load_model()
    if not isinstance(model, PeftModel):
        # No adapter attached: nothing better to fall back to.
        return first_pass

    print("[FALLBACK] Adapter output too short or repetitive, retrying with base model...")
    model.disable_adapter_layers()
    try:
        retry = _generate(messages, **kwargs)
    finally:
        model.enable_adapter_layers()
    print(f"[FALLBACK] base model response length: {len(retry)}")
    return retry
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def _truncate_transcript(transcript: str, max_words: int = 4000) -> str:
|
| 183 |
+
"""Truncate transcript to fit model's effective context (trained on 3072 tokens)."""
|
| 184 |
+
words = transcript.split()
|
| 185 |
+
if len(words) <= max_words:
|
| 186 |
+
return transcript
|
| 187 |
+
print(f"[TRUNCATE] Transcript has {len(words)} words, truncating to {max_words}")
|
| 188 |
+
return " ".join(words[:max_words])
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def summarize_lecture(transcript: str, model: str = DEFAULT_MODEL) -> str:
    """Produce a structured summary of *transcript* with the Gemma model.

    Returns an empty string for empty/blank input. The adapter model is
    tried first; quality failures fall back to the base model.
    """
    if not transcript or not transcript.strip():
        return ""

    shortened = _truncate_transcript(transcript)
    chat = [
        {"role": "system", "content": SUMMARIZE_SYSTEM_PROMPT},
        {"role": "user", "content": f"Lecture transcription:\n\n{shortened}"},
    ]
    # Low temperature keeps the summary focused while still sampling.
    summary = _generate_with_base_fallback(chat, transcript=transcript, do_sample=True, temperature=0.3)
    print(f"[DEBUG summarize] response length: {len(summary)}")
    return summary
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def _extract_question_text(result: str) -> str:
|
| 208 |
+
"""Extract just the question text (first line after Q number) for dedup comparison."""
|
| 209 |
+
import re
|
| 210 |
+
match = re.search(r'Q\d+\.\s*(.+)', result)
|
| 211 |
+
return match.group(1).strip().lower() if match else result.strip().lower()
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _is_good_quiz_answer(result: str, transcript: str = "") -> bool:
|
| 215 |
+
"""Check if a generated quiz question is reasonable quality."""
|
| 216 |
+
# Reject if response doesn't match any expected format (no question generated)
|
| 217 |
+
if "Correct Answer:" not in result and "Expected Answer:" not in result:
|
| 218 |
+
print(f"[QUALITY] Response has no valid question format (missing Correct/Expected Answer)")
|
| 219 |
+
return False
|
| 220 |
+
|
| 221 |
+
# Reject if there's no actual question (Q1. pattern)
|
| 222 |
+
if "Q1." not in result:
|
| 223 |
+
print(f"[QUALITY] Response missing Q1. question marker")
|
| 224 |
+
return False
|
| 225 |
+
|
| 226 |
+
# Short answer: reject if expected answer is just a transcript fragment with no real content
|
| 227 |
+
if "Expected Answer:" in result:
|
| 228 |
+
answer = result.split("Expected Answer:")[-1].strip()
|
| 229 |
+
# Reject vague/pointer answers like "right here", "this arrow", "at this point"
|
| 230 |
+
vague_phrases = ["right here", "this arrow", "at this point", "this one", "over here", "right there"]
|
| 231 |
+
if any(phrase in answer.lower() for phrase in vague_phrases):
|
| 232 |
+
print(f"[QUALITY] Short answer too vague: {answer}")
|
| 233 |
+
return False
|
| 234 |
+
if len(answer.split()) < 2:
|
| 235 |
+
print(f"[QUALITY] Short answer too short: {answer}")
|
| 236 |
+
return False
|
| 237 |
+
|
| 238 |
+
# MCQ: reject if it doesn't have 4 options or has duplicate options
|
| 239 |
+
if "Correct Answer:" in result and "Expected Answer:" not in result:
|
| 240 |
+
import re
|
| 241 |
+
for label in ["A)", "B)", "C)", "D)"]:
|
| 242 |
+
if label not in result:
|
| 243 |
+
print(f"[QUALITY] MCQ missing option {label}")
|
| 244 |
+
return False
|
| 245 |
+
# Reject if options are mostly duplicated
|
| 246 |
+
options = re.findall(r'[A-D]\)\s*(.+)', result)
|
| 247 |
+
unique_options = set(opt.strip().lower() for opt in options)
|
| 248 |
+
if len(unique_options) < 3:
|
| 249 |
+
print(f"[QUALITY] MCQ has duplicate options ({len(unique_options)} unique out of {len(options)})")
|
| 250 |
+
return False
|
| 251 |
+
|
| 252 |
+
return True
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _dedup_mcq_options(result: str) -> str:
|
| 256 |
+
"""Remove duplicate MCQ options, keeping unique ones only."""
|
| 257 |
+
import re
|
| 258 |
+
options = re.findall(r'([A-D])\)\s*(.+)', result)
|
| 259 |
+
if len(options) != 4:
|
| 260 |
+
return result
|
| 261 |
+
|
| 262 |
+
seen = {}
|
| 263 |
+
unique = []
|
| 264 |
+
for label, text in options:
|
| 265 |
+
key = text.strip().lower()
|
| 266 |
+
if key not in seen:
|
| 267 |
+
seen[key] = True
|
| 268 |
+
unique.append((label, text.strip()))
|
| 269 |
+
|
| 270 |
+
if len(unique) == len(options):
|
| 271 |
+
return result # no duplicates
|
| 272 |
+
|
| 273 |
+
print(f"[QUALITY] Removed {len(options) - len(unique)} duplicate MCQ option(s)")
|
| 274 |
+
# Rebuild with correct labels
|
| 275 |
+
lines = result.split("\n")
|
| 276 |
+
new_lines = []
|
| 277 |
+
option_idx = 0
|
| 278 |
+
labels = ["A", "B", "C", "D"]
|
| 279 |
+
for line in lines:
|
| 280 |
+
if re.match(r'^[A-D]\)', line):
|
| 281 |
+
if option_idx < len(unique):
|
| 282 |
+
new_lines.append(f"{labels[option_idx]}) {unique[option_idx][1]}")
|
| 283 |
+
option_idx += 1
|
| 284 |
+
else:
|
| 285 |
+
new_lines.append(line)
|
| 286 |
+
|
| 287 |
+
return "\n".join(new_lines)
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _generate_quiz_with_fallback(messages, transcript="", **kwargs):
    """Generate one quiz question with the adapter; retry on base model if bad.

    The retry temporarily disables the LoRA adapter layers and always
    re-enables them, even if the retry generation raises.
    """
    first_pass = _generate(messages, **kwargs)
    if _is_good_quiz_answer(first_pass, transcript):
        return first_pass

    model, _ = _load_model()
    if not isinstance(model, PeftModel):
        # No adapter attached: nothing better to fall back to.
        return first_pass

    print("[FALLBACK] Quiz answer bad, retrying with base model...")
    model.disable_adapter_layers()
    try:
        retry = _generate(messages, **kwargs)
    finally:
        model.enable_adapter_layers()
    return retry
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _normalize_words(text: str) -> set[str]:
|
| 310 |
+
"""Strip punctuation from words for cleaner comparison."""
|
| 311 |
+
import re
|
| 312 |
+
return set(re.sub(r'[^\w\s]', '', word) for word in text.split() if word.strip())
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def _is_duplicate(result: str, existing_parts: list[str]) -> bool:
    """Return True when *result*'s question heavily overlaps a prior one.

    Overlap is measured as the shared-word count divided by the smaller
    question's word count (punctuation-stripped); more than 70% overlap
    counts as a duplicate.
    """
    candidate = _normalize_words(_extract_question_text(result))
    for prior in existing_parts:
        reference = _normalize_words(_extract_question_text(prior))
        if not candidate or not reference:
            continue
        shared = len(candidate & reference) / min(len(candidate), len(reference))
        if shared > 0.7:
            print(f"[QUALITY] Duplicate question detected ({shared:.0%} word overlap)")
            return True
    return False
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
def _generate_one_question(system_prompt: str, transcript: str, parts: list, label: str, max_retries: int):
    """Generate a single quiz question, retrying on bad/duplicate output.

    Returns the raw question text (still carrying the "Q1." prefix) or
    None when every attempt produced a low-quality or duplicate question.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Lecture transcription:\n\n{transcript}"},
    ]
    for attempt in range(1 + max_retries):
        result = _generate_quiz_with_fallback(messages, transcript=transcript, max_new_tokens=256, do_sample=True)
        if _is_good_quiz_answer(result, transcript) and not _is_duplicate(result, parts):
            return result
        print(f"[DEBUG quiz] {label} attempt {attempt + 1} was bad or duplicate, retrying...")
    return None


def generate_quiz(transcript: str, model: str = DEFAULT_MODEL) -> str:
    """Generate quiz questions from a lecture transcript using Gemma.

    Generates questions one at a time to match training format, then combines
    them. Skips duplicate questions automatically; questions that stay
    unreliable after all retries are dropped. Returns an empty string for
    empty/blank input.
    """
    if not transcript or not transcript.strip():
        return ""

    transcript = _truncate_transcript(transcript)
    parts = []
    max_retries = 2  # extra attempts per question if duplicate

    # Generate MCQs one at a time (matches training: one MCQ per example).
    for i in range(NUM_MCQ):
        print(f"[DEBUG quiz] generating MCQ {i + 1}/{NUM_MCQ}...")
        result = _generate_one_question(MCQ_SYSTEM_PROMPT, transcript, parts, f"MCQ {i + 1}", max_retries)
        if result is not None:
            result = _dedup_mcq_options(result)
            # Renumber so the combined quiz counts Q1, Q2, ... continuously.
            result = result.replace("Q1.", f"Q{len(parts) + 1}.", 1)
            parts.append(result)
        else:
            print(f"[DEBUG quiz] MCQ {i + 1} dropped (unreliable after {1 + max_retries} attempts)")

    # Generate short answer questions one at a time.
    for i in range(NUM_SHORT_ANSWER):
        print(f"[DEBUG quiz] generating short answer {i + 1}/{NUM_SHORT_ANSWER}...")
        result = _generate_one_question(SHORT_ANSWER_SYSTEM_PROMPT, transcript, parts, f"short answer {i + 1}", max_retries)
        if result is not None:
            result = result.replace("Q1.", f"Q{len(parts) + 1}.", 1)
            parts.append(result)
        else:
            print(f"[DEBUG quiz] short answer {i + 1} dropped (unreliable after {1 + max_retries} attempts)")

    combined = "\n\n".join(parts)
    print(f"[DEBUG quiz] total response length: {len(combined)}")
    return combined
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 2 |
+
torch
|
| 3 |
+
torchaudio
|
| 4 |
+
whisperx @ git+https://github.com/m-bain/whisperX.git
|
| 5 |
+
transformers
|
| 6 |
+
accelerate
|
| 7 |
+
gradio
|
| 8 |
+
yt-dlp
|
| 9 |
+
httpx
|
| 10 |
+
peft
|
| 11 |
+
spaces
|
transcribe.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

# Workaround for PyTorch 2.6+ weights_only=True default.
# pyannote VAD model checkpoints (used by WhisperX) contain omegaconf types
# and other globals that are not in torch's safe-globals allowlist.
# This env var tells PyTorch to fall back to weights_only=False when the
# caller did not explicitly pass weights_only. The pyannote models are
# published, trusted checkpoints.
# NOTE: this must run before `import whisperx` below pulls in pyannote.
os.environ.setdefault("TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD", "1")
|
| 10 |
+
|
| 11 |
+
import whisperx
|
| 12 |
+
|
| 13 |
+
import gc
|
| 14 |
+
import torch
|
| 15 |
+
|
| 16 |
+
# Cached WhisperX model instance and the device it was loaded on;
# populated lazily by _get_model() and cleared by unload_model().
_model = None
_current_device = None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _get_model(device: str | None = None):
    """Lazily load and cache the WhisperX "base" model.

    Args:
        device: "cuda" or "cpu"; auto-detected from CUDA availability
            when None.

    Returns:
        The cached WhisperX model, reloaded if the requested device
        differs from the one it was last loaded on.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    global _model, _current_device
    if _model is None or _current_device != device:
        _model = whisperx.load_model(
            "base",
            device=device,
            compute_type="int8",
        )
        _current_device = device
    return _model
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def unload_model():
    """Free the cached WhisperX model from GPU memory for other models.

    Safe to call when no model is loaded (no-op in that case).
    """
    global _model, _current_device
    if _model is None:
        return
    del _model
    _model = None
    _current_device = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("[WhisperX] Model unloaded, GPU memory freed.")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def transcribe_audio(audio_path: str, language: str | None = None, device: str | None = None) -> str:
    """
    Transcribe audio file using WhisperX.

    Args:
        audio_path: Path to audio file (any format supported by ffmpeg).
        language: ISO 639-1 language code (e.g. "en", "ko", "ja").
            None for auto-detection.
        device: "cuda" or "cpu"; None auto-detects (see _get_model).

    Returns:
        Transcribed text as a single string (segments joined by spaces).
    """
    model = _get_model(device)
    audio = whisperx.load_audio(audio_path)

    # Only pass `language` when given, so WhisperX auto-detects otherwise.
    transcribe_kwargs = {"batch_size": 16}
    if language:
        transcribe_kwargs["language"] = language

    result = model.transcribe(audio, **transcribe_kwargs)

    # Flatten segment texts into one string, skipping empty segments.
    segments = result.get("segments", [])
    text = " ".join(seg["text"].strip() for seg in segments if seg.get("text"))
    return text
|