GitHub Actions commited on
Commit
f2532fa
·
0 Parent(s):

deploy from GitHub 2026-03-04_03:47:45

Browse files
Files changed (5) hide show
  1. README.md +31 -0
  2. app.py +550 -0
  3. lecture_processor.py +389 -0
  4. requirements.txt +11 -0
  5. transcribe.py +71 -0
README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Lecture Processor
3
+ emoji: "\U0001F393"
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.15.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ hardware: zero-a10g
12
+ ---
13
+
14
+ # Lecture Processor
15
+
16
+ Transcribe, summarize, and generate quizzes from lecture recordings using **WhisperX** and a fine-tuned **Gemma 3 4B** model.
17
+
18
+ ## How It Works
19
+
20
+ 1. Paste a YouTube lecture URL
21
+ 2. The pipeline automatically:
22
+ - **Transcribes** speech using WhisperX
23
+ - **Summarizes** the lecture with structured sections (Summary, Key Points, Action Points)
24
+ - **Generates quiz questions** (5 MCQ + 3 short answer)
25
+
26
+ ## Tech Stack
27
+
28
+ - **WhisperX** - Speech-to-text transcription
29
+ - **Gemma 3 4B Instruct** - Fine-tuned with QLoRA for lecture summarization and quiz generation
30
+ - **LoRA Adapter** - [noufwithy/gemma-lecture-adapter](https://huggingface.co/noufwithy/gemma-lecture-adapter)
31
+ - **Gradio** - Web interface with ZeroGPU support
app.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import tempfile
4
+ import time
5
+ import traceback
6
+
7
+ import gradio as gr
8
+ import httpx
9
+ import yt_dlp
10
+
11
# `spaces` is only available when running on Hugging Face Spaces (it provides
# the ZeroGPU decorator).  Locally the import fails, so fall back to a no-op
# stub whose GPU(duration=...) decorator returns the function unchanged.
try:
    import spaces
except ImportError:
    class spaces:
        # Mirrors spaces.GPU's call signature; `duration` is ignored here.
        @staticmethod
        def GPU(duration=60):
            def decorator(fn):
                return fn
            return decorator
20
+
21
+ PROXY_BASE = os.environ.get("PROXY_BASE", "").rstrip("/")
22
+ PROXY_TOKEN = os.environ.get("PROXY_TOKEN", "")
23
+
24
+ from transcribe import transcribe_audio, unload_model as unload_whisper
25
+ from lecture_processor import summarize_lecture, generate_quiz
26
+
27
+ # LANGUAGES = {
28
+ # "Auto-detect": None,
29
+ # "English": "en",
30
+ # "Korean": "ko",
31
+ # "Japanese": "ja",
32
+ # "Chinese": "zh",
33
+ # "Spanish": "es",
34
+ # "French": "fr",
35
+ # "German": "de",
36
+ # "Italian": "it",
37
+ # "Portuguese": "pt",
38
+ # "Russian": "ru",
39
+ # "Arabic": "ar",
40
+ # "Hindi": "hi",
41
+ # }
42
+
43
+
44
+ def get_youtube_video_id(url: str) -> str | None:
45
+ """Extract video ID from various YouTube URL formats."""
46
+ patterns = [
47
+ r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
48
+ ]
49
+ for pattern in patterns:
50
+ match = re.search(pattern, url)
51
+ if match:
52
+ return match.group(1)
53
+ return None
54
+
55
+
56
def make_embed_html(video_id: str) -> str:
    """Build the HTML for an embeddable YouTube player iframe."""
    src = f"https://www.youtube.com/embed/{video_id}"
    return (
        f'<iframe width="100%" height="400" src="{src}" '
        'frameborder="0" allowfullscreen></iframe>'
    )
58
+
59
+
60
def download_youtube_audio(url: str) -> str:
    """Download audio from YouTube URL, returns path to wav file.

    yt-dlp writes to <tmp>/audio and the FFmpeg post-processor converts the
    result to <tmp>/audio.wav, which is the path returned here.
    """
    tmp_dir = tempfile.mkdtemp()
    output_path = f"{tmp_dir}/audio.wav"
    ydl_opts = {
        "format": "bestaudio/best",
        # Convert whatever container yt-dlp picks into WAV for transcription.
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        # No extension here: FFmpegExtractAudio appends ".wav" itself.
        "outtmpl": f"{tmp_dir}/audio",
        "quiet": True,
        "no_warnings": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path
77
+
78
+
79
# UI language labels -> WhisperX language codes.  Currently restricted to
# English; a larger candidate table exists commented out near the imports.
LANGUAGES = {
    "English": "en",
}
82
+
83
+
84
def make_status_html(step: int = 0, timing: str = "", error: str = "") -> str:
    """Render the pipeline progress bar as an HTML snippet.

    Steps: 0=idle, 1=download, 2=transcribe, 3=summarize, 4=quiz, 5=done.
    An error message takes precedence over any step; step 0 renders nothing.
    """
    if error:
        return f'<div class="status-bar error">{error}</div>'
    if not step:
        return ""

    def render_step(idx: int, label: str) -> str:
        # Map each step to its visual state relative to the current step.
        number = idx + 1
        if number < step or step == 5:
            state, icon = "done", "&#10003;"
        elif number == step:
            state, icon = "active", "&#8635;"
        else:
            state, icon = "pending", str(number)
        return f'<div class="step {state}"><span class="num">{icon}</span>{label}</div>'

    steps_html = '<div class="conn"></div>'.join(
        render_step(i, lbl)
        for i, lbl in enumerate(["Download", "Transcribe", "Summarize", "Quiz"])
    )
    timing_html = f'<div class="timing">{timing}</div>' if timing else ""

    return f'<div class="status-bar"><div class="steps">{steps_html}</div>{timing_html}</div>'
110
+
111
+
112
@spaces.GPU(duration=120)
def _run_pipeline(audio_path: str, language: str):
    """Pipeline that yields (transcript, summary, quiz, step, timing) progressively.

    step values match make_status_html: 3=summarizing, 4=quiz, 5=done;
    step 0 signals a terminal failure.  Runs under the ZeroGPU decorator
    with a 120-second budget.
    """
    lang_code = LANGUAGES.get(language)  # None for unknown labels
    timings = {}

    gr.Info("Transcribing audio with WhisperX...")
    try:
        t0 = time.time()
        raw_text = transcribe_audio(audio_path, language=lang_code)
        timings["Transcription"] = time.time() - t0
    except Exception as e:
        # Transcription failure is fatal: surface the error in the transcript box.
        yield f"[Transcription error] {e}", "", "", 0, ""
        return

    if not raw_text:
        yield "(no speech detected)", "", "", 0, ""
        return

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    # Show the transcript immediately; summarization (step 3) comes next.
    yield raw_text, "", "", 3, timing_str

    # Free Whisper's GPU memory before loading Gemma.
    unload_whisper()

    gr.Info("Generating summary with Gemma...")
    try:
        t0 = time.time()
        summary = summarize_lecture(raw_text)
        timings["Summarization"] = time.time() - t0
    except Exception as e:
        # Non-fatal: keep going and display the error text as the summary.
        print(f"[ERROR] Summarization failed: {e}")
        traceback.print_exc()
        summary = f"[Summarization error] {e}"

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    yield raw_text, summary, "", 4, timing_str

    gr.Info("Generating quiz with Gemma...")
    try:
        t0 = time.time()
        quiz = generate_quiz(raw_text)
        timings["Quiz Generation"] = time.time() - t0
    except Exception as e:
        # Non-fatal, same policy as summarization.
        print(f"[ERROR] Quiz generation failed: {e}")
        traceback.print_exc()
        quiz = f"[Quiz generation error] {e}"

    timing_str = " | ".join(f"{k}: {v:.1f}s" for k, v in timings.items())
    total = sum(timings.values())
    timing_str += f" | Total: {total:.1f}s"

    yield raw_text, summary, quiz, 5, timing_str
164
+
165
+
166
def fetch_audio_from_proxy(url: str) -> str:
    """Request audio extraction from proxy, save to tmp file, return path.

    Streams the proxy's response body to disk in 8 KiB chunks so large
    audio files never sit fully in memory.  Raises httpx.HTTPStatusError
    on a non-2xx response.
    """
    headers = {"x-proxy-token": PROXY_TOKEN} if PROXY_TOKEN else {}
    with httpx.stream(
        "POST",
        f"{PROXY_BASE}/extract",
        json={"url": url, "audio_format": "best"},
        headers=headers,
        timeout=600,  # long lectures can take minutes to extract
    ) as resp:
        resp.raise_for_status()
        tmp_dir = tempfile.mkdtemp()
        audio_path = f"{tmp_dir}/audio.wav"
        with open(audio_path, "wb") as f:
            for chunk in resp.iter_bytes(chunk_size=8192):
                f.write(chunk)
    return audio_path
183
+
184
+
185
def process_youtube(url: str, language: str):
    """Yields (embed, transcript, summary, quiz, status_html) progressively.

    Drives the whole flow: validate URL -> show embedded player -> fetch
    audio (via proxy when PROXY_BASE is set, otherwise yt-dlp) -> run the
    GPU pipeline, relaying its progressive yields to the UI.
    """
    if not url or not url.strip():
        # Empty input: clear every output component.
        yield "", "", "", "", ""
        return

    url = url.strip()

    video_id = get_youtube_video_id(url)
    if not video_id:
        yield "", "", "", "", make_status_html(error="Please enter a valid YouTube URL")
        return

    embed_html = make_embed_html(video_id)
    # Step 1: show the player and the "Download" step immediately.
    yield embed_html, "", "", "", make_status_html(1)

    try:
        t0 = time.time()
        if PROXY_BASE:
            audio_path = fetch_audio_from_proxy(url)
        else:
            gr.Info("Downloading audio from YouTube...")
            audio_path = download_youtube_audio(url)
        dl_time = time.time() - t0
    except Exception as e:
        yield embed_html, "", "", "", make_status_html(error=f"Download failed: {e}")
        return

    # Step 2: transcription begins inside the GPU pipeline.
    yield embed_html, "", "", "", make_status_html(2, f"Download: {dl_time:.1f}s")

    # Relay the pipeline's progressive results, prefixing the download time.
    for raw_text, summary, quiz, step, timing_str in _run_pipeline(audio_path, language):
        full_timing = f"Download: {dl_time:.1f}s | {timing_str}" if timing_str else ""
        yield embed_html, raw_text, summary, quiz, make_status_html(step, full_timing)
218
+
219
+
220
# Sample lecture URLs wired to the example buttons in the UI.
EXAMPLES = {
    "MIT OpenCourseWare": "https://www.youtube.com/watch?v=7Pq-S557XQU",
    "Stanford CS229": "https://www.youtube.com/watch?v=jGwO_UgTS7I",
}
224
+
225
+ # ---------------------------------------------------------------------------
226
+ # ICL Gradio Theme
227
+ # ---------------------------------------------------------------------------
228
+ _icl_blue = gr.themes.Color(
229
+ c50="#F0F7FC",
230
+ c100="#D4EFFC",
231
+ c200="#A8DFFA",
232
+ c300="#5CC4F0",
233
+ c400="#00ACD7",
234
+ c500="#0091D4",
235
+ c600="#003E74",
236
+ c700="#002147",
237
+ c800="#001A38",
238
+ c900="#001029",
239
+ c950="#000A1A",
240
+ name="icl-blue",
241
+ )
242
+
243
+ _icl_tangerine = gr.themes.Color(
244
+ c50="#FFF5EB",
245
+ c100="#FFE6CC",
246
+ c200="#FFCC99",
247
+ c300="#FFB366",
248
+ c400="#FF9933",
249
+ c500="#EC7300",
250
+ c600="#CC6300",
251
+ c700="#A35000",
252
+ c800="#7A3C00",
253
+ c900="#522800",
254
+ c950="#331900",
255
+ name="icl-tangerine",
256
+ )
257
+
258
+ _icl_grey = gr.themes.Color(
259
+ c50="#F7F8F8",
260
+ c100="#EBEEEE",
261
+ c200="#D5D9D9",
262
+ c300="#B8BCBC",
263
+ c400="#9D9D9D",
264
+ c500="#7A7A7A",
265
+ c600="#5C5C5C",
266
+ c700="#4A4A4A",
267
+ c800="#373A36",
268
+ c900="#2A2D2A",
269
+ c950="#1A1C1A",
270
+ name="icl-grey",
271
+ )
272
+
273
+ ICL_THEME = gr.themes.Base(
274
+ primary_hue=_icl_blue,
275
+ secondary_hue=_icl_tangerine,
276
+ neutral_hue=_icl_grey,
277
+ font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"],
278
+ font_mono=[gr.themes.GoogleFont("Source Code Pro"), "monospace"],
279
+ ).set(
280
+ # Primary buttons – Navy background
281
+ button_primary_background_fill="#002147",
282
+ button_primary_background_fill_dark="#003E74",
283
+ button_primary_background_fill_hover="#003E74",
284
+ button_primary_background_fill_hover_dark="#0091D4",
285
+ button_primary_border_color="#002147",
286
+ button_primary_border_color_dark="#003E74",
287
+ button_primary_border_color_hover="#003E74",
288
+ button_primary_text_color="white",
289
+ button_primary_text_color_dark="white",
290
+ # Secondary buttons – white bg, blue border/text
291
+ button_secondary_background_fill="white",
292
+ button_secondary_background_fill_dark="#1A1C1A",
293
+ button_secondary_background_fill_hover="#D4EFFC",
294
+ button_secondary_background_fill_hover_dark="#001A38",
295
+ button_secondary_border_color="#003E74",
296
+ button_secondary_border_color_dark="#0091D4",
297
+ button_secondary_border_color_hover="#002147",
298
+ button_secondary_text_color="#003E74",
299
+ button_secondary_text_color_dark="#D4EFFC",
300
+ button_secondary_text_color_hover="#002147",
301
+ # Focus & loader
302
+ input_border_color_focus="#00ACD7",
303
+ input_border_color_focus_dark="#00ACD7",
304
+ loader_color="#003E74",
305
+ loader_color_dark="#0091D4",
306
+ )
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # CSS – custom properties + minimal overrides
310
+ # ---------------------------------------------------------------------------
311
+ CSS = """
312
+ :root {
313
+ --icl-navy: #002147;
314
+ --icl-blue: #003E74;
315
+ --icl-process-blue: #0091D4;
316
+ --icl-pool: #00ACD7;
317
+ --icl-light-blue: #D4EFFC;
318
+ --icl-tangerine: #EC7300;
319
+ --icl-violet: #653098;
320
+ --icl-green: #02893B;
321
+ --icl-lime: #BBCE00;
322
+ --icl-red: #B22234;
323
+ --icl-grey: #EBEEEE;
324
+ --icl-cool-grey: #9D9D9D;
325
+ --icl-dark-grey: #373A36;
326
+ --sp-1: 4px; --sp-2: 8px; --sp-3: 12px; --sp-4: 16px;
327
+ --sp-5: 24px; --sp-6: 32px; --sp-7: 48px; --sp-8: 64px;
328
+ }
329
+
330
+ /* Header brand bar */
331
+ .icl-header {
332
+ text-align: center;
333
+ padding: var(--sp-5) var(--sp-4);
334
+ border-bottom: 3px solid var(--icl-navy);
335
+ margin-bottom: var(--sp-5);
336
+ }
337
+ .icl-header img { height: 60px; margin-bottom: var(--sp-2); }
338
+ .dark .icl-header { border-bottom-color: var(--icl-pool); }
339
+
340
+ /* Title & subtitle */
341
+ .main-title { text-align: center; color: var(--icl-navy); margin-bottom: 0 !important; }
342
+ .subtitle { text-align: center; color: var(--icl-blue); margin-top: 0 !important; }
343
+ .dark .main-title { color: var(--icl-light-blue); }
344
+ .dark .subtitle { color: var(--icl-pool); }
345
+
346
+ /* Tab selected override (Gradio tabs need !important) */
347
+ .tabs .tab-nav button.selected {
348
+ border-color: var(--icl-navy) !important;
349
+ color: var(--icl-navy) !important;
350
+ }
351
+ .dark .tabs .tab-nav button.selected {
352
+ border-color: var(--icl-pool) !important;
353
+ color: var(--icl-pool) !important;
354
+ }
355
+
356
+ /* Focus & active states */
357
+ button:focus-visible, input:focus-visible, textarea:focus-visible, select:focus-visible {
358
+ outline: 3px solid var(--icl-pool);
359
+ outline-offset: 2px;
360
+ }
361
+ button:active { transform: scale(0.97); }
362
+
363
+ /* Example buttons – compact inside bordered card */
364
+ .examples-row {
365
+ justify-content: center !important;
366
+ gap: var(--sp-2);
367
+ border: 1px solid var(--icl-light-blue);
368
+ border-radius: 8px;
369
+ padding: var(--sp-3) var(--sp-4);
370
+ background: var(--icl-grey);
371
+ }
372
+ .examples-row > * { flex: 0 0 auto !important; max-width: fit-content !important; }
373
+ .dark .examples-row { background: #1f2937; border-color: var(--icl-blue); }
374
+
375
+ /* Step progress indicator */
376
+ .status-bar {
377
+ padding: var(--sp-3) var(--sp-4);
378
+ border-radius: 8px;
379
+ background: var(--icl-grey);
380
+ border: 1px solid var(--icl-light-blue);
381
+ }
382
+ .status-bar.error {
383
+ background: #f8d7da;
384
+ border-color: #f5c6cb;
385
+ color: #721c24;
386
+ text-align: center;
387
+ font-weight: 500;
388
+ }
389
+ .status-bar .steps {
390
+ display: flex;
391
+ align-items: center;
392
+ justify-content: center;
393
+ gap: 0;
394
+ }
395
+ .status-bar .step {
396
+ display: flex;
397
+ align-items: center;
398
+ gap: 6px;
399
+ padding: 6px 14px;
400
+ border-radius: 20px;
401
+ font-size: 14px;
402
+ font-weight: 500;
403
+ background: var(--icl-light-blue);
404
+ color: var(--icl-blue);
405
+ white-space: nowrap;
406
+ transition: all 0.3s ease;
407
+ }
408
+ .status-bar .step.active {
409
+ background: var(--icl-blue);
410
+ color: white;
411
+ animation: pulse 1.5s ease-in-out infinite;
412
+ }
413
+ .status-bar .step.done {
414
+ background: var(--icl-navy);
415
+ color: white;
416
+ }
417
+ .status-bar .step .num {
418
+ font-weight: 700;
419
+ min-width: 18px;
420
+ text-align: center;
421
+ }
422
+ .status-bar .conn {
423
+ width: 24px;
424
+ height: 2px;
425
+ background: var(--icl-light-blue);
426
+ flex-shrink: 0;
427
+ }
428
+ .status-bar .timing {
429
+ text-align: center;
430
+ margin-top: var(--sp-2);
431
+ font-size: 13px;
432
+ color: var(--icl-blue);
433
+ }
434
+ @keyframes pulse {
435
+ 0%, 100% { opacity: 1; }
436
+ 50% { opacity: 0.6; }
437
+ }
438
+
439
+ /* Dark mode – status bar */
440
+ .dark .status-bar { background: #1f2937; border-color: var(--icl-blue); }
441
+ .dark .status-bar.error { background: #7f1d1d; border-color: #991b1b; color: #fca5a5; }
442
+ .dark .status-bar .step { background: var(--icl-blue); color: var(--icl-light-blue); }
443
+ .dark .status-bar .step.active { background: var(--icl-tangerine); color: white; }
444
+ .dark .status-bar .step.done { background: var(--icl-navy); color: var(--icl-light-blue); }
445
+ .dark .status-bar .conn { background: var(--icl-blue); }
446
+ .dark .status-bar .timing { color: var(--icl-light-blue); }
447
+
448
+ /* Footer */
449
+ .footer {
450
+ text-align: center;
451
+ color: var(--icl-dark-grey);
452
+ font-size: 0.85em;
453
+ margin-top: var(--sp-4);
454
+ }
455
+ .dark .footer { color: var(--icl-cool-grey); }
456
+
457
+ /* Reduced motion */
458
+ @media (prefers-reduced-motion: reduce) {
459
+ *, *::before, *::after {
460
+ animation-duration: 0.01ms !important;
461
+ animation-iteration-count: 1 !important;
462
+ transition-duration: 0.01ms !important;
463
+ }
464
+ }
465
+
466
+ /* Responsive */
467
+ @media (max-width: 768px) {
468
+ .icl-header img { height: 40px; }
469
+ .status-bar .step { padding: 4px 10px; font-size: 12px; }
470
+ .status-bar .conn { width: 12px; }
471
+ }
472
+ @media (max-width: 480px) {
473
+ .icl-header img { height: 32px; }
474
+ .icl-header { padding: var(--sp-3) var(--sp-2); }
475
+ }
476
+ """
477
+
478
+ with gr.Blocks(
479
+ title="Lecture Processor",
480
+ css=CSS,
481
+ theme=ICL_THEME,
482
+ ) as demo:
483
+ gr.HTML("""
484
+ <div class="icl-header">
485
+ <img src="https://upload.wikimedia.org/wikipedia/commons/5/51/Imperial_College_London_crest.svg"
486
+ alt="ICL Crest"
487
+ onerror="this.style.display='none';">
488
+ </div>
489
+ """)
490
+ gr.Markdown("# Lecture Processor", elem_classes="main-title")
491
+ gr.Markdown(
492
+ "Transcribe, summarize, and generate quizzes from lectures",
493
+ elem_classes="subtitle",
494
+ )
495
+
496
+ with gr.Row():
497
+ youtube_input = gr.Textbox(
498
+ label="🔗 YouTube URL",
499
+ placeholder="https://www.youtube.com/watch?v=...",
500
+ scale=3,
501
+ )
502
+ language_dropdown = gr.Dropdown(
503
+ choices=list(LANGUAGES.keys()),
504
+ value="English",
505
+ label="Language",
506
+ scale=1,
507
+ )
508
+
509
+ youtube_btn = gr.Button("▶ Process Lecture", variant="primary", size="lg")
510
+
511
+ gr.Markdown("**Examples:**")
512
+ with gr.Row(elem_classes="examples-row"):
513
+ for name, url in EXAMPLES.items():
514
+ gr.Button(name, variant="secondary", size="sm", min_width=160).click(
515
+ fn=lambda u=url: u, outputs=[youtube_input]
516
+ )
517
+
518
+ status_output = gr.HTML()
519
+ video_embed = gr.HTML()
520
+
521
+ with gr.Tabs():
522
+ with gr.TabItem("Transcript"):
523
+ raw_output = gr.Textbox(
524
+ label="Raw Transcription", lines=12
525
+ )
526
+ with gr.TabItem("Summary"):
527
+ summary_output = gr.Textbox(label="Lecture Summary", lines=12)
528
+ with gr.TabItem("Quiz"):
529
+ quiz_output = gr.Textbox(label="Quiz Questions", lines=12)
530
+
531
+ gr.Markdown(
532
+ "Powered by **WhisperX** & **Gemma 3 4B** | Fine-tuned LoRA adapter",
533
+ elem_classes="footer",
534
+ )
535
+
536
+ outputs = [video_embed, raw_output, summary_output, quiz_output, status_output]
537
+
538
+ youtube_btn.click(
539
+ fn=process_youtube,
540
+ inputs=[youtube_input, language_dropdown],
541
+ outputs=outputs,
542
+ )
543
+ youtube_input.submit(
544
+ fn=process_youtube,
545
+ inputs=[youtube_input, language_dropdown],
546
+ outputs=outputs,
547
+ )
548
+
549
if __name__ == "__main__":
    # 0.0.0.0 makes the app reachable from outside the container;
    # share=True additionally opens a public Gradio tunnel when run locally.
    demo.launch(server_name="0.0.0.0", share=True)
lecture_processor.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import traceback
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from peft import PeftModel
6
+
7
+ DEFAULT_MODEL = "google/gemma-3-4b-it"
8
+ ADAPTER_PATH = "./gemma-lecture-adapter"
9
+ HUB_ADAPTER_ID = "noufwithy/gemma-lecture-adapter"
10
+
11
+ SUMMARIZE_SYSTEM_PROMPT = """You are a lecture summarization assistant.
12
+ Summarize the following lecture transcription into a comprehensive, structured summary with these sections:
13
+ - **Summary**: A concise overview of what the lecture covered
14
+ - **Key Points**: The main concepts, definitions, and important details covered in the lecture (use bullet points)
15
+ - **Action Points**: Any tasks, assignments, or follow-up actions mentioned by the lecturer
16
+
17
+ Cover ALL topics discussed. Do not omit any major points.
18
+ Output ONLY the summary. No explanations or extra commentary."""
19
+
20
+ # Quiz prompts match the training data format exactly (one question per call)
21
+ MCQ_SYSTEM_PROMPT = """You are an educational quiz generator.
22
+ Based on the following lecture transcription, generate a multiple choice question
23
+ with 4 options labeled A-D and indicate the correct answer.
24
+
25
+ Format:
26
+ Q1. [Question]
27
+ A) [Option]
28
+ B) [Option]
29
+ C) [Option]
30
+ D) [Option]
31
+ Correct Answer: [Letter]
32
+
33
+ Output ONLY the question. No explanations or extra commentary."""
34
+
35
+ SHORT_ANSWER_SYSTEM_PROMPT = """You are an educational quiz generator.
36
+ Based on the following lecture transcription, generate a short answer question
37
+ with the expected answer.
38
+
39
+ Format:
40
+ Q1. [Question]
41
+ Expected Answer: [Brief answer]
42
+
43
+ Output ONLY the question. No explanations or extra commentary."""
44
+
45
+ NUM_MCQ = 5
46
+ NUM_SHORT_ANSWER = 3
47
+
48
+ _model = None
49
+ _tokenizer = None
50
+
51
+
52
def _load_model(model_id: str = DEFAULT_MODEL, adapter_path: str = ADAPTER_PATH):
    """Lazily load and cache the Gemma model + tokenizer.

    Attaches the fine-tuned LoRA adapter (local directory first, then the
    Hub copy); on any failure falls back to the plain base model.

    Returns:
        The (model, tokenizer) pair, cached in module globals.
    """
    global _model, _tokenizer
    # Already loaded in this process -> reuse the cached pair.
    if _model is not None:
        return _model, _tokenizer

    _tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Try local adapter first, then HuggingFace Hub, then base model
    adapter_source = adapter_path if os.path.isdir(adapter_path) else HUB_ADAPTER_ID

    # Load in bfloat16 (bitsandbytes 4-bit/8-bit quantization broken with Gemma 3)
    try:
        print(f"Loading model with LoRA adapter from {adapter_source}...")
        base_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            dtype=torch.bfloat16,
            attn_implementation="eager",
        )
        _model = PeftModel.from_pretrained(base_model, adapter_source)
        _model.eval()
        print("LoRA adapter loaded successfully on bfloat16 base model.")
    except Exception as e:
        # Adapter missing/incompatible: run the base model instead of failing.
        print(f"LoRA adapter failed ({e}), falling back to base model...")
        traceback.print_exc()
        _model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", dtype=torch.bfloat16,
        )

    return _model, _tokenizer
82
+
83
+
84
def _generate(messages, max_new_tokens=2048, do_sample=False, temperature=0.7):
    """Generate text using model.generate() directly.

    messages is a chat-format list of {"role", "content"} dicts.  Returns
    only the newly generated text (prompt stripped), whitespace-trimmed.
    """
    model, tokenizer = _load_model()

    # Format chat messages into a string, then tokenize.  add_special_tokens
    # is False because the chat template already inserts them.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    print(f"[DEBUG] input length: {input_ids.shape[-1]} tokens")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            # Sampling knobs only apply when do_sample is True.
            temperature=temperature if do_sample else None,
            top_p=0.9 if do_sample else None,
            repetition_penalty=1.3,
        )

    # Decode only the new tokens (skip the input)
    new_tokens = outputs[0][input_ids.shape[-1]:]
    print(f"[DEBUG] generated {len(new_tokens)} new tokens")

    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
115
+
116
+
117
+ def _is_good_summary(text: str, transcript: str = "") -> bool:
118
+ """Check if a summary meets minimum quality: long enough, not repetitive, not parroting."""
119
+ if len(text) < 100:
120
+ return False
121
+
122
+ # Check for excessive repetition (same line or sentence repeated 2+ times)
123
+ from collections import Counter
124
+ for chunks in [
125
+ [s.strip() for s in text.split("\n") if s.strip()],
126
+ [s.strip() for s in text.split(".") if s.strip()],
127
+ ]:
128
+ if chunks:
129
+ counts = Counter(chunks)
130
+ most_common_count = counts.most_common(1)[0][1]
131
+ if most_common_count >= 2:
132
+ print(f"[QUALITY] Repetitive output detected ({most_common_count} repeats)")
133
+ return False
134
+
135
+ # Check if summary is just parroting the transcript (high word overlap)
136
+ if transcript:
137
+ summary_words = set(text.lower().split())
138
+ transcript_words = set(transcript.lower().split())
139
+ if summary_words and transcript_words:
140
+ overlap = len(summary_words & transcript_words) / len(summary_words)
141
+ if overlap > 0.85:
142
+ print(f"[QUALITY] Summary too similar to transcript ({overlap:.0%} word overlap)")
143
+ return False
144
+
145
+ # Check if summary has enough key points (at least 3 bullet points)
146
+ bullet_count = text.count("- ")
147
+ has_key_points = "key points" in text.lower()
148
+ if has_key_points and bullet_count < 3:
149
+ print(f"[QUALITY] Summary has too few key points ({bullet_count})")
150
+ return False
151
+
152
+ # Check minimum unique content (summary should have substance)
153
+ unique_lines = set(s.strip() for s in text.split("\n") if s.strip() and len(s.strip()) > 10)
154
+ if len(unique_lines) < 5:
155
+ print(f"[QUALITY] Summary too shallow ({len(unique_lines)} unique lines)")
156
+ return False
157
+
158
+ return True
159
+
160
+
161
def _generate_with_base_fallback(messages, transcript="", **kwargs):
    """Generate with adapter first. If output is bad, retry with base model.

    Quality is judged by _is_good_summary; the transcript (when given) is
    used to detect parroting.  When the cached model is a PeftModel, the
    adapter layers are temporarily disabled for the retry and always
    re-enabled afterwards (the finally clause guards the shared model state).
    """
    result = _generate(messages, **kwargs)

    if _is_good_summary(result, transcript=transcript):
        return result

    # Adapter output is bad, try base model
    model, _ = _load_model()
    if isinstance(model, PeftModel):
        print("[FALLBACK] Adapter output too short or repetitive, retrying with base model...")
        model.disable_adapter_layers()
        try:
            result = _generate(messages, **kwargs)
        finally:
            model.enable_adapter_layers()
        print(f"[FALLBACK] base model response length: {len(result)}")

    return result
180
+
181
+
182
+ def _truncate_transcript(transcript: str, max_words: int = 4000) -> str:
183
+ """Truncate transcript to fit model's effective context (trained on 3072 tokens)."""
184
+ words = transcript.split()
185
+ if len(words) <= max_words:
186
+ return transcript
187
+ print(f"[TRUNCATE] Transcript has {len(words)} words, truncating to {max_words}")
188
+ return " ".join(words[:max_words])
189
+
190
+
191
def summarize_lecture(transcript: str, model: str = DEFAULT_MODEL) -> str:
    """Summarize a lecture transcript using Gemma."""
    if not transcript or not transcript.strip():
        return ""

    clipped = _truncate_transcript(transcript)
    chat = [
        {"role": "system", "content": SUMMARIZE_SYSTEM_PROMPT},
        {"role": "user", "content": f"Lecture transcription:\n\n{clipped}"},
    ]
    # Adapter output is preferred; quality gating falls back to the base model.
    summary = _generate_with_base_fallback(chat, transcript=transcript, do_sample=True, temperature=0.3)
    print(f"[DEBUG summarize] response length: {len(summary)}")
    return summary
205
+
206
+
207
+ def _extract_question_text(result: str) -> str:
208
+ """Extract just the question text (first line after Q number) for dedup comparison."""
209
+ import re
210
+ match = re.search(r'Q\d+\.\s*(.+)', result)
211
+ return match.group(1).strip().lower() if match else result.strip().lower()
212
+
213
+
214
+ def _is_good_quiz_answer(result: str, transcript: str = "") -> bool:
215
+ """Check if a generated quiz question is reasonable quality."""
216
+ # Reject if response doesn't match any expected format (no question generated)
217
+ if "Correct Answer:" not in result and "Expected Answer:" not in result:
218
+ print(f"[QUALITY] Response has no valid question format (missing Correct/Expected Answer)")
219
+ return False
220
+
221
+ # Reject if there's no actual question (Q1. pattern)
222
+ if "Q1." not in result:
223
+ print(f"[QUALITY] Response missing Q1. question marker")
224
+ return False
225
+
226
+ # Short answer: reject if expected answer is just a transcript fragment with no real content
227
+ if "Expected Answer:" in result:
228
+ answer = result.split("Expected Answer:")[-1].strip()
229
+ # Reject vague/pointer answers like "right here", "this arrow", "at this point"
230
+ vague_phrases = ["right here", "this arrow", "at this point", "this one", "over here", "right there"]
231
+ if any(phrase in answer.lower() for phrase in vague_phrases):
232
+ print(f"[QUALITY] Short answer too vague: {answer}")
233
+ return False
234
+ if len(answer.split()) < 2:
235
+ print(f"[QUALITY] Short answer too short: {answer}")
236
+ return False
237
+
238
+ # MCQ: reject if it doesn't have 4 options or has duplicate options
239
+ if "Correct Answer:" in result and "Expected Answer:" not in result:
240
+ import re
241
+ for label in ["A)", "B)", "C)", "D)"]:
242
+ if label not in result:
243
+ print(f"[QUALITY] MCQ missing option {label}")
244
+ return False
245
+ # Reject if options are mostly duplicated
246
+ options = re.findall(r'[A-D]\)\s*(.+)', result)
247
+ unique_options = set(opt.strip().lower() for opt in options)
248
+ if len(unique_options) < 3:
249
+ print(f"[QUALITY] MCQ has duplicate options ({len(unique_options)} unique out of {len(options)})")
250
+ return False
251
+
252
+ return True
253
+
254
+
255
+ def _dedup_mcq_options(result: str) -> str:
256
+ """Remove duplicate MCQ options, keeping unique ones only."""
257
+ import re
258
+ options = re.findall(r'([A-D])\)\s*(.+)', result)
259
+ if len(options) != 4:
260
+ return result
261
+
262
+ seen = {}
263
+ unique = []
264
+ for label, text in options:
265
+ key = text.strip().lower()
266
+ if key not in seen:
267
+ seen[key] = True
268
+ unique.append((label, text.strip()))
269
+
270
+ if len(unique) == len(options):
271
+ return result # no duplicates
272
+
273
+ print(f"[QUALITY] Removed {len(options) - len(unique)} duplicate MCQ option(s)")
274
+ # Rebuild with correct labels
275
+ lines = result.split("\n")
276
+ new_lines = []
277
+ option_idx = 0
278
+ labels = ["A", "B", "C", "D"]
279
+ for line in lines:
280
+ if re.match(r'^[A-D]\)', line):
281
+ if option_idx < len(unique):
282
+ new_lines.append(f"{labels[option_idx]}) {unique[option_idx][1]}")
283
+ option_idx += 1
284
+ else:
285
+ new_lines.append(line)
286
+
287
+ return "\n".join(new_lines)
288
+
289
+
290
def _generate_quiz_with_fallback(messages, transcript="", **kwargs):
    """Generate a quiz question with the LoRA adapter; if the output fails the
    quality check, retry once with the adapter layers disabled (base model)."""
    first_try = _generate(messages, **kwargs)
    if _is_good_quiz_answer(first_try, transcript):
        return first_try

    model, _ = _load_model()
    if not isinstance(model, PeftModel):
        # No adapter loaded, nothing to fall back to — return the attempt as-is.
        return first_try

    print("[FALLBACK] Quiz answer bad, retrying with base model...")
    model.disable_adapter_layers()
    try:
        return _generate(messages, **kwargs)
    finally:
        # Always re-enable the adapter so later calls use the fine-tuned model.
        model.enable_adapter_layers()
307
+
308
+
309
+ def _normalize_words(text: str) -> set[str]:
310
+ """Strip punctuation from words for cleaner comparison."""
311
+ import re
312
+ return set(re.sub(r'[^\w\s]', '', word) for word in text.split() if word.strip())
313
+
314
+
315
def _is_duplicate(result: str, existing_parts: list[str]) -> bool:
    """Return True when the new question's wording mostly repeats an earlier one.

    Two questions count as duplicates when more than 70% of the smaller
    question's (punctuation-stripped) words appear in the other.
    """
    candidate_words = _normalize_words(_extract_question_text(result))
    if not candidate_words:
        return False
    for previous in existing_parts:
        prior_words = _normalize_words(_extract_question_text(previous))
        if not prior_words:
            continue
        overlap = len(candidate_words & prior_words) / min(len(candidate_words), len(prior_words))
        if overlap > 0.7:
            print(f"[QUALITY] Duplicate question detected ({overlap:.0%} word overlap)")
            return True
    return False
330
+
331
+
332
def _generate_one_question(label: str, system_prompt: str, transcript: str,
                           parts: list[str], max_retries: int, is_mcq: bool) -> str | None:
    """Generate one quiz question, retrying when output is low-quality or duplicated.

    Args:
        label: Human-readable tag used in debug logs (e.g. "MCQ 1").
        system_prompt: System prompt selecting the question type.
        transcript: Truncated lecture transcript.
        parts: Questions accepted so far (for duplicate detection and numbering).
        max_retries: Extra attempts allowed after the first one.
        is_mcq: When True, also de-duplicate the generated answer options.

    Returns:
        The renumbered question text, or None if every attempt was bad or a duplicate.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Lecture transcription:\n\n{transcript}"},
    ]
    for attempt in range(1 + max_retries):
        result = _generate_quiz_with_fallback(messages, transcript=transcript, max_new_tokens=256, do_sample=True)
        # Re-validate even after fallback: the base-model retry may still be bad.
        if _is_good_quiz_answer(result, transcript) and not _is_duplicate(result, parts):
            if is_mcq:
                result = _dedup_mcq_options(result)
            # The model is trained to emit "Q1."; renumber to the actual slot.
            return result.replace("Q1.", f"Q{len(parts) + 1}.", 1)
        print(f"[DEBUG quiz] {label} attempt {attempt + 1} was bad or duplicate, retrying...")
    return None


def generate_quiz(transcript: str, model: str = DEFAULT_MODEL) -> str:
    """Generate quiz questions from a lecture transcript using Gemma.

    Generates questions one at a time to match the training format (one
    question per example), then combines them. Bad or duplicate questions are
    retried and, if still unreliable, dropped.

    Args:
        transcript: Raw lecture transcript text.
        model: Kept for interface compatibility; generation uses the globally
            loaded model.

    Returns:
        All accepted questions joined by blank lines, or "" for empty input.
    """
    if not transcript or not transcript.strip():
        return ""

    transcript = _truncate_transcript(transcript)
    parts: list[str] = []
    max_retries = 2  # extra attempts per question if bad or duplicate

    # MCQs first (matches training: one MCQ per example).
    for i in range(NUM_MCQ):
        print(f"[DEBUG quiz] generating MCQ {i + 1}/{NUM_MCQ}...")
        question = _generate_one_question(
            f"MCQ {i + 1}", MCQ_SYSTEM_PROMPT, transcript, parts, max_retries, is_mcq=True
        )
        if question is None:
            print(f"[DEBUG quiz] MCQ {i + 1} dropped (unreliable after {1 + max_retries} attempts)")
        else:
            parts.append(question)

    # Then short-answer questions.
    for i in range(NUM_SHORT_ANSWER):
        print(f"[DEBUG quiz] generating short answer {i + 1}/{NUM_SHORT_ANSWER}...")
        question = _generate_one_question(
            f"short answer {i + 1}", SHORT_ANSWER_SYSTEM_PROMPT, transcript, parts, max_retries, is_mcq=False
        )
        if question is None:
            print(f"[DEBUG quiz] short answer {i + 1} dropped (unreliable after {1 + max_retries} attempts)")
        else:
            parts.append(question)

    combined = "\n\n".join(parts)
    print(f"[DEBUG quiz] total response length: {len(combined)}")
    return combined
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+ torch
3
+ torchaudio
4
+ whisperx @ git+https://github.com/m-bain/whisperX.git
5
+ transformers
6
+ accelerate
7
+ gradio
8
+ yt-dlp
9
+ httpx
10
+ peft
11
+ spaces
transcribe.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # Workaround for PyTorch 2.6+ weights_only=True default.
4
+ # pyannote VAD model checkpoints (used by WhisperX) contain omegaconf types
5
+ # and other globals that are not in torch's safe-globals allowlist.
6
+ # This env var tells PyTorch to fall back to weights_only=False when the
7
+ # caller did not explicitly pass weights_only. The pyannote models are
8
+ # published, trusted checkpoints.
9
+ os.environ.setdefault("TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD", "1")
10
+
11
+ import whisperx
12
+
13
+ import gc
14
+ import torch
15
+
16
+ _model = None
17
+ _current_device = None
18
+
19
+
20
def _get_model(device: str | None = None):
    """Load (or reuse) the cached WhisperX model for the given device.

    Args:
        device: "cuda" or "cpu"; None auto-selects based on CUDA availability.
            (Annotation fixed from implicit-Optional ``str = None``, per PEP 484,
            matching the ``str | None`` style used elsewhere in this file.)

    Returns:
        The cached WhisperX ASR model instance.
    """
    global _model, _current_device
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    # Reload when nothing is cached yet or the requested device changed.
    if _model is None or _current_device != device:
        _model = whisperx.load_model(
            "base",
            device=device,
            compute_type="int8",  # int8 keeps memory low and runs on CPU too
        )
        _current_device = device
    return _model
32
+
33
+
34
def unload_model():
    """Free WhisperX model from GPU memory to make room for other models."""
    global _model, _current_device
    if _model is None:
        return  # nothing loaded, nothing to free
    del _model
    _model = None
    _current_device = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("[WhisperX] Model unloaded, GPU memory freed.")
45
+
46
+
47
def transcribe_audio(audio_path: str, language: str | None = None, device: str | None = None) -> str:
    """
    Transcribe audio file using WhisperX.

    Args:
        audio_path: Path to audio file (any format supported by ffmpeg).
        language: ISO 639-1 language code (e.g. "en", "ko", "ja").
            None for auto-detection.
        device: "cuda" or "cpu"; None auto-selects. (Annotation fixed from
            implicit-Optional ``str = None``, per PEP 484, matching the
            ``language`` parameter's style.)

    Returns:
        Transcribed text as a single string.
    """
    model = _get_model(device)
    audio = whisperx.load_audio(audio_path)

    transcribe_kwargs = {"batch_size": 16}
    if language:
        # Only pass language when explicitly given; otherwise WhisperX auto-detects.
        transcribe_kwargs["language"] = language

    result = model.transcribe(audio, **transcribe_kwargs)

    # Join all segment texts, skipping segments with missing/empty text.
    segments = result.get("segments", [])
    return " ".join(seg["text"].strip() for seg in segments if seg.get("text"))
71
+ return text