LRU1 committed on
Commit
819e875
·
1 Parent(s): dd9b31b

basic functions done

Browse files
Files changed (26) hide show
  1. app.py +66 -0
  2. lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc +0 -0
  3. lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc +0 -0
  4. lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc +0 -0
  5. lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc +0 -0
  6. lec2note/ingestion/whisper_runner.py +5 -1
  7. lec2note/processing/__pycache__/processor.cpython-310.pyc +0 -0
  8. lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc +0 -0
  9. lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc +0 -0
  10. lec2note/scripts/run_pipeline.py +16 -3
  11. lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc +0 -0
  12. lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc +0 -0
  13. lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc +0 -0
  14. lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc +0 -0
  15. lec2note/segmentation/chunk_merger.py +5 -1
  16. lec2note/segmentation/semantic_segmenter.py +2 -6
  17. lec2note/segmentation/visual_merger.py +24 -4
  18. lec2note/segmentation/visual_segmenter.py +0 -52
  19. lec2note/synthesis/__pycache__/assembler.cpython-310.pyc +0 -0
  20. lec2note/synthesis/assembler.py +4 -2
  21. lec2note/utils/__pycache__/logging_config.cpython-312.pyc +0 -0
  22. lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc +0 -0
  23. lec2note/vision/__pycache__/image_comparator.cpython-310.pyc +0 -0
  24. lec2note/vision/__pycache__/image_sampler.cpython-310.pyc +0 -0
  25. lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc +0 -0
  26. requirements.txt +3 -1
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pathlib import Path
3
+ import tempfile
4
+ import subprocess
5
+ import textwrap
6
+
7
+ st.set_page_config(page_title="Lec2Note2 – Lecture-to-Notes", layout="wide")
8
+
9
+ st.title("📝 Lec2Note2 – Automatic Lecture Notes Generator")
10
+
11
+ st.markdown(
12
+ textwrap.dedent(
13
+ """
14
+ Upload a lecture **video** and receive a fully-formatted **Markdown** study note – complete with key images and structured sections.
15
+ The processing pipeline performs ASR transcription, vision & semantic segmentation, then invokes an LLM to produce rich notes.
16
+ """
17
+ )
18
+ )
19
+
20
+ video_file = st.file_uploader("🎬 Upload MP4/MKV/AVI", type=["mp4", "mkv", "avi"])
21
+
22
+ run_btn = st.button("🚀 Generate Notes", disabled=video_file is None)
23
+
24
+ if run_btn and video_file:
25
+ # Save upload to a temporary file
26
+ tmp_dir = tempfile.TemporaryDirectory()
27
+ vid_path = Path(tmp_dir.name) / video_file.name
28
+ with vid_path.open("wb") as f:
29
+ f.write(video_file.read())
30
+
31
+ output_md = vid_path.with_suffix(".md")
32
+
33
+ st.info("Processing started. This may take several minutes depending on video length …")
34
+ progress_txt = st.empty()
35
+
36
+ # Run pipeline via subprocess to avoid blocking UI; capture logs
37
+ with st.spinner("Running Lec2Note2 pipeline …"):
38
+ result = subprocess.run(
39
+ [
40
+ "python",
41
+ "-m",
42
+ "lec2note.scripts.run_pipeline",
43
+ "--video",
44
+ str(vid_path),
45
+ "--output",
46
+ str(output_md),
47
+ ],
48
+ text=True,
49
+ capture_output=True,
50
+ )
51
+ if result.returncode != 0:
52
+ st.error("❌ Pipeline failed. See logs below.")
53
+ with st.expander("Show logs"):
54
+ st.code(result.stderr + "\n" + result.stdout)
55
+ else:
56
+ st.success("✅ Notes generated!")
57
+ md_content = output_md.read_text()
58
+ st.markdown(md_content)
59
+ st.download_button(
60
+ label="💾 Download notes.md",
61
+ data=md_content,
62
+ file_name="lecture_notes.md",
63
+ mime="text/markdown",
64
+ )
65
+
66
+ tmp_dir.cleanup()
lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc and b/lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc differ
 
lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc and b/lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc differ
 
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
 
lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc differ
 
lec2note/ingestion/whisper_runner.py CHANGED
@@ -10,6 +10,7 @@ from typing import List, Dict, Optional, Any
10
 
11
  import torch
12
  from whisper import load_model # type: ignore
 
13
 
14
  __all__ = ["WhisperRunner"]
15
 
@@ -40,7 +41,7 @@ class WhisperRunner: # noqa: D101
40
 
41
  # convert to our schema
42
  logger.info("[Whisper] got %d segments", len(segments))
43
- return [
44
  {
45
  "start": round(seg["start"], 2),
46
  "end": round(seg["end"], 2),
@@ -48,3 +49,6 @@ class WhisperRunner: # noqa: D101
48
  }
49
  for seg in segments
50
  ]
 
 
 
 
10
 
11
  import torch
12
  from whisper import load_model # type: ignore
13
+ import json
14
 
15
  __all__ = ["WhisperRunner"]
16
 
 
41
 
42
  # convert to our schema
43
  logger.info("[Whisper] got %d segments", len(segments))
44
+ results = [
45
  {
46
  "start": round(seg["start"], 2),
47
  "end": round(seg["end"], 2),
 
49
  }
50
  for seg in segments
51
  ]
52
+ with open(audio_path.with_suffix(".json"), "w") as f:
53
+ json.dump(results, f, indent=2)
54
+ return results
lec2note/processing/__pycache__/processor.cpython-310.pyc CHANGED
Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
 
lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc CHANGED
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc differ
 
lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc CHANGED
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc differ
 
lec2note/scripts/run_pipeline.py CHANGED
@@ -13,9 +13,10 @@ from pathlib import Path
13
  from lec2note.ingestion.audio_extractor import AudioExtractor
14
  from lec2note.utils.logging_config import setup_logging
15
  from lec2note.ingestion.whisper_runner import WhisperRunner
16
- from lec2note.segmentation.visual_segmenter import VisualSegmenter
17
- from lec2note.segmentation.semantic_segmenter import SemanticSegmenter
18
  from lec2note.segmentation.chunk_merger import ChunkMerger
 
 
 
19
  from lec2note.processing.processor import Processor
20
  from lec2note.synthesis.assembler import Assembler
21
 
@@ -36,7 +37,19 @@ def main(): # noqa: D401
36
 
37
  final_chunks = ChunkMerger.run(subtitles, video_path)
38
 
39
- note_chunks = [Processor.generate_note(fc, subtitles) for fc in final_chunks]
 
 
 
 
 
 
 
 
 
 
 
 
40
  markdown = Assembler.merge(note_chunks)
41
  Assembler.save(markdown, args.output)
42
  print(f"Saved markdown to {args.output}")
 
13
  from lec2note.ingestion.audio_extractor import AudioExtractor
14
  from lec2note.utils.logging_config import setup_logging
15
  from lec2note.ingestion.whisper_runner import WhisperRunner
 
 
16
  from lec2note.segmentation.chunk_merger import ChunkMerger
17
+ # parallel execution
18
+ from concurrent.futures import ThreadPoolExecutor, as_completed
19
+ from tqdm.auto import tqdm # progress bar
20
  from lec2note.processing.processor import Processor
21
  from lec2note.synthesis.assembler import Assembler
22
 
 
37
 
38
  final_chunks = ChunkMerger.run(subtitles, video_path)
39
 
40
+ # generate notes in parallel (IO-bound: LLM API calls threads sufficient)
41
+ note_chunks = []
42
+ with ThreadPoolExecutor(max_workers=min(8, len(final_chunks))) as pool, tqdm(
43
+ total=len(final_chunks), desc="Generating notes", unit="chunk"
44
+ ) as pbar:
45
+ future_map = {
46
+ pool.submit(Processor.generate_note, fc, subtitles): fc
47
+ for fc in final_chunks
48
+ }
49
+ for fut in as_completed(future_map):
50
+ note_chunks.append(fut.result())
51
+ pbar.update(1)
52
+
53
  markdown = Assembler.merge(note_chunks)
54
  Assembler.save(markdown, args.output)
55
  print(f"Saved markdown to {args.output}")
lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc and b/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc and b/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc ADDED
Binary file (1.75 kB). View file
 
lec2note/segmentation/chunk_merger.py CHANGED
@@ -55,7 +55,11 @@ class ChunkMerger: # noqa: D101
55
  # 4. map micro to topic & sample images
56
  final_chunks: List[FinalChunk] = []
57
  for ch in topic_chunks_dict:
58
- imgs = [mc["keyframe_path"] for mc in micro_chunks if ch["start"] <= mc["start"] < ch["end"]]
 
 
 
 
59
  imgs_sampled = ImageSampler.sample(imgs, max_n=6)
60
  fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
61
  final_chunks.append(fc)
 
55
  # 4. map micro to topic & sample images
56
  final_chunks: List[FinalChunk] = []
57
  for ch in topic_chunks_dict:
58
+ imgs = [
59
+ mc["keyframe_path"]
60
+ for mc in micro_chunks
61
+ if ch["start"] <= mc["start"] < ch["end"] and mc.get("keyframe_path") and Path(mc["keyframe_path"]).exists()
62
+ ]
63
  imgs_sampled = ImageSampler.sample(imgs, max_n=6)
64
  fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
65
  final_chunks.append(fc)
lec2note/segmentation/semantic_segmenter.py CHANGED
@@ -39,12 +39,8 @@ class SemanticSegmenter: # noqa: D101
39
  buf_emb = embeddings[0]
40
  for i in range(1, len(slide_chunks)):
41
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
42
- duration = buffer["end"] - buffer["start"]
43
- if duration > 120 and sim < 0.8: # too long and not similar => split
44
- refined.append(buffer)
45
- buffer = slide_chunks[i].copy()
46
- buf_emb = embeddings[i]
47
- elif duration < 10 and sim > 0.9: # too short and similar => merge
48
  buffer["end"] = slide_chunks[i]["end"]
49
  else:
50
  refined.append(buffer)
 
39
  buf_emb = embeddings[0]
40
  for i in range(1, len(slide_chunks)):
41
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
42
+ print("semantic sim:",sim)
43
+ if sim > 0.7: # too similar => merge
 
 
 
 
44
  buffer["end"] = slide_chunks[i]["end"]
45
  else:
46
  refined.append(buffer)
lec2note/segmentation/visual_merger.py CHANGED
@@ -37,13 +37,13 @@ class VisualMerger: # noqa: D101
37
  cls,
38
  micro_chunks: List[Dict],
39
  *,
40
- sim_threshold: float = 0.9,
41
  ) -> List[Dict]:
42
  if not micro_chunks:
43
  return []
44
 
45
  visual_chunks: List[Dict] = []
46
- buffer = micro_chunks[0].copy()
47
  for mc in micro_chunks[1:]:
48
  # compare buffer keyframe (last sentence in current block) with mc keyframe
49
  try:
@@ -51,13 +51,33 @@ class VisualMerger: # noqa: D101
51
  except Exception as exc: # noqa: BLE001
52
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
53
  sim = 0.0 # force split
 
54
  if sim >= sim_threshold:
55
  # merge: extend end and replace keyframe/path to current (last)
56
  buffer["end"] = mc["end"]
57
  buffer["keyframe_path"] = mc["keyframe_path"]
58
  else:
59
- visual_chunks.append({"start": buffer["start"], "end": buffer["end"]})
 
 
 
 
60
  buffer = mc.copy()
61
- visual_chunks.append({"start": buffer["start"], "end": buffer["end"]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  logger.info("[VisualMerger] merged %d micro → %d visual chunks", len(micro_chunks), len(visual_chunks))
63
  return visual_chunks
 
37
  cls,
38
  micro_chunks: List[Dict],
39
  *,
40
+ sim_threshold: float = 0.8,
41
  ) -> List[Dict]:
42
  if not micro_chunks:
43
  return []
44
 
45
  visual_chunks: List[Dict] = []
46
+ buffer = micro_chunks[0].copy() # includes keyframe_path
47
  for mc in micro_chunks[1:]:
48
  # compare buffer keyframe (last sentence in current block) with mc keyframe
49
  try:
 
51
  except Exception as exc: # noqa: BLE001
52
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
53
  sim = 0.0 # force split
54
+ print("visual sim:",sim)
55
  if sim >= sim_threshold:
56
  # merge: extend end and replace keyframe/path to current (last)
57
  buffer["end"] = mc["end"]
58
  buffer["keyframe_path"] = mc["keyframe_path"]
59
  else:
60
+ visual_chunks.append({
61
+ "start": buffer["start"],
62
+ "end": buffer["end"],
63
+ "keyframe_path": buffer["keyframe_path"],
64
+ })
65
  buffer = mc.copy()
66
+ visual_chunks.append({
67
+ "start": buffer["start"],
68
+ "end": buffer["end"],
69
+ "keyframe_path": buffer["keyframe_path"],
70
+ })
71
+
72
+ # Optional: remove micro keyframes that are not kept
73
+ kept = {vc["keyframe_path"] for vc in visual_chunks}
74
+ for mc in micro_chunks:
75
+ kp = mc.get("keyframe_path", "")
76
+ if kp and kp not in kept:
77
+ try:
78
+ Path(kp).unlink(missing_ok=True)
79
+ except Exception: # noqa: BLE001
80
+ pass
81
+
82
  logger.info("[VisualMerger] merged %d micro → %d visual chunks", len(micro_chunks), len(visual_chunks))
83
  return visual_chunks
lec2note/segmentation/visual_segmenter.py DELETED
@@ -1,52 +0,0 @@
1
- """Visual segmentation based on keyframe timestamps.
2
-
3
- This module identifies slide boundaries by extracting keyframes first (via
4
- ``lec2note.vision.keyframe_extractor``), then converting frame indices to time
5
- range based on video FPS.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import logging
11
- from pathlib import Path
12
- from typing import List, Dict
13
-
14
- import cv2 # type: ignore
15
-
16
- from lec2note.vision.keyframe_extractor import KeyframeExtractor
17
- from lec2note.types import SlideChunk
18
-
19
- __all__ = ["VisualSegmenter"]
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class VisualSegmenter: # noqa: D101
25
- @classmethod
26
- def run(cls, video_fp: str | Path) -> List[Dict]: # slide_chunks list of dict
27
- """Return list of ``{start, end}`` slide-level chunks."""
28
- video_path = Path(video_fp).expanduser().resolve()
29
- logger.info("[VisualSegmenter] start visual segmentation on %s", video_path.name)
30
- keyframes = KeyframeExtractor.run(video_path,threshold=0.2)
31
- if not keyframes:
32
- # fallback single chunk whole video
33
- cap = cv2.VideoCapture(str(video_path))
34
- duration = cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS)
35
- cap.release()
36
- return [{"start": 0.0, "end": duration}]
37
-
38
- # Determine timestamp for each keyframe: assume filename kf_idx order matches frame order
39
- cap = cv2.VideoCapture(str(video_path))
40
- fps = cap.get(cv2.CAP_PROP_FPS)
41
- cap.release()
42
-
43
- indices = [int(p.stem.split("_")[1]) for p in keyframes]
44
- indices.sort()
45
- times = [idx / fps for idx in indices]
46
- times.append(float("inf")) # sentinel for last end
47
-
48
- slide_chunks: List[Dict] = []
49
- for i in range(len(times) - 1):
50
- slide_chunks.append({"start": times[i], "end": times[i + 1]})
51
- logger.info("[VisualSegmenter] generated %d slide chunks", len(slide_chunks))
52
- return slide_chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc CHANGED
Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
 
lec2note/synthesis/assembler.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  from __future__ import annotations
4
 
 
5
  import logging
6
  from pathlib import Path
7
  logger = logging.getLogger(__name__)
@@ -12,7 +13,7 @@ from lec2note.types import NoteChunk
12
 
13
  __all__ = ["Assembler"]
14
 
15
- TEMPLATE = """# 讲座笔记
16
 
17
  {content}
18
  """
@@ -38,7 +39,7 @@ class Assembler: # noqa: D101
38
  api_key=os.getenv("OPENAI_API_KEY"),
39
  )
40
  response = client.chat.completions.create(
41
- model=getenv("OPENAI_MODEL", "gpt-4o-mini"),
42
  temperature=0.3,
43
  messages=[
44
  {
@@ -61,6 +62,7 @@ class Assembler: # noqa: D101
61
  )
62
  polished = response.choices[0].message.content.strip()
63
  except Exception: # noqa: BLE001
 
64
  polished = raw_md
65
 
66
  logger.info("[Assembler] final document length %d chars", len(polished))
 
2
 
3
  from __future__ import annotations
4
 
5
+ import os
6
  import logging
7
  from pathlib import Path
8
  logger = logging.getLogger(__name__)
 
13
 
14
  __all__ = ["Assembler"]
15
 
16
+ TEMPLATE = """
17
 
18
  {content}
19
  """
 
39
  api_key=os.getenv("OPENAI_API_KEY"),
40
  )
41
  response = client.chat.completions.create(
42
+ model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
43
  temperature=0.3,
44
  messages=[
45
  {
 
62
  )
63
  polished = response.choices[0].message.content.strip()
64
  except Exception: # noqa: BLE001
65
+ logging.error("Error in Assembler.merge", exc_info=True)
66
  polished = raw_md
67
 
68
  logger.info("[Assembler] final document length %d chars", len(polished))
lec2note/utils/__pycache__/logging_config.cpython-312.pyc ADDED
Binary file (1.07 kB). View file
 
lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc differ
 
lec2note/vision/__pycache__/image_comparator.cpython-310.pyc ADDED
Binary file (1.88 kB). View file
 
lec2note/vision/__pycache__/image_sampler.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/image_sampler.cpython-310.pyc and b/lec2note/vision/__pycache__/image_sampler.cpython-310.pyc differ
 
lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc differ
 
requirements.txt CHANGED
@@ -16,4 +16,6 @@ openai>=1.35.0 # 新 SDK,支持 OpenRouter & httpx 0.28+
16
  httpx>=0.28,<0.30
17
  anyio>=3.7,<4.0
18
  scikit-image==0.25.1
19
- imagehash==4.3.1
 
 
 
16
  httpx>=0.28,<0.30
17
  anyio>=3.7,<4.0
18
  scikit-image==0.25.1
19
+ imagehash==4.3.1
20
+ tenacity==8.2.3
21
+ streamlit>=1.35