Spaces:

LRU1
/

lec2note

Sleeping

App Files Files Community

LRU1 committed on Sep 8, 2025

Commit

c246438

1 Parent(s): 39f25d4

Ensure each picture corresponds to its subtitles

Browse files

Files changed (22) hide show

app.py +1 -1
lec2note/__pycache__/types.cpython-310.pyc +0 -0
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc +0 -0
lec2note/ingestion/whisper_runner.py +7 -2
lec2note/processing/__pycache__/processor.cpython-310.pyc +0 -0
lec2note/processing/processor.py +17 -15
lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc +0 -0
lec2note/scripts/run_pipeline.py +1 -1
lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc +0 -0
lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc +0 -0
lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc +0 -0
lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc +0 -0
lec2note/segmentation/chunk_merger.py +2 -13
lec2note/segmentation/semantic_segmenter.py +15 -19
lec2note/segmentation/sentence_chunker.py +9 -9
lec2note/segmentation/visual_merger.py +26 -22
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc +0 -0
lec2note/synthesis/assembler.py +11 -12
lec2note/types.py +15 -7
lec2note/utils/__pycache__/logging_config.cpython-310.pyc +0 -0
lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc +0 -0
lec2note/vision/frame_extractor.py +0 -1

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ from pathlib import Path
 import tempfile, subprocess, threading, queue
 import textwrap
-st.set_page_config(page_title="Lec2Note – Lecture-to-Notes", layout="wide")
 st.title("📝 Lec2Note – Automatic Lecture Notes Generator")

 import tempfile, subprocess, threading, queue
 import textwrap
+st.set_page_config(page_title="Lec2Note2 – Lecture-to-Notes", layout="wide")
 st.title("📝 Lec2Note – Automatic Lecture Notes Generator")

lec2note/__pycache__/types.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/__pycache__/types.cpython-310.pyc and b/lec2note/__pycache__/types.cpython-310.pyc differ

lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ

lec2note/ingestion/whisper_runner.py CHANGED Viewed

@@ -29,6 +29,11 @@ class WhisperRunner:  # noqa: D101
         - The function is *blocking* and can be called inside a Prefect task.
         """
         audio_path = Path(audio_fp).expanduser().resolve()
         if not audio_path.exists():
             raise FileNotFoundError(audio_path)
@@ -87,6 +92,6 @@ class WhisperRunner:  # noqa: D101
             }
             for seg in segments
         ]
-        # with open(audio_path.with_suffix(".json"), "w") as f:
-        #     json.dump(results, f, indent=2)
         return results

         - The function is *blocking* and can be called inside a Prefect task.
         """
         audio_path = Path(audio_fp).expanduser().resolve()
+        sub_path=audio_path.with_suffix(".json")
+        if sub_path.exists():
+            logger.info("[Whisper] loading exisisting subtitles.")
+            with open(sub_path, "r") as f:
+                return json.load(f)
         if not audio_path.exists():
             raise FileNotFoundError(audio_path)
             }
             for seg in segments
         ]
+        with open(sub_path, "w") as f:
+            json.dump(results, f, indent=2)
         return results

lec2note/processing/__pycache__/processor.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ

lec2note/processing/processor.py CHANGED Viewed

@@ -48,18 +48,19 @@ class Processor:  # noqa: D101
     @classmethod
     @classmethod
-    def _build_messages(cls, synced: Dict[str, Any]) -> List[Dict[str, Any]]:
-        subtitle_text = " ".join(synced["text"])
-        # insert numbered placeholders into subtitles for reference
-        placeholder_subs = subtitle_text
-        for idx, _ in enumerate(synced["images"], start=1):
-            placeholder_subs += f"\n\n[IMG{idx}]"
         # Prompt with explicit mapping guidance
         prompt_text = (
-            "**Role**: You are an expert academic assistant tasked with creating a definitive set of study notes from a lecture.\n\n"
             "**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
             "**Key Instructions**:\n\n"
             "1.  **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
             "2.  **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
@@ -71,27 +72,28 @@ class Processor:  # noqa: D101
             "5.  **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
             "---BEGIN LECTURE MATERIALS---\n"
             f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
         )
         parts: List[Dict[str, Any]] = [
             {"type": "text", "text": prompt_text}
         ]
-        for idx, img_fp in enumerate(synced["images"][:10], start=1):  # Limit to 6 images
             parts.append({
                 "type": "image_url",
                 "image_url": {
-                    "url": cls._img_to_data_uri(Path(img_fp)),
-                    "detail": f"IMG{idx}",  # label matches placeholder
                 },
             })
         return [{"role": "user", "content": parts}]
     @classmethod
-    def generate_note(cls, chunk: FinalChunk, subtitles: List[Dict]) -> NoteChunk:
         """Generate a single NoteChunk from FinalChunk data."""
         # collect text for this chunk
-        texts = [s["text"] for s in subtitles if chunk.start <= s["start"] < chunk.end]
-        synced = {"text": texts, "images": chunk.images}
-        messages = cls._build_messages(synced)
         note = cls._call_llm(messages)
-        return NoteChunk(note=note, images=chunk.images)

     @classmethod
     @classmethod
+    def _build_messages(cls, chunk: FinalChunk) -> List[Dict[str, Any]]:
+        placeholder_subs="-"*10+"\n\n"
+        for idx, visual_chunk in enumerate(chunk.visual_chunks, start=1):
+            placeholder_subs+=visual_chunk.subtitles
+            placeholder_subs+=f"[IMG{idx}](img corresponding to the subtitles)\n\n"
+            placeholder_subs+=("-"*10+"\n\n")
         # Prompt with explicit mapping guidance
         prompt_text = (
+            "**Role**: You are an expert academic assistant tasked with taking notes from a part of a lecture which will be ultimately merged into a comprehensive and detailed note.\n\n"
             "**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
+            "**Return Format**: Only return the **note** in Markdown format.\n\n"
             "**Key Instructions**:\n\n"
             "1.  **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
             "2.  **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
             "5.  **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
             "---BEGIN LECTURE MATERIALS---\n"
             f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
+            "---END LECTURE MATERIALS---\n"
         )
         parts: List[Dict[str, Any]] = [
             {"type": "text", "text": prompt_text}
         ]
+        for idx,visual_chunk in enumerate(chunk.visual_chunks, start=1):  # Limit to 6 images
             parts.append({
                 "type": "image_url",
                 "image_url": {
+                    "url": cls._img_to_data_uri(visual_chunk.image_path),
+                    "detail": f"IMG{idx}",
                 },
             })
         return [{"role": "user", "content": parts}]
     @classmethod
+    def generate_note(cls, chunk: FinalChunk) -> NoteChunk:
         """Generate a single NoteChunk from FinalChunk data."""
         # collect text for this chunk
+        messages = cls._build_messages(chunk)
         note = cls._call_llm(messages)
+        images=[visual_chunk.image_path for visual_chunk in chunk.visual_chunks]
+        return NoteChunk(note=note, images=images)

lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc differ

lec2note/scripts/run_pipeline.py CHANGED Viewed

@@ -43,7 +43,7 @@ def main():  # noqa: D401
         total=len(final_chunks), desc="Generating notes", unit="chunk"
     ) as pbar:
         future_map = {
-            pool.submit(Processor.generate_note, fc, subtitles): fc
             for fc in final_chunks
         }
         for fut in as_completed(future_map):

         total=len(final_chunks), desc="Generating notes", unit="chunk"
     ) as pbar:
         future_map = {
+            pool.submit(Processor.generate_note, fc): fc
             for fc in final_chunks
         }
         for fut in as_completed(future_map):

lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc differ

lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc and b/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc differ

lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc and b/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc differ

lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc differ

lec2note/segmentation/chunk_merger.py CHANGED Viewed

@@ -50,19 +50,8 @@ class ChunkMerger:  # noqa: D101
         visual_chunks = VisualMerger.merge(micro_chunks)
         # 3. semantic merge – refine by subtitle semantics
-        topic_chunks_dict = SemanticSegmenter.refine(visual_chunks, subtitles)
-        # 4. map micro to topic & sample images
-        final_chunks: List[FinalChunk] = []
-        for ch in topic_chunks_dict:
-            imgs = [
-                mc["keyframe_path"]
-                for mc in micro_chunks
-                if ch["start"] <= mc["start"] < ch["end"] and mc.get("keyframe_path") and Path(mc["keyframe_path"]).exists()
-            ]
-            imgs_sampled = ImageSampler.sample(imgs, max_n=6)
-            fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
-            final_chunks.append(fc)
         logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
         return final_chunks

         visual_chunks = VisualMerger.merge(micro_chunks)
         # 3. semantic merge – refine by subtitle semantics
+        final_chunks = SemanticSegmenter.refine(visual_chunks)
         logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
         return final_chunks

lec2note/segmentation/semantic_segmenter.py CHANGED Viewed

@@ -7,7 +7,7 @@ import logging
 from typing import List, Dict
 from sentence_transformers import SentenceTransformer, util  # type: ignore
 logger = logging.getLogger(__name__)
 __all__ = ["SemanticSegmenter"]
@@ -17,35 +17,31 @@ class SemanticSegmenter:  # noqa: D101
     _model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
     @classmethod
-    def refine(cls, slide_chunks: List[Dict], subtitles: List[Dict]) -> List[Dict]:
         """Split long chunks or merge short ones by semantic change."""
-        if not slide_chunks:
-            logger.warning("[SemanticSegmenter] empty slide_chunks input")
             return []
         # Build text per chunk
         chunk_texts: List[str] = []
-        for ch in slide_chunks:
-            txt = []
-            for s in subtitles:
-                if ch["start"] <= s["start"] < ch["end"]:
-                    txt.append(s["text"])
-            chunk_texts.append(" ".join(txt))
         embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
-        refined: List[Dict] = []
-        buffer = slide_chunks[0].copy()
         buf_emb = embeddings[0]
-        for i in range(1, len(slide_chunks)):
             sim = float(util.cos_sim(buf_emb, embeddings[i]))
             # print("semantic sim:",sim)
             if sim > 0.55:  # too similar => merge
-                buffer["end"] = slide_chunks[i]["end"]
             else:
-                refined.append(buffer)
-                buffer = slide_chunks[i].copy()
-                buf_emb = embeddings[i]
-        refined.append(buffer)
-        logger.info("[SemanticSegmenter] refined %d→%d chunks", len(slide_chunks), len(refined))
         return refined

 from typing import List, Dict
 from sentence_transformers import SentenceTransformer, util  # type: ignore
+from lec2note.types import VisualChunk,FinalChunk
 logger = logging.getLogger(__name__)
 __all__ = ["SemanticSegmenter"]
     _model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
     @classmethod
+    def refine(cls, visual_chunks: List[VisualChunk],) -> List[FinalChunk]:
         """Split long chunks or merge short ones by semantic change."""
+        if not visual_chunks:
+            logger.warning("[SemanticSegmenter] empty visual_chunks input")
             return []
         # Build text per chunk
         chunk_texts: List[str] = []
+        for ch in visual_chunks:
+            chunk_texts.append(ch.subtitles)
         embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
+        refined: List[FinalChunk] = []
         buf_emb = embeddings[0]
+        buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[0]])
+        for i in range(1, len(visual_chunks)):
             sim = float(util.cos_sim(buf_emb, embeddings[i]))
             # print("semantic sim:",sim)
             if sim > 0.55:  # too similar => merge
+                buf_visual_chunks.visual_chunks.append(visual_chunks[i])
             else:
+                refined.append(buf_visual_chunks)
+                buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[i]])
+            buf_emb = embeddings[i]
+        refined.append(buf_visual_chunks)
+        logger.info("[SemanticSegmenter] refined %d→%d chunks", len(visual_chunks), len(refined))
         return refined

lec2note/segmentation/sentence_chunker.py CHANGED Viewed

@@ -21,7 +21,7 @@ from pathlib import Path
 from typing import List, Dict
 from lec2note.vision.frame_extractor import FrameExtractor
 __all__ = ["SentenceChunker"]
 logger = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ class SentenceChunker:  # noqa: D101
         video_fp: str | Path,
         *,
         output_dir: str | Path | None = None,
-    ) -> List[Dict]:
         """Generate micro-chunks aligned with subtitle sentences.
         Parameters
@@ -52,7 +52,7 @@ class SentenceChunker:  # noqa: D101
         if not video_path.exists():
             raise FileNotFoundError(video_path)
-        micro_chunks: List[Dict] = []
         timestamps: List[float] = [s["end"] for s in subtitles]
         keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
         # ensure same length
@@ -64,12 +64,12 @@ class SentenceChunker:  # noqa: D101
             )
         for idx, sub in enumerate(subtitles):
-            chunk = {
-                "start": sub["start"],
-                "end": sub["end"],
-                "text": sub["text"],
-                "keyframe_path": str(keyframe_paths[idx]) if idx < len(keyframe_paths) else "",
-            }
             micro_chunks.append(chunk)
         logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
         return micro_chunks

 from typing import List, Dict
 from lec2note.vision.frame_extractor import FrameExtractor
+from lec2note.types import MicroChunk
 __all__ = ["SentenceChunker"]
 logger = logging.getLogger(__name__)
         video_fp: str | Path,
         *,
         output_dir: str | Path | None = None,
+    ) -> List[MicroChunk]:
         """Generate micro-chunks aligned with subtitle sentences.
         Parameters
         if not video_path.exists():
             raise FileNotFoundError(video_path)
+        micro_chunks: List[MicroChunk] = []
         timestamps: List[float] = [s["end"] for s in subtitles]
         keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
         # ensure same length
             )
         for idx, sub in enumerate(subtitles):
+            chunk = MicroChunk(
+                start=sub["start"],
+                end=sub["end"],
+                subtitle=sub["text"],
+                keyframe_path=keyframe_paths[idx] if idx < len(keyframe_paths) else "",
+            )
             micro_chunks.append(chunk)
         logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
         return micro_chunks

lec2note/segmentation/visual_merger.py CHANGED Viewed

@@ -23,7 +23,7 @@ for semantic refinement.
 import logging
 from pathlib import Path
 from typing import List, Dict
 from lec2note.vision.image_comparator import ImageComparator
 logger = logging.getLogger(__name__)
@@ -35,44 +35,48 @@ class VisualMerger:  # noqa: D101
     @classmethod
     def merge(
         cls,
-        micro_chunks: List[Dict],
         *,
-        sim_threshold: float = 0.8,
-    ) -> List[Dict]:
         if not micro_chunks:
             return []
-        visual_chunks: List[Dict] = []
-        buffer = micro_chunks[0].copy()  # includes keyframe_path
         for mc in micro_chunks[1:]:
             # compare buffer keyframe (last sentence in current block) with mc keyframe
             try:
-                sim = ImageComparator.get_similarity(buffer["keyframe_path"], mc["keyframe_path"])
             except Exception as exc:  # noqa: BLE001
                 logger.warning("[VisualMerger] similarity calc failed: %s", exc)
                 sim = 0.0  # force split
             # print("visual sim:",sim)
             if sim >= sim_threshold:
                 # merge: extend end and replace keyframe/path to current (last)
-                buffer["end"] = mc["end"]
-                buffer["keyframe_path"] = mc["keyframe_path"]
             else:
-                visual_chunks.append({
-                    "start": buffer["start"],
-                    "end": buffer["end"],
-                    "keyframe_path": buffer["keyframe_path"],
-                })
-                buffer = mc.copy()
-        visual_chunks.append({
-            "start": buffer["start"],
-            "end": buffer["end"],
-            "keyframe_path": buffer["keyframe_path"],
-        })
         # Optional: remove micro keyframes that are not kept
-        kept = {vc["keyframe_path"] for vc in visual_chunks}
         for mc in micro_chunks:
-            kp = mc.get("keyframe_path", "")
             if kp and kp not in kept:
                 try:
                     Path(kp).unlink(missing_ok=True)

 import logging
 from pathlib import Path
 from typing import List, Dict
+from lec2note.types import VisualChunk,MicroChunk
 from lec2note.vision.image_comparator import ImageComparator
 logger = logging.getLogger(__name__)
     @classmethod
     def merge(
         cls,
+        micro_chunks: List[MicroChunk],
         *,
+        sim_threshold: float = 0.75,
+    ) -> List[VisualChunk]:
         if not micro_chunks:
             return []
+        visual_chunks: List[VisualChunk] = []
+        buffer:VisualChunk=VisualChunk(
+            start=micro_chunks[0].start,
+            end=micro_chunks[0].end,
+            image_path=micro_chunks[0].keyframe_path,
+            subtitles=micro_chunks[0].subtitle,
+        )
         for mc in micro_chunks[1:]:
             # compare buffer keyframe (last sentence in current block) with mc keyframe
             try:
+                sim = ImageComparator.get_similarity(buffer.image_path, mc.keyframe_path)
             except Exception as exc:  # noqa: BLE001
                 logger.warning("[VisualMerger] similarity calc failed: %s", exc)
                 sim = 0.0  # force split
             # print("visual sim:",sim)
             if sim >= sim_threshold:
                 # merge: extend end and replace keyframe/path to current (last)
+                buffer.end = mc.end
+                buffer.image_path = mc.keyframe_path
+                buffer.subtitles=buffer.subtitles+"\n\n"+mc.subtitle
             else:
+                visual_chunks.append(buffer)
+                buffer = VisualChunk(
+                    start=mc.start,
+                    end=mc.end,
+                    image_path=mc.keyframe_path,
+                    subtitles=mc.subtitle,
+                )
+        if visual_chunks[-1].end!= buffer.end:
+            visual_chunks.append(buffer)
         # Optional: remove micro keyframes that are not kept
+        kept = {vc.image_path for vc in visual_chunks}
         for mc in micro_chunks:
+            kp = mc.keyframe_path
             if kp and kp not in kept:
                 try:
                     Path(kp).unlink(missing_ok=True)

lec2note/synthesis/__pycache__/assembler.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ

lec2note/synthesis/assembler.py CHANGED Viewed

@@ -23,10 +23,10 @@ class Assembler:  # noqa: D101
     @staticmethod
     def merge(chunks: List[NoteChunk]) -> str:
         """Concatenate note chunks and wrap with template."""
-        body_parts = []
-        for c in chunks:
-            body_parts.append(c.note)
-        raw_md = "\n\n".join(body_parts)
         logger.info("[Assembler] merging %d note chunks", len(chunks))
         logger.info("[Assembler] polishing with LLM…")
@@ -40,19 +40,18 @@ class Assembler:  # noqa: D101
             )
             response = client.chat.completions.create(
                             model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
-                            temperature=0.3,
                             messages=[
                                 {
                                     "role": "user",
                                     "content": (
-                                        "You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented and repetitive lecture notes into a single, coherent, and logically structured study guide.\n\n"
-                                        "**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are not chronologically ordered and contain significant overlap and redundancy.\n\n"
-                                        "**Primary Goal:** Create a comprehensive, well-organized, and de-duplicated final document from the provided fragments.\n\n"
                                         "**Key Instructions:**\n"
-                                        "1.  **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. For instance, fundamental terms like 'State vs. Observation', 'Policy', and the notation aside (s_t vs x_t) are likely defined multiple times; these must be consolidated.\n"
-                                        "2.  **Reorganize and Structure:** Do NOT preserve the original order. Instead, create a new, logical structure for the entire document. Use clear headings and subheadings (e.g., using Markdown's #, ##, ###) to build a clear narrative, starting from fundamental definitions and progressing to more complex topics.\n"
-                                        "3.  **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
-                                        "4.  **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
                                         "**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
                                         "Here are the fragmented notes to process:\n\n"
                                         f"{raw_md}"

     @staticmethod
     def merge(chunks: List[NoteChunk]) -> str:
         """Concatenate note chunks and wrap with template."""
+        raw_md=""
+        for idx, c in enumerate(chunks):
+            raw_md += f"\n\n-----SEGEMENT{idx}-----\n\n"
+            raw_md += c.note
         logger.info("[Assembler] merging %d note chunks", len(chunks))
         logger.info("[Assembler] polishing with LLM…")
             )
             response = client.chat.completions.create(
                             model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
+                            temperature=0.1,
                             messages=[
                                 {
                                     "role": "user",
                                     "content": (
+                                        "You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented lecture notes into a single, coherent, detailed and logically structured study note.\n\n"
+                                        "**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are chronologically ordered but probably contain significant overlap and redundancy.\n\n"
+                                        "**Primary Goal:** Create a comprehensive and well-organized final document from the provided segements.\n\n"
                                         "**Key Instructions:**\n"
+                                        "1.  **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. \n"
+                                        "2.  **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
+                                        "3.  **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
                                         "**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
                                         "Here are the fragmented notes to process:\n\n"
                                         f"{raw_md}"

lec2note/types.py CHANGED Viewed

@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import List, Dict, Any
 __all__ = [
-    "SlideChunk",
     "FinalChunk",
     "NoteChunk",
     "Chunk",
@@ -15,15 +15,23 @@ __all__ = [
 @dataclass
-class SlideChunk:  # noqa: D101
-    start: float  # seconds
     end: float
 @dataclass
-class FinalChunk(SlideChunk):  # noqa: D101
-    images: List[Path] = field(default_factory=list)
-    subtitles: List[int] = field(default_factory=list)  # indices in subtitles list
 @dataclass

 from typing import List, Dict, Any
 __all__ = [
+    "VisualChunk",
     "FinalChunk",
     "NoteChunk",
     "Chunk",
 @dataclass
+class MicroChunk:  # noqa: D101
+    start: float
     end: float
+    subtitle: str
+    keyframe_path: Path=field(default=None)
 @dataclass
+class VisualChunk:  # noqa: D101
+    start: float
+    end: float
+    subtitles: str
+    image_path: Path=field(default=None)
+@dataclass
+class FinalChunk:  # noqa: D101
+    visual_chunks: List[VisualChunk]
 @dataclass

lec2note/utils/__pycache__/logging_config.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/utils/__pycache__/logging_config.cpython-310.pyc and b/lec2note/utils/__pycache__/logging_config.cpython-310.pyc differ

lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc CHANGED Viewed

Binary files a/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc differ

lec2note/vision/frame_extractor.py CHANGED Viewed

@@ -67,7 +67,6 @@ class FrameExtractor:  # noqa: D101
             cv2.imwrite(str(out_fp), frame)
             saved.append(out_fp)
         cap.release()
-        logger.info("[FrameExtractor] captured %d frames", len(saved))
         return saved

             cv2.imwrite(str(out_fp), frame)
             saved.append(out_fp)
         cap.release()
         return saved