Ensure each picture corresponds to its subtitles
Browse files
- app.py +1 -1
- lec2note/__pycache__/types.cpython-310.pyc +0 -0
- lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc +0 -0
- lec2note/ingestion/whisper_runner.py +7 -2
- lec2note/processing/__pycache__/processor.cpython-310.pyc +0 -0
- lec2note/processing/processor.py +17 -15
- lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc +0 -0
- lec2note/scripts/run_pipeline.py +1 -1
- lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc +0 -0
- lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc +0 -0
- lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc +0 -0
- lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc +0 -0
- lec2note/segmentation/chunk_merger.py +2 -13
- lec2note/segmentation/semantic_segmenter.py +15 -19
- lec2note/segmentation/sentence_chunker.py +9 -9
- lec2note/segmentation/visual_merger.py +26 -22
- lec2note/synthesis/__pycache__/assembler.cpython-310.pyc +0 -0
- lec2note/synthesis/assembler.py +11 -12
- lec2note/types.py +15 -7
- lec2note/utils/__pycache__/logging_config.cpython-310.pyc +0 -0
- lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc +0 -0
- lec2note/vision/frame_extractor.py +0 -1
app.py
CHANGED
|
@@ -3,7 +3,7 @@ from pathlib import Path
|
|
| 3 |
import tempfile, subprocess, threading, queue
|
| 4 |
import textwrap
|
| 5 |
|
| 6 |
-
st.set_page_config(page_title="
|
| 7 |
|
| 8 |
st.title("📝 Lec2Note – Automatic Lecture Notes Generator")
|
| 9 |
|
|
|
|
| 3 |
import tempfile, subprocess, threading, queue
|
| 4 |
import textwrap
|
| 5 |
|
| 6 |
+
st.set_page_config(page_title="Lec2Note2 – Lecture-to-Notes", layout="wide")
|
| 7 |
|
| 8 |
st.title("📝 Lec2Note – Automatic Lecture Notes Generator")
|
| 9 |
|
lec2note/__pycache__/types.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/__pycache__/types.cpython-310.pyc and b/lec2note/__pycache__/types.cpython-310.pyc differ
|
|
|
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
|
|
|
lec2note/ingestion/whisper_runner.py
CHANGED
|
@@ -29,6 +29,11 @@ class WhisperRunner: # noqa: D101
|
|
| 29 |
- The function is *blocking* and can be called inside a Prefect task.
|
| 30 |
"""
|
| 31 |
audio_path = Path(audio_fp).expanduser().resolve()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
if not audio_path.exists():
|
| 33 |
raise FileNotFoundError(audio_path)
|
| 34 |
|
|
@@ -87,6 +92,6 @@ class WhisperRunner: # noqa: D101
|
|
| 87 |
}
|
| 88 |
for seg in segments
|
| 89 |
]
|
| 90 |
-
|
| 91 |
-
|
| 92 |
return results
|
|
|
|
| 29 |
- The function is *blocking* and can be called inside a Prefect task.
|
| 30 |
"""
|
| 31 |
audio_path = Path(audio_fp).expanduser().resolve()
|
| 32 |
+
sub_path=audio_path.with_suffix(".json")
|
| 33 |
+
if sub_path.exists():
|
| 34 |
+
logger.info("[Whisper] loading exisisting subtitles.")
|
| 35 |
+
with open(sub_path, "r") as f:
|
| 36 |
+
return json.load(f)
|
| 37 |
if not audio_path.exists():
|
| 38 |
raise FileNotFoundError(audio_path)
|
| 39 |
|
|
|
|
| 92 |
}
|
| 93 |
for seg in segments
|
| 94 |
]
|
| 95 |
+
with open(sub_path, "w") as f:
|
| 96 |
+
json.dump(results, f, indent=2)
|
| 97 |
return results
|
lec2note/processing/__pycache__/processor.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
|
|
|
lec2note/processing/processor.py
CHANGED
|
@@ -48,18 +48,19 @@ class Processor: # noqa: D101
|
|
| 48 |
|
| 49 |
@classmethod
|
| 50 |
@classmethod
|
| 51 |
-
def _build_messages(cls,
|
| 52 |
-
subtitle_text = " ".join(synced["text"])
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
placeholder_subs
|
|
|
|
| 58 |
|
| 59 |
# Prompt with explicit mapping guidance
|
| 60 |
prompt_text = (
|
| 61 |
-
"**Role**: You are an expert academic assistant tasked with
|
| 62 |
"**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
|
|
|
|
| 63 |
"**Key Instructions**:\n\n"
|
| 64 |
"1. **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
|
| 65 |
"2. **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
|
|
@@ -71,27 +72,28 @@ class Processor: # noqa: D101
|
|
| 71 |
"5. **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
|
| 72 |
"---BEGIN LECTURE MATERIALS---\n"
|
| 73 |
f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
|
|
|
|
| 74 |
)
|
| 75 |
|
| 76 |
parts: List[Dict[str, Any]] = [
|
| 77 |
{"type": "text", "text": prompt_text}
|
| 78 |
]
|
| 79 |
-
for idx,
|
| 80 |
parts.append({
|
| 81 |
"type": "image_url",
|
| 82 |
"image_url": {
|
| 83 |
-
"url": cls._img_to_data_uri(
|
| 84 |
-
"detail": f"IMG{idx}",
|
| 85 |
},
|
| 86 |
})
|
|
|
|
| 87 |
return [{"role": "user", "content": parts}]
|
| 88 |
|
| 89 |
@classmethod
|
| 90 |
-
def generate_note(cls, chunk: FinalChunk
|
| 91 |
"""Generate a single NoteChunk from FinalChunk data."""
|
| 92 |
# collect text for this chunk
|
| 93 |
-
|
| 94 |
-
synced = {"text": texts, "images": chunk.images}
|
| 95 |
-
messages = cls._build_messages(synced)
|
| 96 |
note = cls._call_llm(messages)
|
| 97 |
-
|
|
|
|
|
|
| 48 |
|
| 49 |
@classmethod
|
| 50 |
@classmethod
|
| 51 |
+
def _build_messages(cls, chunk: FinalChunk) -> List[Dict[str, Any]]:
|
|
|
|
| 52 |
|
| 53 |
+
placeholder_subs="-"*10+"\n\n"
|
| 54 |
+
for idx, visual_chunk in enumerate(chunk.visual_chunks, start=1):
|
| 55 |
+
placeholder_subs+=visual_chunk.subtitles
|
| 56 |
+
placeholder_subs+=f"[IMG{idx}](img corresponding to the subtitles)\n\n"
|
| 57 |
+
placeholder_subs+=("-"*10+"\n\n")
|
| 58 |
|
| 59 |
# Prompt with explicit mapping guidance
|
| 60 |
prompt_text = (
|
| 61 |
+
"**Role**: You are an expert academic assistant tasked with taking notes from a part of a lecture which will be ultimately merged into a comprehensive and detailed note.\n\n"
|
| 62 |
"**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
|
| 63 |
+
"**Return Format**: Only return the **note** in Markdown format.\n\n"
|
| 64 |
"**Key Instructions**:\n\n"
|
| 65 |
"1. **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
|
| 66 |
"2. **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
|
|
|
|
| 72 |
"5. **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
|
| 73 |
"---BEGIN LECTURE MATERIALS---\n"
|
| 74 |
f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
|
| 75 |
+
"---END LECTURE MATERIALS---\n"
|
| 76 |
)
|
| 77 |
|
| 78 |
parts: List[Dict[str, Any]] = [
|
| 79 |
{"type": "text", "text": prompt_text}
|
| 80 |
]
|
| 81 |
+
for idx,visual_chunk in enumerate(chunk.visual_chunks, start=1): # Limit to 6 images
|
| 82 |
parts.append({
|
| 83 |
"type": "image_url",
|
| 84 |
"image_url": {
|
| 85 |
+
"url": cls._img_to_data_uri(visual_chunk.image_path),
|
| 86 |
+
"detail": f"IMG{idx}",
|
| 87 |
},
|
| 88 |
})
|
| 89 |
+
|
| 90 |
return [{"role": "user", "content": parts}]
|
| 91 |
|
| 92 |
@classmethod
|
| 93 |
+
def generate_note(cls, chunk: FinalChunk) -> NoteChunk:
|
| 94 |
"""Generate a single NoteChunk from FinalChunk data."""
|
| 95 |
# collect text for this chunk
|
| 96 |
+
messages = cls._build_messages(chunk)
|
|
|
|
|
|
|
| 97 |
note = cls._call_llm(messages)
|
| 98 |
+
images=[visual_chunk.image_path for visual_chunk in chunk.visual_chunks]
|
| 99 |
+
return NoteChunk(note=note, images=images)
|
lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc differ
|
|
|
lec2note/scripts/run_pipeline.py
CHANGED
|
@@ -43,7 +43,7 @@ def main(): # noqa: D401
|
|
| 43 |
total=len(final_chunks), desc="Generating notes", unit="chunk"
|
| 44 |
) as pbar:
|
| 45 |
future_map = {
|
| 46 |
-
pool.submit(Processor.generate_note, fc
|
| 47 |
for fc in final_chunks
|
| 48 |
}
|
| 49 |
for fut in as_completed(future_map):
|
|
|
|
| 43 |
total=len(final_chunks), desc="Generating notes", unit="chunk"
|
| 44 |
) as pbar:
|
| 45 |
future_map = {
|
| 46 |
+
pool.submit(Processor.generate_note, fc): fc
|
| 47 |
for fc in final_chunks
|
| 48 |
}
|
| 49 |
for fut in as_completed(future_map):
|
lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc differ
|
|
|
lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc and b/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc differ
|
|
|
lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc and b/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc differ
|
|
|
lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc differ
|
|
|
lec2note/segmentation/chunk_merger.py
CHANGED
|
@@ -50,19 +50,8 @@ class ChunkMerger: # noqa: D101
|
|
| 50 |
visual_chunks = VisualMerger.merge(micro_chunks)
|
| 51 |
|
| 52 |
# 3. semantic merge – refine by subtitle semantics
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
# 4. map micro to topic & sample images
|
| 56 |
-
final_chunks: List[FinalChunk] = []
|
| 57 |
-
for ch in topic_chunks_dict:
|
| 58 |
-
imgs = [
|
| 59 |
-
mc["keyframe_path"]
|
| 60 |
-
for mc in micro_chunks
|
| 61 |
-
if ch["start"] <= mc["start"] < ch["end"] and mc.get("keyframe_path") and Path(mc["keyframe_path"]).exists()
|
| 62 |
-
]
|
| 63 |
-
imgs_sampled = ImageSampler.sample(imgs, max_n=6)
|
| 64 |
-
fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
|
| 65 |
-
final_chunks.append(fc)
|
| 66 |
logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
|
| 67 |
return final_chunks
|
| 68 |
|
|
|
|
| 50 |
visual_chunks = VisualMerger.merge(micro_chunks)
|
| 51 |
|
| 52 |
# 3. semantic merge – refine by subtitle semantics
|
| 53 |
+
final_chunks = SemanticSegmenter.refine(visual_chunks)
|
| 54 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
|
| 56 |
return final_chunks
|
| 57 |
|
lec2note/segmentation/semantic_segmenter.py
CHANGED
|
@@ -7,7 +7,7 @@ import logging
|
|
| 7 |
from typing import List, Dict
|
| 8 |
|
| 9 |
from sentence_transformers import SentenceTransformer, util # type: ignore
|
| 10 |
-
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
__all__ = ["SemanticSegmenter"]
|
|
@@ -17,35 +17,31 @@ class SemanticSegmenter: # noqa: D101
|
|
| 17 |
_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
|
| 18 |
|
| 19 |
@classmethod
|
| 20 |
-
def refine(cls,
|
| 21 |
"""Split long chunks or merge short ones by semantic change."""
|
| 22 |
-
if not
|
| 23 |
-
logger.warning("[SemanticSegmenter] empty
|
| 24 |
return []
|
| 25 |
|
| 26 |
# Build text per chunk
|
| 27 |
chunk_texts: List[str] = []
|
| 28 |
-
for ch in
|
| 29 |
-
|
| 30 |
-
for s in subtitles:
|
| 31 |
-
if ch["start"] <= s["start"] < ch["end"]:
|
| 32 |
-
txt.append(s["text"])
|
| 33 |
-
chunk_texts.append(" ".join(txt))
|
| 34 |
|
| 35 |
embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
|
| 36 |
|
| 37 |
-
refined: List[
|
| 38 |
-
buffer = slide_chunks[0].copy()
|
| 39 |
buf_emb = embeddings[0]
|
| 40 |
-
|
|
|
|
| 41 |
sim = float(util.cos_sim(buf_emb, embeddings[i]))
|
| 42 |
# print("semantic sim:",sim)
|
| 43 |
if sim > 0.55: # too similar => merge
|
| 44 |
-
|
| 45 |
else:
|
| 46 |
-
refined.append(
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
refined.append(
|
| 50 |
-
logger.info("[SemanticSegmenter] refined %d→%d chunks", len(
|
| 51 |
return refined
|
|
|
|
| 7 |
from typing import List, Dict
|
| 8 |
|
| 9 |
from sentence_transformers import SentenceTransformer, util # type: ignore
|
| 10 |
+
from lec2note.types import VisualChunk,FinalChunk
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
__all__ = ["SemanticSegmenter"]
|
|
|
|
| 17 |
_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
|
| 18 |
|
| 19 |
@classmethod
|
| 20 |
+
def refine(cls, visual_chunks: List[VisualChunk],) -> List[FinalChunk]:
|
| 21 |
"""Split long chunks or merge short ones by semantic change."""
|
| 22 |
+
if not visual_chunks:
|
| 23 |
+
logger.warning("[SemanticSegmenter] empty visual_chunks input")
|
| 24 |
return []
|
| 25 |
|
| 26 |
# Build text per chunk
|
| 27 |
chunk_texts: List[str] = []
|
| 28 |
+
for ch in visual_chunks:
|
| 29 |
+
chunk_texts.append(ch.subtitles)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
|
| 32 |
|
| 33 |
+
refined: List[FinalChunk] = []
|
|
|
|
| 34 |
buf_emb = embeddings[0]
|
| 35 |
+
buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[0]])
|
| 36 |
+
for i in range(1, len(visual_chunks)):
|
| 37 |
sim = float(util.cos_sim(buf_emb, embeddings[i]))
|
| 38 |
# print("semantic sim:",sim)
|
| 39 |
if sim > 0.55: # too similar => merge
|
| 40 |
+
buf_visual_chunks.visual_chunks.append(visual_chunks[i])
|
| 41 |
else:
|
| 42 |
+
refined.append(buf_visual_chunks)
|
| 43 |
+
buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[i]])
|
| 44 |
+
buf_emb = embeddings[i]
|
| 45 |
+
refined.append(buf_visual_chunks)
|
| 46 |
+
logger.info("[SemanticSegmenter] refined %d→%d chunks", len(visual_chunks), len(refined))
|
| 47 |
return refined
|
lec2note/segmentation/sentence_chunker.py
CHANGED
|
@@ -21,7 +21,7 @@ from pathlib import Path
|
|
| 21 |
from typing import List, Dict
|
| 22 |
|
| 23 |
from lec2note.vision.frame_extractor import FrameExtractor
|
| 24 |
-
|
| 25 |
__all__ = ["SentenceChunker"]
|
| 26 |
|
| 27 |
logger = logging.getLogger(__name__)
|
|
@@ -35,7 +35,7 @@ class SentenceChunker: # noqa: D101
|
|
| 35 |
video_fp: str | Path,
|
| 36 |
*,
|
| 37 |
output_dir: str | Path | None = None,
|
| 38 |
-
) -> List[
|
| 39 |
"""Generate micro-chunks aligned with subtitle sentences.
|
| 40 |
|
| 41 |
Parameters
|
|
@@ -52,7 +52,7 @@ class SentenceChunker: # noqa: D101
|
|
| 52 |
if not video_path.exists():
|
| 53 |
raise FileNotFoundError(video_path)
|
| 54 |
|
| 55 |
-
micro_chunks: List[
|
| 56 |
timestamps: List[float] = [s["end"] for s in subtitles]
|
| 57 |
keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
|
| 58 |
# ensure same length
|
|
@@ -64,12 +64,12 @@ class SentenceChunker: # noqa: D101
|
|
| 64 |
)
|
| 65 |
|
| 66 |
for idx, sub in enumerate(subtitles):
|
| 67 |
-
chunk =
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
micro_chunks.append(chunk)
|
| 74 |
logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
|
| 75 |
return micro_chunks
|
|
|
|
| 21 |
from typing import List, Dict
|
| 22 |
|
| 23 |
from lec2note.vision.frame_extractor import FrameExtractor
|
| 24 |
+
from lec2note.types import MicroChunk
|
| 25 |
__all__ = ["SentenceChunker"]
|
| 26 |
|
| 27 |
logger = logging.getLogger(__name__)
|
|
|
|
| 35 |
video_fp: str | Path,
|
| 36 |
*,
|
| 37 |
output_dir: str | Path | None = None,
|
| 38 |
+
) -> List[MicroChunk]:
|
| 39 |
"""Generate micro-chunks aligned with subtitle sentences.
|
| 40 |
|
| 41 |
Parameters
|
|
|
|
| 52 |
if not video_path.exists():
|
| 53 |
raise FileNotFoundError(video_path)
|
| 54 |
|
| 55 |
+
micro_chunks: List[MicroChunk] = []
|
| 56 |
timestamps: List[float] = [s["end"] for s in subtitles]
|
| 57 |
keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
|
| 58 |
# ensure same length
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
for idx, sub in enumerate(subtitles):
|
| 67 |
+
chunk = MicroChunk(
|
| 68 |
+
start=sub["start"],
|
| 69 |
+
end=sub["end"],
|
| 70 |
+
subtitle=sub["text"],
|
| 71 |
+
keyframe_path=keyframe_paths[idx] if idx < len(keyframe_paths) else "",
|
| 72 |
+
)
|
| 73 |
micro_chunks.append(chunk)
|
| 74 |
logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
|
| 75 |
return micro_chunks
|
lec2note/segmentation/visual_merger.py
CHANGED
|
@@ -23,7 +23,7 @@ for semantic refinement.
|
|
| 23 |
import logging
|
| 24 |
from pathlib import Path
|
| 25 |
from typing import List, Dict
|
| 26 |
-
|
| 27 |
from lec2note.vision.image_comparator import ImageComparator
|
| 28 |
|
| 29 |
logger = logging.getLogger(__name__)
|
|
@@ -35,44 +35,48 @@ class VisualMerger: # noqa: D101
|
|
| 35 |
@classmethod
|
| 36 |
def merge(
|
| 37 |
cls,
|
| 38 |
-
micro_chunks: List[
|
| 39 |
*,
|
| 40 |
-
sim_threshold: float = 0.
|
| 41 |
-
) -> List[
|
| 42 |
if not micro_chunks:
|
| 43 |
return []
|
| 44 |
|
| 45 |
-
visual_chunks: List[
|
| 46 |
-
buffer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
for mc in micro_chunks[1:]:
|
| 48 |
# compare buffer keyframe (last sentence in current block) with mc keyframe
|
| 49 |
try:
|
| 50 |
-
sim = ImageComparator.get_similarity(buffer
|
| 51 |
except Exception as exc: # noqa: BLE001
|
| 52 |
logger.warning("[VisualMerger] similarity calc failed: %s", exc)
|
| 53 |
sim = 0.0 # force split
|
| 54 |
# print("visual sim:",sim)
|
| 55 |
if sim >= sim_threshold:
|
| 56 |
# merge: extend end and replace keyframe/path to current (last)
|
| 57 |
-
buffer
|
| 58 |
-
buffer
|
|
|
|
| 59 |
else:
|
| 60 |
-
visual_chunks.append(
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
"keyframe_path": buffer["keyframe_path"],
|
| 70 |
-
})
|
| 71 |
|
| 72 |
# Optional: remove micro keyframes that are not kept
|
| 73 |
-
kept = {vc
|
| 74 |
for mc in micro_chunks:
|
| 75 |
-
kp = mc.
|
| 76 |
if kp and kp not in kept:
|
| 77 |
try:
|
| 78 |
Path(kp).unlink(missing_ok=True)
|
|
|
|
| 23 |
import logging
|
| 24 |
from pathlib import Path
|
| 25 |
from typing import List, Dict
|
| 26 |
+
from lec2note.types import VisualChunk,MicroChunk
|
| 27 |
from lec2note.vision.image_comparator import ImageComparator
|
| 28 |
|
| 29 |
logger = logging.getLogger(__name__)
|
|
|
|
| 35 |
@classmethod
|
| 36 |
def merge(
|
| 37 |
cls,
|
| 38 |
+
micro_chunks: List[MicroChunk],
|
| 39 |
*,
|
| 40 |
+
sim_threshold: float = 0.75,
|
| 41 |
+
) -> List[VisualChunk]:
|
| 42 |
if not micro_chunks:
|
| 43 |
return []
|
| 44 |
|
| 45 |
+
visual_chunks: List[VisualChunk] = []
|
| 46 |
+
buffer:VisualChunk=VisualChunk(
|
| 47 |
+
start=micro_chunks[0].start,
|
| 48 |
+
end=micro_chunks[0].end,
|
| 49 |
+
image_path=micro_chunks[0].keyframe_path,
|
| 50 |
+
subtitles=micro_chunks[0].subtitle,
|
| 51 |
+
)
|
| 52 |
for mc in micro_chunks[1:]:
|
| 53 |
# compare buffer keyframe (last sentence in current block) with mc keyframe
|
| 54 |
try:
|
| 55 |
+
sim = ImageComparator.get_similarity(buffer.image_path, mc.keyframe_path)
|
| 56 |
except Exception as exc: # noqa: BLE001
|
| 57 |
logger.warning("[VisualMerger] similarity calc failed: %s", exc)
|
| 58 |
sim = 0.0 # force split
|
| 59 |
# print("visual sim:",sim)
|
| 60 |
if sim >= sim_threshold:
|
| 61 |
# merge: extend end and replace keyframe/path to current (last)
|
| 62 |
+
buffer.end = mc.end
|
| 63 |
+
buffer.image_path = mc.keyframe_path
|
| 64 |
+
buffer.subtitles=buffer.subtitles+"\n\n"+mc.subtitle
|
| 65 |
else:
|
| 66 |
+
visual_chunks.append(buffer)
|
| 67 |
+
buffer = VisualChunk(
|
| 68 |
+
start=mc.start,
|
| 69 |
+
end=mc.end,
|
| 70 |
+
image_path=mc.keyframe_path,
|
| 71 |
+
subtitles=mc.subtitle,
|
| 72 |
+
)
|
| 73 |
+
if visual_chunks[-1].end!= buffer.end:
|
| 74 |
+
visual_chunks.append(buffer)
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# Optional: remove micro keyframes that are not kept
|
| 77 |
+
kept = {vc.image_path for vc in visual_chunks}
|
| 78 |
for mc in micro_chunks:
|
| 79 |
+
kp = mc.keyframe_path
|
| 80 |
if kp and kp not in kept:
|
| 81 |
try:
|
| 82 |
Path(kp).unlink(missing_ok=True)
|
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
|
|
|
lec2note/synthesis/assembler.py
CHANGED
|
@@ -23,10 +23,10 @@ class Assembler: # noqa: D101
|
|
| 23 |
@staticmethod
|
| 24 |
def merge(chunks: List[NoteChunk]) -> str:
|
| 25 |
"""Concatenate note chunks and wrap with template."""
|
| 26 |
-
|
| 27 |
-
for c in chunks:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
logger.info("[Assembler] merging %d note chunks", len(chunks))
|
| 31 |
|
| 32 |
logger.info("[Assembler] polishing with LLM…")
|
|
@@ -40,19 +40,18 @@ class Assembler: # noqa: D101
|
|
| 40 |
)
|
| 41 |
response = client.chat.completions.create(
|
| 42 |
model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
|
| 43 |
-
temperature=0.
|
| 44 |
messages=[
|
| 45 |
{
|
| 46 |
"role": "user",
|
| 47 |
"content": (
|
| 48 |
-
"You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented
|
| 49 |
-
"**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are
|
| 50 |
-
"**Primary Goal:** Create a comprehensive
|
| 51 |
"**Key Instructions:**\n"
|
| 52 |
-
"1. **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept.
|
| 53 |
-
"2. **
|
| 54 |
-
"3. **
|
| 55 |
-
"4. **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
|
| 56 |
"**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
|
| 57 |
"Here are the fragmented notes to process:\n\n"
|
| 58 |
f"{raw_md}"
|
|
|
|
| 23 |
@staticmethod
|
| 24 |
def merge(chunks: List[NoteChunk]) -> str:
|
| 25 |
"""Concatenate note chunks and wrap with template."""
|
| 26 |
+
raw_md=""
|
| 27 |
+
for idx, c in enumerate(chunks):
|
| 28 |
+
raw_md += f"\n\n-----SEGEMENT{idx}-----\n\n"
|
| 29 |
+
raw_md += c.note
|
| 30 |
logger.info("[Assembler] merging %d note chunks", len(chunks))
|
| 31 |
|
| 32 |
logger.info("[Assembler] polishing with LLM…")
|
|
|
|
| 40 |
)
|
| 41 |
response = client.chat.completions.create(
|
| 42 |
model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
|
| 43 |
+
temperature=0.1,
|
| 44 |
messages=[
|
| 45 |
{
|
| 46 |
"role": "user",
|
| 47 |
"content": (
|
| 48 |
+
"You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented lecture notes into a single, coherent, detailed and logically structured study note.\n\n"
|
| 49 |
+
"**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are chronologically ordered but probably contain significant overlap and redundancy.\n\n"
|
| 50 |
+
"**Primary Goal:** Create a comprehensive and well-organized final document from the provided segements.\n\n"
|
| 51 |
"**Key Instructions:**\n"
|
| 52 |
+
"1. **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. \n"
|
| 53 |
+
"2. **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
|
| 54 |
+
"3. **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
|
|
|
|
| 55 |
"**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
|
| 56 |
"Here are the fragmented notes to process:\n\n"
|
| 57 |
f"{raw_md}"
|
lec2note/types.py
CHANGED
|
@@ -7,7 +7,7 @@ from pathlib import Path
|
|
| 7 |
from typing import List, Dict, Any
|
| 8 |
|
| 9 |
__all__ = [
|
| 10 |
-
"
|
| 11 |
"FinalChunk",
|
| 12 |
"NoteChunk",
|
| 13 |
"Chunk",
|
|
@@ -15,15 +15,23 @@ __all__ = [
|
|
| 15 |
|
| 16 |
|
| 17 |
@dataclass
|
| 18 |
-
class
|
| 19 |
-
start: float
|
| 20 |
end: float
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
@dataclass
|
| 24 |
-
class
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
@dataclass
|
|
|
|
| 7 |
from typing import List, Dict, Any
|
| 8 |
|
| 9 |
__all__ = [
|
| 10 |
+
"VisualChunk",
|
| 11 |
"FinalChunk",
|
| 12 |
"NoteChunk",
|
| 13 |
"Chunk",
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
@dataclass
|
| 18 |
+
class MicroChunk: # noqa: D101
|
| 19 |
+
start: float
|
| 20 |
end: float
|
| 21 |
+
subtitle: str
|
| 22 |
+
keyframe_path: Path=field(default=None)
|
| 23 |
|
|
|
|
| 24 |
@dataclass
|
| 25 |
+
class VisualChunk: # noqa: D101
|
| 26 |
+
start: float
|
| 27 |
+
end: float
|
| 28 |
+
subtitles: str
|
| 29 |
+
image_path: Path=field(default=None)
|
| 30 |
+
|
| 31 |
+
@dataclass
|
| 32 |
+
class FinalChunk: # noqa: D101
|
| 33 |
+
visual_chunks: List[VisualChunk]
|
| 34 |
+
|
| 35 |
|
| 36 |
|
| 37 |
@dataclass
|
lec2note/utils/__pycache__/logging_config.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/utils/__pycache__/logging_config.cpython-310.pyc and b/lec2note/utils/__pycache__/logging_config.cpython-310.pyc differ
|
|
|
lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc
CHANGED
|
Binary files a/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc differ
|
|
|
lec2note/vision/frame_extractor.py
CHANGED
|
@@ -67,7 +67,6 @@ class FrameExtractor: # noqa: D101
|
|
| 67 |
cv2.imwrite(str(out_fp), frame)
|
| 68 |
saved.append(out_fp)
|
| 69 |
cap.release()
|
| 70 |
-
logger.info("[FrameExtractor] captured %d frames", len(saved))
|
| 71 |
return saved
|
| 72 |
|
| 73 |
|
|
|
|
| 67 |
cv2.imwrite(str(out_fp), frame)
|
| 68 |
saved.append(out_fp)
|
| 69 |
cap.release()
|
|
|
|
| 70 |
return saved
|
| 71 |
|
| 72 |
|