LRU1 committed on
Commit
c246438
·
1 Parent(s): 39f25d4

ensure the picture corresponds to the subtitles

Browse files
app.py CHANGED
@@ -3,7 +3,7 @@ from pathlib import Path
3
  import tempfile, subprocess, threading, queue
4
  import textwrap
5
 
6
- st.set_page_config(page_title="Lec2Note – Lecture-to-Notes", layout="wide")
7
 
8
  st.title("📝 Lec2Note – Automatic Lecture Notes Generator")
9
 
 
3
  import tempfile, subprocess, threading, queue
4
  import textwrap
5
 
6
+ st.set_page_config(page_title="Lec2Note2 – Lecture-to-Notes", layout="wide")
7
 
8
  st.title("📝 Lec2Note – Automatic Lecture Notes Generator")
9
 
lec2note/__pycache__/types.cpython-310.pyc CHANGED
Binary files a/lec2note/__pycache__/types.cpython-310.pyc and b/lec2note/__pycache__/types.cpython-310.pyc differ
 
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
 
lec2note/ingestion/whisper_runner.py CHANGED
@@ -29,6 +29,11 @@ class WhisperRunner: # noqa: D101
29
  - The function is *blocking* and can be called inside a Prefect task.
30
  """
31
  audio_path = Path(audio_fp).expanduser().resolve()
 
 
 
 
 
32
  if not audio_path.exists():
33
  raise FileNotFoundError(audio_path)
34
 
@@ -87,6 +92,6 @@ class WhisperRunner: # noqa: D101
87
  }
88
  for seg in segments
89
  ]
90
- # with open(audio_path.with_suffix(".json"), "w") as f:
91
- # json.dump(results, f, indent=2)
92
  return results
 
29
  - The function is *blocking* and can be called inside a Prefect task.
30
  """
31
  audio_path = Path(audio_fp).expanduser().resolve()
32
+ sub_path=audio_path.with_suffix(".json")
33
+ if sub_path.exists():
34
+ logger.info("[Whisper] loading existing subtitles.")
35
+ with open(sub_path, "r") as f:
36
+ return json.load(f)
37
  if not audio_path.exists():
38
  raise FileNotFoundError(audio_path)
39
 
 
92
  }
93
  for seg in segments
94
  ]
95
+ with open(sub_path, "w") as f:
96
+ json.dump(results, f, indent=2)
97
  return results
lec2note/processing/__pycache__/processor.cpython-310.pyc CHANGED
Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
 
lec2note/processing/processor.py CHANGED
@@ -48,18 +48,19 @@ class Processor: # noqa: D101
48
 
49
  @classmethod
50
  @classmethod
51
- def _build_messages(cls, synced: Dict[str, Any]) -> List[Dict[str, Any]]:
52
- subtitle_text = " ".join(synced["text"])
53
 
54
- # insert numbered placeholders into subtitles for reference
55
- placeholder_subs = subtitle_text
56
- for idx, _ in enumerate(synced["images"], start=1):
57
- placeholder_subs += f"\n\n[IMG{idx}]"
 
58
 
59
  # Prompt with explicit mapping guidance
60
  prompt_text = (
61
- "**Role**: You are an expert academic assistant tasked with creating a definitive set of study notes from a lecture.\n\n"
62
  "**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
 
63
  "**Key Instructions**:\n\n"
64
  "1. **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
65
  "2. **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
@@ -71,27 +72,28 @@ class Processor: # noqa: D101
71
  "5. **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
72
  "---BEGIN LECTURE MATERIALS---\n"
73
  f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
 
74
  )
75
 
76
  parts: List[Dict[str, Any]] = [
77
  {"type": "text", "text": prompt_text}
78
  ]
79
- for idx, img_fp in enumerate(synced["images"][:10], start=1): # Limit to 6 images
80
  parts.append({
81
  "type": "image_url",
82
  "image_url": {
83
- "url": cls._img_to_data_uri(Path(img_fp)),
84
- "detail": f"IMG{idx}", # label matches placeholder
85
  },
86
  })
 
87
  return [{"role": "user", "content": parts}]
88
 
89
  @classmethod
90
- def generate_note(cls, chunk: FinalChunk, subtitles: List[Dict]) -> NoteChunk:
91
  """Generate a single NoteChunk from FinalChunk data."""
92
  # collect text for this chunk
93
- texts = [s["text"] for s in subtitles if chunk.start <= s["start"] < chunk.end]
94
- synced = {"text": texts, "images": chunk.images}
95
- messages = cls._build_messages(synced)
96
  note = cls._call_llm(messages)
97
- return NoteChunk(note=note, images=chunk.images)
 
 
48
 
49
  @classmethod
50
  @classmethod
51
+ def _build_messages(cls, chunk: FinalChunk) -> List[Dict[str, Any]]:
 
52
 
53
+ placeholder_subs="-"*10+"\n\n"
54
+ for idx, visual_chunk in enumerate(chunk.visual_chunks, start=1):
55
+ placeholder_subs+=visual_chunk.subtitles
56
+ placeholder_subs+=f"[IMG{idx}](img corresponding to the subtitles)\n\n"
57
+ placeholder_subs+=("-"*10+"\n\n")
58
 
59
  # Prompt with explicit mapping guidance
60
  prompt_text = (
61
+ "**Role**: You are an expert academic assistant tasked with taking notes from a part of a lecture which will be ultimately merged into a comprehensive and detailed note.\n\n"
62
  "**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
63
+ "**Return Format**: Only return the **note** in Markdown format.\n\n"
64
  "**Key Instructions**:\n\n"
65
  "1. **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
66
  "2. **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
 
72
  "5. **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
73
  "---BEGIN LECTURE MATERIALS---\n"
74
  f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
75
+ "---END LECTURE MATERIALS---\n"
76
  )
77
 
78
  parts: List[Dict[str, Any]] = [
79
  {"type": "text", "text": prompt_text}
80
  ]
81
+ for idx,visual_chunk in enumerate(chunk.visual_chunks, start=1): # attach every visual chunk's image (no limit applied)
82
  parts.append({
83
  "type": "image_url",
84
  "image_url": {
85
+ "url": cls._img_to_data_uri(visual_chunk.image_path),
86
+ "detail": f"IMG{idx}",
87
  },
88
  })
89
+
90
  return [{"role": "user", "content": parts}]
91
 
92
  @classmethod
93
+ def generate_note(cls, chunk: FinalChunk) -> NoteChunk:
94
  """Generate a single NoteChunk from FinalChunk data."""
95
  # collect text for this chunk
96
+ messages = cls._build_messages(chunk)
 
 
97
  note = cls._call_llm(messages)
98
+ images=[visual_chunk.image_path for visual_chunk in chunk.visual_chunks]
99
+ return NoteChunk(note=note, images=images)
lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc CHANGED
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc differ
 
lec2note/scripts/run_pipeline.py CHANGED
@@ -43,7 +43,7 @@ def main(): # noqa: D401
43
  total=len(final_chunks), desc="Generating notes", unit="chunk"
44
  ) as pbar:
45
  future_map = {
46
- pool.submit(Processor.generate_note, fc, subtitles): fc
47
  for fc in final_chunks
48
  }
49
  for fut in as_completed(future_map):
 
43
  total=len(final_chunks), desc="Generating notes", unit="chunk"
44
  ) as pbar:
45
  future_map = {
46
+ pool.submit(Processor.generate_note, fc): fc
47
  for fc in final_chunks
48
  }
49
  for fut in as_completed(future_map):
lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc and b/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc and b/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc differ
 
lec2note/segmentation/chunk_merger.py CHANGED
@@ -50,19 +50,8 @@ class ChunkMerger: # noqa: D101
50
  visual_chunks = VisualMerger.merge(micro_chunks)
51
 
52
  # 3. semantic merge – refine by subtitle semantics
53
- topic_chunks_dict = SemanticSegmenter.refine(visual_chunks, subtitles)
54
-
55
- # 4. map micro to topic & sample images
56
- final_chunks: List[FinalChunk] = []
57
- for ch in topic_chunks_dict:
58
- imgs = [
59
- mc["keyframe_path"]
60
- for mc in micro_chunks
61
- if ch["start"] <= mc["start"] < ch["end"] and mc.get("keyframe_path") and Path(mc["keyframe_path"]).exists()
62
- ]
63
- imgs_sampled = ImageSampler.sample(imgs, max_n=6)
64
- fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
65
- final_chunks.append(fc)
66
  logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
67
  return final_chunks
68
 
 
50
  visual_chunks = VisualMerger.merge(micro_chunks)
51
 
52
  # 3. semantic merge – refine by subtitle semantics
53
+ final_chunks = SemanticSegmenter.refine(visual_chunks)
54
+
 
 
 
 
 
 
 
 
 
 
 
55
  logger.info("[ChunkMerger] produced %d final topic chunks", len(final_chunks))
56
  return final_chunks
57
 
lec2note/segmentation/semantic_segmenter.py CHANGED
@@ -7,7 +7,7 @@ import logging
7
  from typing import List, Dict
8
 
9
  from sentence_transformers import SentenceTransformer, util # type: ignore
10
-
11
  logger = logging.getLogger(__name__)
12
 
13
  __all__ = ["SemanticSegmenter"]
@@ -17,35 +17,31 @@ class SemanticSegmenter: # noqa: D101
17
  _model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
18
 
19
  @classmethod
20
- def refine(cls, slide_chunks: List[Dict], subtitles: List[Dict]) -> List[Dict]:
21
  """Split long chunks or merge short ones by semantic change."""
22
- if not slide_chunks:
23
- logger.warning("[SemanticSegmenter] empty slide_chunks input")
24
  return []
25
 
26
  # Build text per chunk
27
  chunk_texts: List[str] = []
28
- for ch in slide_chunks:
29
- txt = []
30
- for s in subtitles:
31
- if ch["start"] <= s["start"] < ch["end"]:
32
- txt.append(s["text"])
33
- chunk_texts.append(" ".join(txt))
34
 
35
  embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
36
 
37
- refined: List[Dict] = []
38
- buffer = slide_chunks[0].copy()
39
  buf_emb = embeddings[0]
40
- for i in range(1, len(slide_chunks)):
 
41
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
42
  # print("semantic sim:",sim)
43
  if sim > 0.55: # too similar => merge
44
- buffer["end"] = slide_chunks[i]["end"]
45
  else:
46
- refined.append(buffer)
47
- buffer = slide_chunks[i].copy()
48
- buf_emb = embeddings[i]
49
- refined.append(buffer)
50
- logger.info("[SemanticSegmenter] refined %d→%d chunks", len(slide_chunks), len(refined))
51
  return refined
 
7
  from typing import List, Dict
8
 
9
  from sentence_transformers import SentenceTransformer, util # type: ignore
10
+ from lec2note.types import VisualChunk,FinalChunk
11
  logger = logging.getLogger(__name__)
12
 
13
  __all__ = ["SemanticSegmenter"]
 
17
  _model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
18
 
19
  @classmethod
20
+ def refine(cls, visual_chunks: List[VisualChunk],) -> List[FinalChunk]:
21
  """Split long chunks or merge short ones by semantic change."""
22
+ if not visual_chunks:
23
+ logger.warning("[SemanticSegmenter] empty visual_chunks input")
24
  return []
25
 
26
  # Build text per chunk
27
  chunk_texts: List[str] = []
28
+ for ch in visual_chunks:
29
+ chunk_texts.append(ch.subtitles)
 
 
 
 
30
 
31
  embeddings = cls._model.encode(chunk_texts, convert_to_tensor=True)
32
 
33
+ refined: List[FinalChunk] = []
 
34
  buf_emb = embeddings[0]
35
+ buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[0]])
36
+ for i in range(1, len(visual_chunks)):
37
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
38
  # print("semantic sim:",sim)
39
  if sim > 0.55: # too similar => merge
40
+ buf_visual_chunks.visual_chunks.append(visual_chunks[i])
41
  else:
42
+ refined.append(buf_visual_chunks)
43
+ buf_visual_chunks=FinalChunk(visual_chunks=[visual_chunks[i]])
44
+ buf_emb = embeddings[i]
45
+ refined.append(buf_visual_chunks)
46
+ logger.info("[SemanticSegmenter] refined %d→%d chunks", len(visual_chunks), len(refined))
47
  return refined
lec2note/segmentation/sentence_chunker.py CHANGED
@@ -21,7 +21,7 @@ from pathlib import Path
21
  from typing import List, Dict
22
 
23
  from lec2note.vision.frame_extractor import FrameExtractor
24
-
25
  __all__ = ["SentenceChunker"]
26
 
27
  logger = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ class SentenceChunker: # noqa: D101
35
  video_fp: str | Path,
36
  *,
37
  output_dir: str | Path | None = None,
38
- ) -> List[Dict]:
39
  """Generate micro-chunks aligned with subtitle sentences.
40
 
41
  Parameters
@@ -52,7 +52,7 @@ class SentenceChunker: # noqa: D101
52
  if not video_path.exists():
53
  raise FileNotFoundError(video_path)
54
 
55
- micro_chunks: List[Dict] = []
56
  timestamps: List[float] = [s["end"] for s in subtitles]
57
  keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
58
  # ensure same length
@@ -64,12 +64,12 @@ class SentenceChunker: # noqa: D101
64
  )
65
 
66
  for idx, sub in enumerate(subtitles):
67
- chunk = {
68
- "start": sub["start"],
69
- "end": sub["end"],
70
- "text": sub["text"],
71
- "keyframe_path": str(keyframe_paths[idx]) if idx < len(keyframe_paths) else "",
72
- }
73
  micro_chunks.append(chunk)
74
  logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
75
  return micro_chunks
 
21
  from typing import List, Dict
22
 
23
  from lec2note.vision.frame_extractor import FrameExtractor
24
+ from lec2note.types import MicroChunk
25
  __all__ = ["SentenceChunker"]
26
 
27
  logger = logging.getLogger(__name__)
 
35
  video_fp: str | Path,
36
  *,
37
  output_dir: str | Path | None = None,
38
+ ) -> List[MicroChunk]:
39
  """Generate micro-chunks aligned with subtitle sentences.
40
 
41
  Parameters
 
52
  if not video_path.exists():
53
  raise FileNotFoundError(video_path)
54
 
55
+ micro_chunks: List[MicroChunk] = []
56
  timestamps: List[float] = [s["end"] for s in subtitles]
57
  keyframe_paths = FrameExtractor.capture_at(video_path, timestamps, output_dir=output_dir)
58
  # ensure same length
 
64
  )
65
 
66
  for idx, sub in enumerate(subtitles):
67
+ chunk = MicroChunk(
68
+ start=sub["start"],
69
+ end=sub["end"],
70
+ subtitle=sub["text"],
71
+ keyframe_path=keyframe_paths[idx] if idx < len(keyframe_paths) else "",
72
+ )
73
  micro_chunks.append(chunk)
74
  logger.info("[SentenceChunker] generated %d micro-chunks", len(micro_chunks))
75
  return micro_chunks
lec2note/segmentation/visual_merger.py CHANGED
@@ -23,7 +23,7 @@ for semantic refinement.
23
  import logging
24
  from pathlib import Path
25
  from typing import List, Dict
26
-
27
  from lec2note.vision.image_comparator import ImageComparator
28
 
29
  logger = logging.getLogger(__name__)
@@ -35,44 +35,48 @@ class VisualMerger: # noqa: D101
35
  @classmethod
36
  def merge(
37
  cls,
38
- micro_chunks: List[Dict],
39
  *,
40
- sim_threshold: float = 0.8,
41
- ) -> List[Dict]:
42
  if not micro_chunks:
43
  return []
44
 
45
- visual_chunks: List[Dict] = []
46
- buffer = micro_chunks[0].copy() # includes keyframe_path
 
 
 
 
 
47
  for mc in micro_chunks[1:]:
48
  # compare buffer keyframe (last sentence in current block) with mc keyframe
49
  try:
50
- sim = ImageComparator.get_similarity(buffer["keyframe_path"], mc["keyframe_path"])
51
  except Exception as exc: # noqa: BLE001
52
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
53
  sim = 0.0 # force split
54
  # print("visual sim:",sim)
55
  if sim >= sim_threshold:
56
  # merge: extend end and replace keyframe/path to current (last)
57
- buffer["end"] = mc["end"]
58
- buffer["keyframe_path"] = mc["keyframe_path"]
 
59
  else:
60
- visual_chunks.append({
61
- "start": buffer["start"],
62
- "end": buffer["end"],
63
- "keyframe_path": buffer["keyframe_path"],
64
- })
65
- buffer = mc.copy()
66
- visual_chunks.append({
67
- "start": buffer["start"],
68
- "end": buffer["end"],
69
- "keyframe_path": buffer["keyframe_path"],
70
- })
71
 
72
  # Optional: remove micro keyframes that are not kept
73
- kept = {vc["keyframe_path"] for vc in visual_chunks}
74
  for mc in micro_chunks:
75
- kp = mc.get("keyframe_path", "")
76
  if kp and kp not in kept:
77
  try:
78
  Path(kp).unlink(missing_ok=True)
 
23
  import logging
24
  from pathlib import Path
25
  from typing import List, Dict
26
+ from lec2note.types import VisualChunk,MicroChunk
27
  from lec2note.vision.image_comparator import ImageComparator
28
 
29
  logger = logging.getLogger(__name__)
 
35
  @classmethod
36
  def merge(
37
  cls,
38
+ micro_chunks: List[MicroChunk],
39
  *,
40
+ sim_threshold: float = 0.75,
41
+ ) -> List[VisualChunk]:
42
  if not micro_chunks:
43
  return []
44
 
45
+ visual_chunks: List[VisualChunk] = []
46
+ buffer:VisualChunk=VisualChunk(
47
+ start=micro_chunks[0].start,
48
+ end=micro_chunks[0].end,
49
+ image_path=micro_chunks[0].keyframe_path,
50
+ subtitles=micro_chunks[0].subtitle,
51
+ )
52
  for mc in micro_chunks[1:]:
53
  # compare buffer keyframe (last sentence in current block) with mc keyframe
54
  try:
55
+ sim = ImageComparator.get_similarity(buffer.image_path, mc.keyframe_path)
56
  except Exception as exc: # noqa: BLE001
57
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
58
  sim = 0.0 # force split
59
  # print("visual sim:",sim)
60
  if sim >= sim_threshold:
61
  # merge: extend end and replace keyframe/path to current (last)
62
+ buffer.end = mc.end
63
+ buffer.image_path = mc.keyframe_path
64
+ buffer.subtitles=buffer.subtitles+"\n\n"+mc.subtitle
65
  else:
66
+ visual_chunks.append(buffer)
67
+ buffer = VisualChunk(
68
+ start=mc.start,
69
+ end=mc.end,
70
+ image_path=mc.keyframe_path,
71
+ subtitles=mc.subtitle,
72
+ )
73
+ if visual_chunks[-1].end!= buffer.end:
74
+ visual_chunks.append(buffer)
 
 
75
 
76
  # Optional: remove micro keyframes that are not kept
77
+ kept = {vc.image_path for vc in visual_chunks}
78
  for mc in micro_chunks:
79
+ kp = mc.keyframe_path
80
  if kp and kp not in kept:
81
  try:
82
  Path(kp).unlink(missing_ok=True)
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc CHANGED
Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
 
lec2note/synthesis/assembler.py CHANGED
@@ -23,10 +23,10 @@ class Assembler: # noqa: D101
23
  @staticmethod
24
  def merge(chunks: List[NoteChunk]) -> str:
25
  """Concatenate note chunks and wrap with template."""
26
- body_parts = []
27
- for c in chunks:
28
- body_parts.append(c.note)
29
- raw_md = "\n\n".join(body_parts)
30
  logger.info("[Assembler] merging %d note chunks", len(chunks))
31
 
32
  logger.info("[Assembler] polishing with LLM…")
@@ -40,19 +40,18 @@ class Assembler: # noqa: D101
40
  )
41
  response = client.chat.completions.create(
42
  model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
43
- temperature=0.3,
44
  messages=[
45
  {
46
  "role": "user",
47
  "content": (
48
- "You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented and repetitive lecture notes into a single, coherent, and logically structured study guide.\n\n"
49
- "**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are not chronologically ordered and contain significant overlap and redundancy.\n\n"
50
- "**Primary Goal:** Create a comprehensive, well-organized, and de-duplicated final document from the provided fragments.\n\n"
51
  "**Key Instructions:**\n"
52
- "1. **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. For instance, fundamental terms like 'State vs. Observation', 'Policy', and the notation aside (s_t vs x_t) are likely defined multiple times; these must be consolidated.\n"
53
- "2. **Reorganize and Structure:** Do NOT preserve the original order. Instead, create a new, logical structure for the entire document. Use clear headings and subheadings (e.g., using Markdown's #, ##, ###) to build a clear narrative, starting from fundamental definitions and progressing to more complex topics.\n"
54
- "3. **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
55
- "4. **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
56
  "**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
57
  "Here are the fragmented notes to process:\n\n"
58
  f"{raw_md}"
 
23
  @staticmethod
24
  def merge(chunks: List[NoteChunk]) -> str:
25
  """Concatenate note chunks and wrap with template."""
26
+ raw_md=""
27
+ for idx, c in enumerate(chunks):
28
+ raw_md += f"\n\n-----SEGMENT{idx}-----\n\n"
29
+ raw_md += c.note
30
  logger.info("[Assembler] merging %d note chunks", len(chunks))
31
 
32
  logger.info("[Assembler] polishing with LLM…")
 
40
  )
41
  response = client.chat.completions.create(
42
  model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
43
+ temperature=0.1,
44
  messages=[
45
  {
46
  "role": "user",
47
  "content": (
48
+ "You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented lecture notes into a single, coherent, detailed and logically structured study note.\n\n"
49
+ "**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are chronologically ordered but probably contain significant overlap and redundancy.\n\n"
50
+ "**Primary Goal:** Create a comprehensive and well-organized final document from the provided segements.\n\n"
51
  "**Key Instructions:**\n"
52
+ "1. **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. \n"
53
+ "2. **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
54
+ "3. **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
 
55
  "**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
56
  "Here are the fragmented notes to process:\n\n"
57
  f"{raw_md}"
lec2note/types.py CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
7
  from typing import List, Dict, Any
8
 
9
  __all__ = [
10
- "SlideChunk",
11
  "FinalChunk",
12
  "NoteChunk",
13
  "Chunk",
@@ -15,15 +15,23 @@ __all__ = [
15
 
16
 
17
  @dataclass
18
- class SlideChunk: # noqa: D101
19
- start: float # seconds
20
  end: float
 
 
21
 
22
-
23
  @dataclass
24
- class FinalChunk(SlideChunk): # noqa: D101
25
- images: List[Path] = field(default_factory=list)
26
- subtitles: List[int] = field(default_factory=list) # indices in subtitles list
 
 
 
 
 
 
 
27
 
28
 
29
  @dataclass
 
7
  from typing import List, Dict, Any
8
 
9
  __all__ = [
10
+ "VisualChunk",
11
  "FinalChunk",
12
  "NoteChunk",
13
  "Chunk",
 
15
 
16
 
17
  @dataclass
18
+ class MicroChunk: # noqa: D101
19
+ start: float
20
  end: float
21
+ subtitle: str
22
+ keyframe_path: Path=field(default=None)
23
 
 
24
  @dataclass
25
+ class VisualChunk: # noqa: D101
26
+ start: float
27
+ end: float
28
+ subtitles: str
29
+ image_path: Path=field(default=None)
30
+
31
+ @dataclass
32
+ class FinalChunk: # noqa: D101
33
+ visual_chunks: List[VisualChunk]
34
+
35
 
36
 
37
  @dataclass
lec2note/utils/__pycache__/logging_config.cpython-310.pyc CHANGED
Binary files a/lec2note/utils/__pycache__/logging_config.cpython-310.pyc and b/lec2note/utils/__pycache__/logging_config.cpython-310.pyc differ
 
lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc differ
 
lec2note/vision/frame_extractor.py CHANGED
@@ -67,7 +67,6 @@ class FrameExtractor: # noqa: D101
67
  cv2.imwrite(str(out_fp), frame)
68
  saved.append(out_fp)
69
  cap.release()
70
- logger.info("[FrameExtractor] captured %d frames", len(saved))
71
  return saved
72
 
73
 
 
67
  cv2.imwrite(str(out_fp), frame)
68
  saved.append(out_fp)
69
  cap.release()
 
70
  return saved
71
 
72