LRU1 committed on
Commit
819e875
·
1 Parent(s): dd9b31b

basic functions done

Browse files
Files changed (26) hide show
  1. app.py +66 -0
  2. lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc +0 -0
  3. lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc +0 -0
  4. lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc +0 -0
  5. lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc +0 -0
  6. lec2note/ingestion/whisper_runner.py +5 -1
  7. lec2note/processing/__pycache__/processor.cpython-310.pyc +0 -0
  8. lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc +0 -0
  9. lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc +0 -0
  10. lec2note/scripts/run_pipeline.py +16 -3
  11. lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc +0 -0
  12. lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc +0 -0
  13. lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc +0 -0
  14. lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc +0 -0
  15. lec2note/segmentation/chunk_merger.py +5 -1
  16. lec2note/segmentation/semantic_segmenter.py +2 -6
  17. lec2note/segmentation/visual_merger.py +24 -4
  18. lec2note/segmentation/visual_segmenter.py +0 -52
  19. lec2note/synthesis/__pycache__/assembler.cpython-310.pyc +0 -0
  20. lec2note/synthesis/assembler.py +4 -2
  21. lec2note/utils/__pycache__/logging_config.cpython-312.pyc +0 -0
  22. lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc +0 -0
  23. lec2note/vision/__pycache__/image_comparator.cpython-310.pyc +0 -0
  24. lec2note/vision/__pycache__/image_sampler.cpython-310.pyc +0 -0
  25. lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc +0 -0
  26. requirements.txt +3 -1
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pathlib import Path
3
+ import tempfile
4
+ import subprocess
5
+ import textwrap
6
+
7
+ st.set_page_config(page_title="Lec2Note2 – Lecture-to-Notes", layout="wide")
8
+
9
+ st.title("📝 Lec2Note2 – Automatic Lecture Notes Generator")
10
+
11
+ st.markdown(
12
+ textwrap.dedent(
13
+ """
14
+ Upload a lecture **video** and receive a fully-formatted **Markdown** study note – complete with key images and structured sections.
15
+ The processing pipeline performs ASR transcription, vision & semantic segmentation, then invokes an LLM to produce rich notes.
16
+ """
17
+ )
18
+ )
19
+
20
+ video_file = st.file_uploader("🎬 Upload MP4/MKV/AVI", type=["mp4", "mkv", "avi"])
21
+
22
+ run_btn = st.button("🚀 Generate Notes", disabled=video_file is None)
23
+
24
+ if run_btn and video_file:
25
+ # Save upload to a temporary file
26
+ tmp_dir = tempfile.TemporaryDirectory()
27
+ vid_path = Path(tmp_dir.name) / video_file.name
28
+ with vid_path.open("wb") as f:
29
+ f.write(video_file.read())
30
+
31
+ output_md = vid_path.with_suffix(".md")
32
+
33
+ st.info("Processing started. This may take several minutes depending on video length …")
34
+ progress_txt = st.empty()
35
+
36
+ # Run pipeline via subprocess to avoid blocking UI; capture logs
37
+ with st.spinner("Running Lec2Note2 pipeline …"):
38
+ result = subprocess.run(
39
+ [
40
+ "python",
41
+ "-m",
42
+ "lec2note.scripts.run_pipeline",
43
+ "--video",
44
+ str(vid_path),
45
+ "--output",
46
+ str(output_md),
47
+ ],
48
+ text=True,
49
+ capture_output=True,
50
+ )
51
+ if result.returncode != 0:
52
+ st.error("❌ Pipeline failed. See logs below.")
53
+ with st.expander("Show logs"):
54
+ st.code(result.stderr + "\n" + result.stdout)
55
+ else:
56
+ st.success("✅ Notes generated!")
57
+ md_content = output_md.read_text()
58
+ st.markdown(md_content)
59
+ st.download_button(
60
+ label="💾 Download notes.md",
61
+ data=md_content,
62
+ file_name="lecture_notes.md",
63
+ mime="text/markdown",
64
+ )
65
+
66
+ tmp_dir.cleanup()
lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc and b/lec2note/ingestion/__pycache__/audio_extractor.cpython-310.pyc differ
 
lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc and b/lec2note/ingestion/__pycache__/audio_extractor.cpython-312.pyc differ
 
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
 
lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-312.pyc differ
 
lec2note/ingestion/whisper_runner.py CHANGED
@@ -10,6 +10,7 @@ from typing import List, Dict, Optional, Any
10
 
11
  import torch
12
  from whisper import load_model # type: ignore
 
13
 
14
  __all__ = ["WhisperRunner"]
15
 
@@ -40,7 +41,7 @@ class WhisperRunner: # noqa: D101
40
 
41
  # convert to our schema
42
  logger.info("[Whisper] got %d segments", len(segments))
43
- return [
44
  {
45
  "start": round(seg["start"], 2),
46
  "end": round(seg["end"], 2),
@@ -48,3 +49,6 @@ class WhisperRunner: # noqa: D101
48
  }
49
  for seg in segments
50
  ]
 
 
 
 
10
 
11
  import torch
12
  from whisper import load_model # type: ignore
13
+ import json
14
 
15
  __all__ = ["WhisperRunner"]
16
 
 
41
 
42
  # convert to our schema
43
  logger.info("[Whisper] got %d segments", len(segments))
44
+ results = [
45
  {
46
  "start": round(seg["start"], 2),
47
  "end": round(seg["end"], 2),
 
49
  }
50
  for seg in segments
51
  ]
52
+ with open(audio_path.with_suffix(".json"), "w") as f:
53
+ json.dump(results, f, indent=2)
54
+ return results
lec2note/processing/__pycache__/processor.cpython-310.pyc CHANGED
Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
 
lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc CHANGED
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-310.pyc differ
 
lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc CHANGED
Binary files a/lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc and b/lec2note/scripts/__pycache__/run_pipeline.cpython-312.pyc differ
 
lec2note/scripts/run_pipeline.py CHANGED
@@ -13,9 +13,10 @@ from pathlib import Path
13
  from lec2note.ingestion.audio_extractor import AudioExtractor
14
  from lec2note.utils.logging_config import setup_logging
15
  from lec2note.ingestion.whisper_runner import WhisperRunner
16
- from lec2note.segmentation.visual_segmenter import VisualSegmenter
17
- from lec2note.segmentation.semantic_segmenter import SemanticSegmenter
18
  from lec2note.segmentation.chunk_merger import ChunkMerger
 
 
 
19
  from lec2note.processing.processor import Processor
20
  from lec2note.synthesis.assembler import Assembler
21
 
@@ -36,7 +37,19 @@ def main(): # noqa: D401
36
 
37
  final_chunks = ChunkMerger.run(subtitles, video_path)
38
 
39
- note_chunks = [Processor.generate_note(fc, subtitles) for fc in final_chunks]
 
 
 
 
 
 
 
 
 
 
 
 
40
  markdown = Assembler.merge(note_chunks)
41
  Assembler.save(markdown, args.output)
42
  print(f"Saved markdown to {args.output}")
 
13
  from lec2note.ingestion.audio_extractor import AudioExtractor
14
  from lec2note.utils.logging_config import setup_logging
15
  from lec2note.ingestion.whisper_runner import WhisperRunner
 
 
16
  from lec2note.segmentation.chunk_merger import ChunkMerger
17
+ # parallel execution
18
+ from concurrent.futures import ThreadPoolExecutor, as_completed
19
+ from tqdm.auto import tqdm # progress bar
20
  from lec2note.processing.processor import Processor
21
  from lec2note.synthesis.assembler import Assembler
22
 
 
37
 
38
  final_chunks = ChunkMerger.run(subtitles, video_path)
39
 
40
+ # generate notes in parallel (IO-bound: LLM API calls threads sufficient)
41
+ note_chunks = []
42
+ with ThreadPoolExecutor(max_workers=min(8, len(final_chunks))) as pool, tqdm(
43
+ total=len(final_chunks), desc="Generating notes", unit="chunk"
44
+ ) as pbar:
45
+ future_map = {
46
+ pool.submit(Processor.generate_note, fc, subtitles): fc
47
+ for fc in final_chunks
48
+ }
49
+ for fut in as_completed(future_map):
50
+ note_chunks.append(fut.result())
51
+ pbar.update(1)
52
+
53
  markdown = Assembler.merge(note_chunks)
54
  Assembler.save(markdown, args.output)
55
  print(f"Saved markdown to {args.output}")
lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc and b/lec2note/segmentation/__pycache__/chunk_merger.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc and b/lec2note/segmentation/__pycache__/semantic_segmenter.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc CHANGED
Binary files a/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc and b/lec2note/segmentation/__pycache__/sentence_chunker.cpython-310.pyc differ
 
lec2note/segmentation/__pycache__/visual_merger.cpython-310.pyc ADDED
Binary file (1.75 kB). View file
 
lec2note/segmentation/chunk_merger.py CHANGED
@@ -55,7 +55,11 @@ class ChunkMerger: # noqa: D101
55
  # 4. map micro to topic & sample images
56
  final_chunks: List[FinalChunk] = []
57
  for ch in topic_chunks_dict:
58
- imgs = [mc["keyframe_path"] for mc in micro_chunks if ch["start"] <= mc["start"] < ch["end"]]
 
 
 
 
59
  imgs_sampled = ImageSampler.sample(imgs, max_n=6)
60
  fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
61
  final_chunks.append(fc)
 
55
  # 4. map micro to topic & sample images
56
  final_chunks: List[FinalChunk] = []
57
  for ch in topic_chunks_dict:
58
+ imgs = [
59
+ mc["keyframe_path"]
60
+ for mc in micro_chunks
61
+ if ch["start"] <= mc["start"] < ch["end"] and mc.get("keyframe_path") and Path(mc["keyframe_path"]).exists()
62
+ ]
63
  imgs_sampled = ImageSampler.sample(imgs, max_n=6)
64
  fc = FinalChunk(start=ch["start"], end=ch["end"], images=[Path(p) for p in imgs_sampled])
65
  final_chunks.append(fc)
lec2note/segmentation/semantic_segmenter.py CHANGED
@@ -39,12 +39,8 @@ class SemanticSegmenter: # noqa: D101
39
  buf_emb = embeddings[0]
40
  for i in range(1, len(slide_chunks)):
41
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
42
- duration = buffer["end"] - buffer["start"]
43
- if duration > 120 and sim < 0.8: # too long and not similar => split
44
- refined.append(buffer)
45
- buffer = slide_chunks[i].copy()
46
- buf_emb = embeddings[i]
47
- elif duration < 10 and sim > 0.9: # too short and similar => merge
48
  buffer["end"] = slide_chunks[i]["end"]
49
  else:
50
  refined.append(buffer)
 
39
  buf_emb = embeddings[0]
40
  for i in range(1, len(slide_chunks)):
41
  sim = float(util.cos_sim(buf_emb, embeddings[i]))
42
+ print("semantic sim:",sim)
43
+ if sim > 0.7: # too similar => merge
 
 
 
 
44
  buffer["end"] = slide_chunks[i]["end"]
45
  else:
46
  refined.append(buffer)
lec2note/segmentation/visual_merger.py CHANGED
@@ -37,13 +37,13 @@ class VisualMerger: # noqa: D101
37
  cls,
38
  micro_chunks: List[Dict],
39
  *,
40
- sim_threshold: float = 0.9,
41
  ) -> List[Dict]:
42
  if not micro_chunks:
43
  return []
44
 
45
  visual_chunks: List[Dict] = []
46
- buffer = micro_chunks[0].copy()
47
  for mc in micro_chunks[1:]:
48
  # compare buffer keyframe (last sentence in current block) with mc keyframe
49
  try:
@@ -51,13 +51,33 @@ class VisualMerger: # noqa: D101
51
  except Exception as exc: # noqa: BLE001
52
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
53
  sim = 0.0 # force split
 
54
  if sim >= sim_threshold:
55
  # merge: extend end and replace keyframe/path to current (last)
56
  buffer["end"] = mc["end"]
57
  buffer["keyframe_path"] = mc["keyframe_path"]
58
  else:
59
- visual_chunks.append({"start": buffer["start"], "end": buffer["end"]})
 
 
 
 
60
  buffer = mc.copy()
61
- visual_chunks.append({"start": buffer["start"], "end": buffer["end"]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  logger.info("[VisualMerger] merged %d micro → %d visual chunks", len(micro_chunks), len(visual_chunks))
63
  return visual_chunks
 
37
  cls,
38
  micro_chunks: List[Dict],
39
  *,
40
+ sim_threshold: float = 0.8,
41
  ) -> List[Dict]:
42
  if not micro_chunks:
43
  return []
44
 
45
  visual_chunks: List[Dict] = []
46
+ buffer = micro_chunks[0].copy() # includes keyframe_path
47
  for mc in micro_chunks[1:]:
48
  # compare buffer keyframe (last sentence in current block) with mc keyframe
49
  try:
 
51
  except Exception as exc: # noqa: BLE001
52
  logger.warning("[VisualMerger] similarity calc failed: %s", exc)
53
  sim = 0.0 # force split
54
+ print("visual sim:",sim)
55
  if sim >= sim_threshold:
56
  # merge: extend end and replace keyframe/path to current (last)
57
  buffer["end"] = mc["end"]
58
  buffer["keyframe_path"] = mc["keyframe_path"]
59
  else:
60
+ visual_chunks.append({
61
+ "start": buffer["start"],
62
+ "end": buffer["end"],
63
+ "keyframe_path": buffer["keyframe_path"],
64
+ })
65
  buffer = mc.copy()
66
+ visual_chunks.append({
67
+ "start": buffer["start"],
68
+ "end": buffer["end"],
69
+ "keyframe_path": buffer["keyframe_path"],
70
+ })
71
+
72
+ # Optional: remove micro keyframes that are not kept
73
+ kept = {vc["keyframe_path"] for vc in visual_chunks}
74
+ for mc in micro_chunks:
75
+ kp = mc.get("keyframe_path", "")
76
+ if kp and kp not in kept:
77
+ try:
78
+ Path(kp).unlink(missing_ok=True)
79
+ except Exception: # noqa: BLE001
80
+ pass
81
+
82
  logger.info("[VisualMerger] merged %d micro → %d visual chunks", len(micro_chunks), len(visual_chunks))
83
  return visual_chunks
lec2note/segmentation/visual_segmenter.py DELETED
@@ -1,52 +0,0 @@
1
- """Visual segmentation based on keyframe timestamps.
2
-
3
- This module identifies slide boundaries by extracting keyframes first (via
4
- ``lec2note.vision.keyframe_extractor``), then converting frame indices to time
5
- range based on video FPS.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import logging
11
- from pathlib import Path
12
- from typing import List, Dict
13
-
14
- import cv2 # type: ignore
15
-
16
- from lec2note.vision.keyframe_extractor import KeyframeExtractor
17
- from lec2note.types import SlideChunk
18
-
19
- __all__ = ["VisualSegmenter"]
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class VisualSegmenter: # noqa: D101
25
- @classmethod
26
- def run(cls, video_fp: str | Path) -> List[Dict]: # slide_chunks list of dict
27
- """Return list of ``{start, end}`` slide-level chunks."""
28
- video_path = Path(video_fp).expanduser().resolve()
29
- logger.info("[VisualSegmenter] start visual segmentation on %s", video_path.name)
30
- keyframes = KeyframeExtractor.run(video_path,threshold=0.2)
31
- if not keyframes:
32
- # fallback single chunk whole video
33
- cap = cv2.VideoCapture(str(video_path))
34
- duration = cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS)
35
- cap.release()
36
- return [{"start": 0.0, "end": duration}]
37
-
38
- # Determine timestamp for each keyframe: assume filename kf_idx order matches frame order
39
- cap = cv2.VideoCapture(str(video_path))
40
- fps = cap.get(cv2.CAP_PROP_FPS)
41
- cap.release()
42
-
43
- indices = [int(p.stem.split("_")[1]) for p in keyframes]
44
- indices.sort()
45
- times = [idx / fps for idx in indices]
46
- times.append(float("inf")) # sentinel for last end
47
-
48
- slide_chunks: List[Dict] = []
49
- for i in range(len(times) - 1):
50
- slide_chunks.append({"start": times[i], "end": times[i + 1]})
51
- logger.info("[VisualSegmenter] generated %d slide chunks", len(slide_chunks))
52
- return slide_chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc CHANGED
Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
 
lec2note/synthesis/assembler.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  from __future__ import annotations
4
 
 
5
  import logging
6
  from pathlib import Path
7
  logger = logging.getLogger(__name__)
@@ -12,7 +13,7 @@ from lec2note.types import NoteChunk
12
 
13
  __all__ = ["Assembler"]
14
 
15
- TEMPLATE = """# 讲座笔记
16
 
17
  {content}
18
  """
@@ -38,7 +39,7 @@ class Assembler: # noqa: D101
38
  api_key=os.getenv("OPENAI_API_KEY"),
39
  )
40
  response = client.chat.completions.create(
41
- model=getenv("OPENAI_MODEL", "gpt-4o-mini"),
42
  temperature=0.3,
43
  messages=[
44
  {
@@ -61,6 +62,7 @@ class Assembler: # noqa: D101
61
  )
62
  polished = response.choices[0].message.content.strip()
63
  except Exception: # noqa: BLE001
 
64
  polished = raw_md
65
 
66
  logger.info("[Assembler] final document length %d chars", len(polished))
 
2
 
3
  from __future__ import annotations
4
 
5
+ import os
6
  import logging
7
  from pathlib import Path
8
  logger = logging.getLogger(__name__)
 
13
 
14
  __all__ = ["Assembler"]
15
 
16
+ TEMPLATE = """
17
 
18
  {content}
19
  """
 
39
  api_key=os.getenv("OPENAI_API_KEY"),
40
  )
41
  response = client.chat.completions.create(
42
+ model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
43
  temperature=0.3,
44
  messages=[
45
  {
 
62
  )
63
  polished = response.choices[0].message.content.strip()
64
  except Exception: # noqa: BLE001
65
+ logging.error("Error in Assembler.merge", exc_info=True)
66
  polished = raw_md
67
 
68
  logger.info("[Assembler] final document length %d chars", len(polished))
lec2note/utils/__pycache__/logging_config.cpython-312.pyc ADDED
Binary file (1.07 kB). View file
 
lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/frame_extractor.cpython-310.pyc differ
 
lec2note/vision/__pycache__/image_comparator.cpython-310.pyc ADDED
Binary file (1.88 kB). View file
 
lec2note/vision/__pycache__/image_sampler.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/image_sampler.cpython-310.pyc and b/lec2note/vision/__pycache__/image_sampler.cpython-310.pyc differ
 
lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc CHANGED
Binary files a/lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc and b/lec2note/vision/__pycache__/keyframe_extractor.cpython-310.pyc differ
 
requirements.txt CHANGED
@@ -16,4 +16,6 @@ openai>=1.35.0 # 新 SDK,支持 OpenRouter & httpx 0.28+
16
  httpx>=0.28,<0.30
17
  anyio>=3.7,<4.0
18
  scikit-image==0.25.1
19
- imagehash==4.3.1
 
 
 
16
  httpx>=0.28,<0.30
17
  anyio>=3.7,<4.0
18
  scikit-image==0.25.1
19
+ imagehash==4.3.1
20
+ tenacity==8.2.3
21
+ streamlit>=1.35