hetchyy Claude Opus 4.6 committed on
Commit
16c3710
·
1 Parent(s): 84de10e

Remove dead code and add GPU VRAM profiling

Browse files

Delete unused functions: reanchor_within_surah(), clear_chapter_cache(),
log_error(). Remove unused imports across multiple files. Add GPU peak
and reserved VRAM tracking to ProfilingData. Gitignore paid-api-plan.md.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

.gitignore CHANGED
@@ -51,6 +51,7 @@ captures/
51
 
52
  docs/api.md
53
  docs/lease_duration_history.md
 
54
  scripts/
55
  tests/
56
  align_config.py
 
51
 
52
  docs/api.md
53
  docs/lease_duration_history.md
54
+ docs/paid-api-plan.md
55
  scripts/
56
  tests/
57
  align_config.py
src/alignment/alignment_pipeline.py CHANGED
@@ -36,7 +36,7 @@ def run_phoneme_matching(
36
  """
37
  from .phoneme_matcher import align_segment, get_matched_text
38
  from .phoneme_matcher_cache import get_chapter_reference
39
- from .phoneme_anchor import reanchor_within_surah, verse_to_word_index, find_anchor_by_voting
40
  from .ngram_index import get_ngram_index
41
 
42
  # Only import time if profiling enabled
 
36
  """
37
  from .phoneme_matcher import align_segment, get_matched_text
38
  from .phoneme_matcher_cache import get_chapter_reference
39
+ from .phoneme_anchor import verse_to_word_index, find_anchor_by_voting
40
  from .ngram_index import get_ngram_index
41
 
42
  # Only import time if profiling enabled
src/alignment/phoneme_anchor.py CHANGED
@@ -200,85 +200,6 @@ def find_anchor_by_voting(
200
  return (best_surah, best_run_start)
201
 
202
 
203
- def reanchor_within_surah(
204
- phoneme_texts: List[List[str]],
205
- ngram_index: PhonemeNgramIndex,
206
- surah: int,
207
- n_segments: int,
208
- ) -> int:
209
- """
210
- Re-anchor within a known surah after consecutive DP failures.
211
-
212
- Same n-gram voting as find_anchor_by_voting but:
213
- - Only counts votes for the given surah (skip all others)
214
- - Returns ayah (start of best contiguous run), or 0 if no votes
215
-
216
- Args:
217
- phoneme_texts: Remaining unprocessed phoneme lists
218
- ngram_index: Pre-built n-gram index
219
- surah: Current surah (fixed)
220
- n_segments: How many segments to use for voting
221
-
222
- Returns:
223
- ayah number to re-anchor to (0 = failed)
224
- """
225
- # Concatenate first N non-empty segments
226
- combined: List[str] = []
227
- segments_used = 0
228
- for phonemes in phoneme_texts[:n_segments]:
229
- if phonemes:
230
- combined.extend(phonemes)
231
- segments_used += 1
232
-
233
- n = ngram_index.ngram_size
234
-
235
- if ANCHOR_DEBUG:
236
- print(f"\n{'=' * 60}")
237
- print(f"RE-ANCHOR WITHIN SURAH {surah}")
238
- print(f"{'=' * 60}")
239
- print(f" Segments used: {segments_used}/{n_segments}")
240
- print(f" Combined phonemes: {len(combined)}")
241
-
242
- # Extract n-grams from ASR
243
- asr_ngrams = [
244
- tuple(combined[i : i + n])
245
- for i in range(len(combined) - n + 1)
246
- ]
247
-
248
- # Vote — only accumulate weight for positions in the given surah
249
- ayah_weights: Dict[int, float] = defaultdict(float)
250
- matched_ngrams = 0
251
-
252
- for ng in asr_ngrams:
253
- if ng not in ngram_index.ngram_positions:
254
- continue
255
- matched_ngrams += 1
256
- weight = (1.0 / ngram_index.ngram_counts[ng]) if ANCHOR_RARITY_WEIGHTING else 1.0
257
- for s, a in ngram_index.ngram_positions[ng]:
258
- if s == surah:
259
- ayah_weights[a] += weight
260
-
261
- if ANCHOR_DEBUG:
262
- print(f" N-grams matched: {matched_ngrams}/{len(asr_ngrams)}")
263
- print(f" Ayahs with votes: {len(ayah_weights)}")
264
-
265
- if not ayah_weights:
266
- if ANCHOR_DEBUG:
267
- print(f" RESULT: No votes — returning 0")
268
- print(f"{'=' * 60}\n")
269
- return 0
270
-
271
- run_start, run_end, run_weight = _find_best_contiguous_run(dict(ayah_weights))
272
-
273
- if ANCHOR_DEBUG:
274
- print(f" Best contiguous run (after trim): ayahs {run_start}-{run_end} "
275
- f"(weight={run_weight:.3f}, trim_ratio={ANCHOR_RUN_TRIM_RATIO})")
276
- print(f" RESULT: Ayah {run_start}")
277
- print(f"{'=' * 60}\n")
278
-
279
- return run_start
280
-
281
-
282
  def verse_to_word_index(chapter_ref: ChapterReference, ayah: int) -> int:
283
  """
284
  Find word index of the first word in a given ayah.
 
200
  return (best_surah, best_run_start)
201
 
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  def verse_to_word_index(chapter_ref: ChapterReference, ayah: int) -> int:
204
  """
205
  Find word index of the first word in a given ayah.
src/alignment/phoneme_asr.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import time
5
  import torch
6
  import numpy as np
7
- from typing import List, Dict, Any
8
 
9
  from config import (
10
  PHONEME_ASR_MODELS, PHONEME_ASR_MODEL_DEFAULT, DTYPE, IS_HF_SPACE, TORCH_COMPILE,
 
4
  import time
5
  import torch
6
  import numpy as np
7
+ from typing import List
8
 
9
  from config import (
10
  PHONEME_ASR_MODELS, PHONEME_ASR_MODEL_DEFAULT, DTYPE, IS_HF_SPACE, TORCH_COMPILE,
src/alignment/phoneme_matcher_cache.py CHANGED
@@ -51,9 +51,3 @@ def preload_all_chapters() -> None:
51
  for surah in range(1, 115):
52
  get_chapter_reference(surah)
53
  print(f"[CACHE] All 114 chapters built at runtime")
54
-
55
-
56
- def clear_chapter_cache() -> None:
57
- """Clear cache (for memory management)."""
58
- _chapter_cache.clear()
59
- print("[CACHE] Cleared chapter cache")
 
51
  for surah in range(1, 115):
52
  get_chapter_reference(surah)
53
  print(f"[CACHE] All 114 chapters built at runtime")
 
 
 
 
 
 
src/core/segment_types.py CHANGED
@@ -71,6 +71,9 @@ class ProfilingData:
71
  # Result building profiling
72
  result_build_time: float = 0.0 # Total result building time
73
  result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
 
 
 
74
  # Total pipeline time
75
  total_time: float = 0.0 # End-to-end pipeline time
76
 
@@ -150,6 +153,12 @@ class ProfilingData:
150
  lines += [
151
  f" PROFILED SUM: {_fmt(profiled_sum)}",
152
  f" TOTAL (wall): {_fmt(self.total_time)} (unaccounted: {_fmt(unaccounted)})",
153
- "=" * 60,
154
  ]
 
 
 
 
 
 
 
155
  return "\n".join(lines)
 
71
  # Result building profiling
72
  result_build_time: float = 0.0 # Total result building time
73
  result_audio_encode_time: float = 0.0 # Audio-to-data-URL encoding
74
+ # GPU memory profiling
75
+ gpu_peak_vram_mb: float = 0.0 # torch.cuda.max_memory_allocated() in MB
76
+ gpu_reserved_vram_mb: float = 0.0 # torch.cuda.max_memory_reserved() in MB
77
  # Total pipeline time
78
  total_time: float = 0.0 # End-to-end pipeline time
79
 
 
153
  lines += [
154
  f" PROFILED SUM: {_fmt(profiled_sum)}",
155
  f" TOTAL (wall): {_fmt(self.total_time)} (unaccounted: {_fmt(unaccounted)})",
 
156
  ]
157
+ if self.gpu_peak_vram_mb > 0:
158
+ lines += [
159
+ "-" * 60,
160
+ f" GPU VRAM Peak: {self.gpu_peak_vram_mb:.0f} MB",
161
+ f" GPU VRAM Reserved: {self.gpu_reserved_vram_mb:.0f} MB",
162
+ ]
163
+ lines.append("=" * 60)
164
  return "\n".join(lines)
src/core/usage_logger.py CHANGED
@@ -11,12 +11,11 @@ interfere with ZeroGPU's startup function scan.
11
  """
12
 
13
  import hashlib
14
- import io
15
  import json
16
  import threading
17
  from datetime import datetime
18
  from pathlib import Path
19
- from typing import Any, Dict, List, Optional, Tuple, Union
20
  from uuid import uuid4
21
 
22
  import numpy as np
@@ -623,20 +622,6 @@ def update_word_timestamps(
623
  print(f"[USAGE_LOG] Failed to update word timestamps: {e}")
624
 
625
 
626
- def log_error(user_id: str, error_message: str) -> None:
627
- """Log a pipeline error to JSONL."""
628
- try:
629
- with _get_error_lock():
630
- with ERROR_LOG_PATH.open("a") as f:
631
- json.dump({
632
- "timestamp": datetime.now().isoformat(timespec="seconds"),
633
- "user_id": user_id,
634
- "error_message": error_message or "",
635
- }, f)
636
- f.write("\n")
637
- except Exception:
638
- pass
639
-
640
 
641
  def _write_fallback(row: Dict[str, Any]) -> None:
642
  """Local-only fallback: write JSONL (without audio)."""
 
11
  """
12
 
13
  import hashlib
 
14
  import json
15
  import threading
16
  from datetime import datetime
17
  from pathlib import Path
18
+ from typing import Any, Dict, List, Optional, Union
19
  from uuid import uuid4
20
 
21
  import numpy as np
 
622
  print(f"[USAGE_LOG] Failed to update word timestamps: {e}")
623
 
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
 
626
  def _write_fallback(row: Dict[str, Any]) -> None:
627
  """Local-only fallback: write JSONL (without audio)."""
src/ui/segments.py CHANGED
@@ -2,7 +2,6 @@
2
  import json
3
  import time
4
  import wave
5
- import io
6
  import base64
7
  import unicodedata
8
  from pathlib import Path
 
2
  import json
3
  import time
4
  import wave
 
5
  import base64
6
  import unicodedata
7
  from pathlib import Path