Spaces:

emmajeed
/

transcriptinator_v2

Build error

App Files Files Community

emmajeed committed on Dec 26, 2025

Commit

fa9aec9

verified ·

1 Parent(s): df8ba4d

Update transcribe_core.py

Browse files

Files changed (1) hide show

transcribe_core.py +40 -188

transcribe_core.py CHANGED Viewed

@@ -16,6 +16,9 @@ import zipfile
 import time
 from ai_providers import TranscriptionProvider
 def format_timestamp(seconds: float) -> str:
     """Convert seconds to ffmpeg time format (HH:MM:SS.xxx)."""
@@ -25,7 +28,6 @@ def format_timestamp(seconds: float) -> str:
     secs = seconds % 60
     return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
 def check_memory_usage() -> bool:
     """Check current memory usage and print warning if too high."""
     process = psutil.Process()
@@ -35,194 +37,85 @@ def check_memory_usage() -> bool:
         return False
     return True
 def clean_partial_chunks(base_file_path: str) -> None:
     """Clean up any existing partial chunks before starting."""
     try:
         base_name = os.path.splitext(os.path.basename(base_file_path))[0]
-        output_folder = os.path.dirname(base_file_path)
-        pattern = f"{base_name}_part*"
-        print(f"Cleaning up any existing chunks matching: {pattern}")
-        for file in os.listdir(output_folder):
             if file.startswith(f"{base_name}_part") and file.endswith(".mp3"):
-                file_path = os.path.join(output_folder, file)
                 try:
                     os.remove(file_path)
-                    print(f"Removed existing chunk: {file}")
                 except Exception as e:
                     print(f"Warning: Could not remove {file}: {e}")
     except Exception as e:
         print(f"Warning: Error during cleanup: {e}")
 def chunk_audio_file(audio_file_path: str, chunk_duration_minutes: int = 25, overlap_seconds: int = 5) -> List[str]:
     """Chunks an audio file into smaller parts using ffmpeg streaming."""
     chunked_files = []
     try:
-        # Clean up any existing chunks first
         clean_partial_chunks(audio_file_path)
-        # Get audio duration
-        print("\nAnalyzing audio file duration...")
         duration = get_audio_duration(audio_file_path)
-        if duration is None:
-            print("Error: Could not determine audio file duration.")
-            return chunked_files
         chunk_length = chunk_duration_minutes * 60
-        overlap = overlap_seconds
         start_time = 0
         chunk_index = 1
         base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
         output_folder = os.path.dirname(audio_file_path)
-        total_chunks = int((duration - overlap) / (chunk_length - overlap)) + 1
-        print(f"\nChunking audio file: {audio_file_path}")
-        print(f"Total duration: {format_timestamp(duration)}")
-        print(f"Chunk duration: {chunk_duration_minutes} minutes, Overlap: {overlap_seconds} seconds")
-        print(f"Estimated number of chunks: {total_chunks}\n")
         while start_time < duration:
             if not check_memory_usage():
-                print("Memory usage too high, waiting before continuing...")
                 time.sleep(5)
                 continue
-            # Calculate end time for current chunk
             end_time = min(start_time + chunk_length, duration)
-            # Make sure we don't create a tiny final chunk
-            if end_time - start_time < 30:  # If chunk would be less than 30 seconds
-                if chunk_index > 1:  # If not the first chunk
-                    break  # Skip creating this small final chunk
-                end_time = duration  # If it's the first chunk, include all audio
             chunk_file_name = f"{base_name}_part{chunk_index}.mp3"
             chunk_file_path = os.path.join(output_folder, chunk_file_name)
-            print(f"Creating chunk {chunk_index}/{total_chunks}: {chunk_file_name}")
-            print(f"  Time range: {format_timestamp(start_time)} to {format_timestamp(end_time)}")
             try:
-                # Use ffmpeg to extract chunk
-                if os.path.exists(chunk_file_path):
-                    os.remove(chunk_file_path)
                 stream = ffmpeg.input(audio_file_path, ss=start_time, t=end_time-start_time)
                 stream = ffmpeg.output(stream, chunk_file_path, acodec='libmp3lame', loglevel='error')
-                ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)
                 if os.path.exists(chunk_file_path):
-                    chunk_size = os.path.getsize(chunk_file_path) / (1024 * 1024)
-                    print(f"  ✓ Saved chunk: {chunk_file_path} ({chunk_size:.2f}MB)")
                     chunked_files.append(chunk_file_path)
                     chunk_index += 1
-                else:
-                    print(f"  ✗ Error: Chunk file was not created")
-                    break
             except ffmpeg.Error as e:
-                print(f"  ✗ Error processing chunk: {e.stderr.decode() if e.stderr else str(e)}")
                 break
-            # Update start time for next chunk, considering overlap
-            if end_time == duration:  # If this was the last chunk
                 break
-            start_time = end_time - overlap
-            # Force garbage collection after each chunk
             gc.collect()
-        created_chunks = chunk_index - 1
-        print(f"\nAudio file chunking completed:")
-        print(f"- Created {created_chunks} out of {total_chunks} expected chunks")
-        print(f"- Final chunk duration: {format_timestamp(end_time - start_time)}")
     except Exception as e:
         print(f"Error during audio chunking: {e}")
     return chunked_files
 def get_audio_duration(file_path: str) -> float:
     """Get the duration of an audio file using ffmpeg."""
-    try:
-        probe = ffmpeg.probe(file_path)
-        duration = float(probe['format']['duration'])
-        return duration
-    except Exception as e:
-        raise Exception(f"Error getting audio duration: {e}")
 def generate_transcription(audio_file_path: str, provider: TranscriptionProvider) -> str:
-    """
-    Generate transcription using the configured AI provider.
-    Args:
-        audio_file_path: Path to audio file
-        provider: TranscriptionProvider instance (Gemini or HuggingFace)
-    Returns:
-        Transcription text (with timestamps/speakers for Gemini, plain text for HF)
-    """
-    try:
-        return provider.transcribe(audio_file_path)
-    except Exception as e:
-        raise Exception(f"Error during transcription: {e}")
 def generate_summary(transcription_text: str, provider: TranscriptionProvider) -> str:
-    """
-    Generate a concise 2-3 sentence summary using the configured provider.
-    Args:
-        transcription_text: Full transcription
-        provider: TranscriptionProvider instance
-    Returns:
-        Summary text
-    """
-    try:
-        return provider.generate_summary(transcription_text)
-    except Exception as e:
-        return f"Error generating summary: {e}"
 def generate_key_ideas(transcription_text: str, provider: TranscriptionProvider) -> List[Dict[str, str]]:
-    """
-    Identify 3-5 key ideas from the transcription using the configured provider.
-    Args:
-        transcription_text: Full transcription
-        provider: TranscriptionProvider instance
-    Returns:
-        List of {idea, description} dictionaries
-    """
-    try:
-        return provider.generate_key_ideas(transcription_text)
-    except Exception as e:
-        return [{'idea': 'Error generating key ideas', 'description': str(e)}]
 def create_transcript_markdown(audio_filename: str, transcription: str, summary: str, key_ideas: List[Dict[str, str]]) -> str:
-    """
-    Create a formatted markdown file with YAML frontmatter.
-    Args:
-        audio_filename: Name of the audio file
-        transcription: Full transcription text
-        summary: Summary text
-        key_ideas: List of key ideas
-    Returns:
-        Formatted markdown content
-    """
     base_name = os.path.splitext(audio_filename)[0]
-    # Build YAML frontmatter
     yaml_metadata = {
         'title': base_name,
         'audio_file': audio_filename,
@@ -231,99 +124,58 @@ def create_transcript_markdown(audio_filename: str, transcription: str, summary:
         'key_ideas': key_ideas,
         'note_id': str(uuid.uuid4())
     }
     yaml_frontmatter = "---\n" + yaml.dump(yaml_metadata, sort_keys=False, indent=2, allow_unicode=True) + "---\n\n"
-    # Build content sections
-    content = yaml_frontmatter
-    # Key ideas section
-    content += "## Key Ideas\n\n"
-    if key_ideas:
-        for idea_item in key_ideas:
-            if idea_item['description']:
-                content += f"- **{idea_item['idea']}:** {idea_item['description']}\n"
-            else:
-                content += f"- **{idea_item['idea']}**\n"
-    else:
-        content += "*(No key ideas generated)*\n"
-    content += "\n## Full Transcription\n\n"
-    content += transcription
     return content
 def process_audio_file(audio_file_path: str, gemini_provider: TranscriptionProvider, openrouter_provider: TranscriptionProvider = None, progress_callback=None) -> Tuple[str, str]:
-    # 1. SETUP ABSOLUTE PATH (Keep this)
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    output_dir = os.path.join(current_dir, "outputs")
-    os.makedirs(output_dir, exist_ok=True)
     audio_filename = os.path.basename(audio_file_path)
     base_name = os.path.splitext(audio_filename)[0]
     file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
     files_to_transcribe = []
     if file_size_mb > 30:
-        if progress_callback:
-            progress_callback("📦 Chunking large audio file...", 0.1)
-        chunked_files = chunk_audio_file(audio_file_path)
-        files_to_transcribe.extend(chunked_files)
     else:
         files_to_transcribe.append(audio_file_path)
     markdown_files = []
-    total_files = len(files_to_transcribe)
     for idx, file_path in enumerate(files_to_transcribe, 1):
-        file_name = os.path.basename(file_path)
-        if progress_callback:
-            progress = 0.2 + (0.6 * (idx - 1) / total_files)
-            progress_callback(f"🎙️ Transcribing part {idx}/{total_files}...", progress)
         transcription = generate_transcription(file_path, gemini_provider)
         text_provider = openrouter_provider if openrouter_provider else gemini_provider
         summary = generate_summary(transcription, text_provider)
         key_ideas = generate_key_ideas(transcription, text_provider)
-        markdown_content = create_transcript_markdown(file_name, transcription, summary, key_ideas)
-        # 2. FIX: Use the absolute output_dir established at the top
-        output_filename = os.path.splitext(file_name)[0] + ".md"
-        markdown_path = os.path.join(output_dir, output_filename)
         with open(markdown_path, 'w', encoding='utf-8') as f:
             f.write(markdown_content)
         markdown_files.append(markdown_path)
-        if "_part" in file_name:
-            try:
-                os.remove(file_path)
-            except Exception as e:
-                print(f"Warning: Could not delete chunk {file_name}: {e}")
     if len(markdown_files) == 1:
         return markdown_files[0], "False"
     else:
-        if progress_callback:
-            progress_callback("📦 Creating ZIP file...", 0.9)
-        # 3. FIX: Use absolute zip path
-        zip_filename = f"{base_name}_transcripts.zip"
-        zip_path = os.path.join(output_dir, zip_filename)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for md_file in markdown_files:
-                basename = os.path.basename(md_file)
-                zipf.write(md_file, basename)
-                try:
-                    os.remove(md_file)
-                except Exception as e:
-                    print(f"Warning: Could not delete {md_file}: {e}")
         return zip_path, "True"

 import time
 from ai_providers import TranscriptionProvider
+# Define absolute output directory relative to this file
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+OUTPUT_DIR = os.path.join(CURRENT_DIR, "outputs")
 def format_timestamp(seconds: float) -> str:
     """Convert seconds to ffmpeg time format (HH:MM:SS.xxx)."""
     secs = seconds % 60
     return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
 def check_memory_usage() -> bool:
     """Check current memory usage and print warning if too high."""
     process = psutil.Process()
         return False
     return True
 def clean_partial_chunks(base_file_path: str) -> None:
     """Clean up any existing partial chunks before starting."""
     try:
         base_name = os.path.splitext(os.path.basename(base_file_path))[0]
+        # Ensure we look in the same directory as the audio file for chunks
+        chunk_folder = os.path.dirname(base_file_path)
+        for file in os.listdir(chunk_folder):
             if file.startswith(f"{base_name}_part") and file.endswith(".mp3"):
+                file_path = os.path.join(chunk_folder, file)
                 try:
                     os.remove(file_path)
                 except Exception as e:
                     print(f"Warning: Could not remove {file}: {e}")
     except Exception as e:
         print(f"Warning: Error during cleanup: {e}")
 def chunk_audio_file(audio_file_path: str, chunk_duration_minutes: int = 25, overlap_seconds: int = 5) -> List[str]:
     """Chunks an audio file into smaller parts using ffmpeg streaming."""
     chunked_files = []
     try:
         clean_partial_chunks(audio_file_path)
         duration = get_audio_duration(audio_file_path)
         chunk_length = chunk_duration_minutes * 60
         start_time = 0
         chunk_index = 1
         base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
         output_folder = os.path.dirname(audio_file_path)
         while start_time < duration:
             if not check_memory_usage():
                 time.sleep(5)
                 continue
             end_time = min(start_time + chunk_length, duration)
+            if end_time - start_time < 30 and chunk_index > 1:
+                break
             chunk_file_name = f"{base_name}_part{chunk_index}.mp3"
             chunk_file_path = os.path.join(output_folder, chunk_file_name)
             try:
                 stream = ffmpeg.input(audio_file_path, ss=start_time, t=end_time-start_time)
                 stream = ffmpeg.output(stream, chunk_file_path, acodec='libmp3lame', loglevel='error')
+                ffmpeg.run(stream, overwrite_output=True)
                 if os.path.exists(chunk_file_path):
                     chunked_files.append(chunk_file_path)
                     chunk_index += 1
             except ffmpeg.Error as e:
                 break
+            if end_time == duration:
                 break
+            start_time = end_time - overlap_seconds
             gc.collect()
     except Exception as e:
         print(f"Error during audio chunking: {e}")
     return chunked_files
 def get_audio_duration(file_path: str) -> float:
     """Get the duration of an audio file using ffmpeg."""
+    probe = ffmpeg.probe(file_path)
+    return float(probe['format']['duration'])
 def generate_transcription(audio_file_path: str, provider: TranscriptionProvider) -> str:
+    return provider.transcribe(audio_file_path)
 def generate_summary(transcription_text: str, provider: TranscriptionProvider) -> str:
+    return provider.generate_summary(transcription_text)
 def generate_key_ideas(transcription_text: str, provider: TranscriptionProvider) -> List[Dict[str, str]]:
+    return provider.generate_key_ideas(transcription_text)
 def create_transcript_markdown(audio_filename: str, transcription: str, summary: str, key_ideas: List[Dict[str, str]]) -> str:
     base_name = os.path.splitext(audio_filename)[0]
     yaml_metadata = {
         'title': base_name,
         'audio_file': audio_filename,
         'key_ideas': key_ideas,
         'note_id': str(uuid.uuid4())
     }
     yaml_frontmatter = "---\n" + yaml.dump(yaml_metadata, sort_keys=False, indent=2, allow_unicode=True) + "---\n\n"
+    content = yaml_frontmatter + "## Key Ideas\n\n"
+    for idea_item in key_ideas:
+        content += f"- **{idea_item['idea']}:** {idea_item['description']}\n"
+    content += "\n## Full Transcription\n\n" + transcription
     return content
 def process_audio_file(audio_file_path: str, gemini_provider: TranscriptionProvider, openrouter_provider: TranscriptionProvider = None, progress_callback=None) -> Tuple[str, str]:
+    # Ensure the absolute output directory exists
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
     audio_filename = os.path.basename(audio_file_path)
     base_name = os.path.splitext(audio_filename)[0]
     file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
     files_to_transcribe = []
     if file_size_mb > 30:
+        if progress_callback: progress_callback("📦 Chunking file...", 0.1)
+        files_to_transcribe = chunk_audio_file(audio_file_path)
     else:
         files_to_transcribe.append(audio_file_path)
     markdown_files = []
     for idx, file_path in enumerate(files_to_transcribe, 1):
+        if progress_callback: progress_callback(f"🎙️ Transcribing {idx}/{len(files_to_transcribe)}...", 0.2 + (0.6 * idx/len(files_to_transcribe)))
         transcription = generate_transcription(file_path, gemini_provider)
         text_provider = openrouter_provider if openrouter_provider else gemini_provider
         summary = generate_summary(transcription, text_provider)
         key_ideas = generate_key_ideas(transcription, text_provider)
+        markdown_content = create_transcript_markdown(os.path.basename(file_path), transcription, summary, key_ideas)
+        # Use the global absolute OUTPUT_DIR
+        output_filename = os.path.splitext(os.path.basename(file_path))[0] + ".md"
+        markdown_path = os.path.join(OUTPUT_DIR, output_filename)
         with open(markdown_path, 'w', encoding='utf-8') as f:
             f.write(markdown_content)
         markdown_files.append(markdown_path)
+        if "_part" in file_path:
+            try: os.remove(file_path)
+            except: pass
     if len(markdown_files) == 1:
         return markdown_files[0], "False"
     else:
+        zip_path = os.path.join(OUTPUT_DIR, f"{base_name}_transcripts.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for md_file in markdown_files:
+                zipf.write(md_file, os.path.basename(md_file))
+                try: os.remove(md_file)
+                except: pass
         return zip_path, "True"