Spaces:

aseelflihan
/

SyncMaster

Sleeping

App Files Files Community

aseelflihan committed on Jul 1, 2025

Commit

126577b

verified ·

1 Parent(s): 9f7de57

Upload 15 files

Browse files

Files changed (15) hide show

.env +1 -0
.gitattributes +35 -35
.gitignore +12 -0
Dockerfile +34 -0
README.md +20 -0
app.py +386 -0
audio_processor.py +253 -0
mp3_embedder.py +323 -0
package-lock.json +12 -0
package.json +10 -0
packages.txt +5 -0
pyproject.toml +15 -0
requirements.txt +9 -0
utils.py +355 -0
video_generator.py +33 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ GEMINI_API_KEY=your-gemini-api-key-here

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,12 @@

+# Ignore environment files
+.env
+# Python
+__pycache__/
+*.py[cod]
+# Virtual environments
+.venv/
+# Other
+.DS_Store

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+#
+# -- Dockerfile for Streamlit app --
+#
+# Base image
+FROM python:3.9-slim
+# Set working directory
+WORKDIR /app
+# Install system dependencies: ffmpeg for audio conversion, and curl because the
+# HEALTHCHECK below shells out to it (the slim base image does not include curl)
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements file
+COPY requirements.txt ./requirements.txt
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the entire app
+COPY . .
+# Expose the port that Streamlit runs on
+EXPOSE 8501
+# Add a health check against Streamlit's built-in health endpoint
+HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+# Command to run the app
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

README.md ADDED Viewed

	@@ -0,0 +1,20 @@

+---
+title: SyncMaster
+emoji: 🚀
+colorFrom: red
+colorTo: red
+sdk: docker
+app_port: 8501
+tags:
+- streamlit
+pinned: false
+short_description: in
+license: mit
+---
+# Welcome to Streamlit!
+Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
+If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).

app.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import streamlit as st
+import os
+import tempfile
+import json
+from pathlib import Path
+import time
+import traceback
+import streamlit.components.v1 as components
+# AudioProcessor is imported defensively: if the import raises, the formatted
+# traceback is kept so main() can display it instead of the script dying here.
+AUDIO_PROCESSOR_CLASS = None
+IMPORT_ERROR_TRACEBACK = None
+try:
+    from audio_processor import AudioProcessor
+    AUDIO_PROCESSOR_CLASS = AudioProcessor
+except Exception:
+    # Store the full traceback text; main() renders it with st.code().
+    IMPORT_ERROR_TRACEBACK = traceback.format_exc()
+from video_generator import VideoGenerator
+from mp3_embedder import MP3Embedder
+from utils import format_timestamp, validate_audio_file, get_audio_info
+# Page configuration (browser tab title, icon, and wide layout)
+st.set_page_config(
+    page_title="SyncMaster - AI Audio-Text Synchronization",
+    page_icon="🎵",
+    layout="wide"
+)
+# --- Function to log messages to the browser console ---
+def log_to_browser_console(messages):
+    """Injects JavaScript to log messages to the browser's console."""
+    if isinstance(messages, str):
+        messages = [messages]
+    # Escape backticks, backslashes, and ${} to prevent breaking the template literal
+    escaped_messages = []
+    for msg in messages:
+        # Simple JSON stringification is a safe way to escape the string for JS
+        escaped_messages.append(json.dumps(msg))
+    js_code = f"""
+    <script>
+    (function() {{
+        const logs = [{', '.join(escaped_messages)}];
+        console.group("Backend Logs from SyncMaster");
+        logs.forEach(log => {{
+            if (typeof log === 'string' && log.startsWith('--- ERROR')) {{
+                console.error(log);
+            }} else if (typeof log === 'string' && log.startsWith('--- WARNING')) {{
+                console.warn(log);
+            }} else {{
+                console.log(log);
+            }}
+        }});
+        console.groupEnd();
+    }})();
+    </script>
+    """
+    components.html(js_code, height=0)
+# Initialize session state
+if 'step' not in st.session_state:
+    st.session_state.step = 1
+if 'audio_file' not in st.session_state:
+    st.session_state.audio_file = None
+if 'transcription_data' not in st.session_state:
+    st.session_state.transcription_data = None
+if 'edited_text' not in st.session_state:
+    st.session_state.edited_text = ""
+if 'video_style' not in st.session_state:
+    st.session_state.video_style = {
+        'animation_style': 'Karaoke Style',
+        'text_color': '#FFFFFF',
+        'highlight_color': '#FFD700',
+        'background_color': '#000000',
+        'font_family': 'Arial',
+        'font_size': 48
+    }
+# Compatibility shim: when this Streamlit build has no st.divider, provide one
+# that renders a markdown horizontal rule.
+if not hasattr(st, "divider"):
+    def _divider():
+        st.markdown("---")
+    st.divider = _divider
+# Patch st.button for Streamlit versions that don't support the 'type' argument (<=1.12)
+import inspect as _st_inspect
+if "type" not in _st_inspect.signature(st.button).parameters:
+    _orig_button = st.button
+    def _patched_button(label, *args, **kwargs):
+        # Silently drop the keyword arguments the wrapped st.button would reject.
+        kwargs.pop("type", None)
+        kwargs.pop("use_container_width", None)
+        return _orig_button(label, *args, **kwargs)
+    st.button = _patched_button
+# Alias st.rerun to st.experimental_rerun when only the latter exists.
+if not hasattr(st, "rerun") and hasattr(st, "experimental_rerun"):
+    st.rerun = st.experimental_rerun
+# Wrap st.download_button when its signature lacks 'use_container_width',
+# discarding that keyword before delegating to the original.
+if hasattr(st, "download_button"):
+    import inspect as _dl_inspect
+    _dl_sig = _dl_inspect.signature(st.download_button)
+    if "use_container_width" not in _dl_sig.parameters:
+        _orig_download_button = st.download_button
+        def _patched_download_button(label, data, *args, **kwargs):
+            kwargs.pop("use_container_width", None)
+            return _orig_download_button(label, data, *args, **kwargs)
+        st.download_button = _patched_download_button
+def main():
+    st.title("🎵 SyncMaster")
+    st.markdown("### The Intelligent Audio-Text Synchronization Platform")
+    st.markdown("Transform your audio files into mobile-compatible MP3s with synchronized lyrics and animated MP4 videos.")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        if st.session_state.step >= 1:
+            st.success("Step 1: Upload & Process")
+        else:
+            st.info("Step 1: Upload & Process")
+    with col2:
+        if st.session_state.step >= 2:
+            st.success("Step 2: Review & Customize")
+        elif st.session_state.step == 1:
+            st.info("Step 2: Review & Customize")
+    with col3:
+        if st.session_state.step >= 3:
+            st.success("Step 3: Export")
+        elif st.session_state.step >= 2:
+            st.info("Step 3: Export")
+    st.divider()
+    if AUDIO_PROCESSOR_CLASS is None:
+        st.error("فشل حاسم: لم يتمكن التطبيق من بدء التشغيل بشكل صحيح.")
+        st.subheader("حدث خطأ أثناء محاولة استيراد `AudioProcessor`:")
+        st.code(IMPORT_ERROR_TRACEBACK, language="python")
+        st.warning("السبب المحتمل: خطأ في الكود في ملف `audio_processor.py` أو مشكلة في الاتصال بـ Google Gemini.")
+        st.stop()
+    if st.session_state.step == 1:
+        step_1_upload_and_process()
+    elif st.session_state.step == 2:
+        step_2_review_and_customize()
+    elif st.session_state.step == 3:
+        step_3_export()
+def step_1_upload_and_process():
+    st.header("Step 1: Upload Your Audio File")
+    uploaded_file = st.file_uploader(
+        "Choose an audio file",
+        type=['mp3', 'wav', 'm4a'],
+        help="Supported formats: MP3, WAV, M4A"
+    )
+    if uploaded_file is not None:
+        st.session_state.audio_file = uploaded_file
+        st.success(f"File uploaded: {uploaded_file.name}")
+        st.info(f"File size: {uploaded_file.size / 1024 / 1024:.2f} MB")
+        st.audio(uploaded_file)
+        if st.button("🚀 Start AI Processing", type="primary", use_container_width=True):
+            process_audio()
+    if st.session_state.audio_file is not None:
+        if st.button("🔄 Upload Different File"):
+            reset_session()
+            st.rerun()
+def process_audio():
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(st.session_state.audio_file.name).suffix) as tmp_file:
+            tmp_file.write(st.session_state.audio_file.getvalue())
+            tmp_file_path = tmp_file.name
+        processor = AUDIO_PROCESSOR_CLASS()
+        with st.spinner("🎤 Transcribing audio with AI..."):
+            transcription_result = processor.transcribe_audio(tmp_file_path)
+        if "Error:" in transcription_result or not transcription_result:
+            st.error(f"Transcription failed: {transcription_result}")
+            os.unlink(tmp_file_path)
+            return
+        word_timestamps = []
+        if hasattr(processor, 'get_word_timestamps'):
+            try:
+                with st.spinner("🔍 Extracting word timestamps..."):
+                    word_timestamps = processor.get_word_timestamps(tmp_file_path)
+                # فحص محتوى word_timestamps وعرضه للمستخدم
+                st.write("word_timestamps sample:", word_timestamps[:3])
+                if not word_timestamps:
+                    st.warning("No word timestamps extracted! SYLT embedding will not work.")
+            except Exception as e:
+                st.warning(f"Could not extract word timestamps: {e}")
+        st.session_state.transcription_data = {
+            'text': transcription_result,
+            'word_timestamps': word_timestamps,
+            'audio_path': tmp_file_path
+        }
+        st.session_state.edited_text = transcription_result
+        st.session_state.step = 2
+        st.success("🎉 Audio processing complete! Moving to customization...")
+        time.sleep(1)
+        st.rerun()
+    except Exception as e:
+        st.error("An error occurred during processing!")
+        st.exception(e)
+        if 'tmp_file_path' in locals() and os.path.exists(tmp_file_path):
+            os.unlink(tmp_file_path)
+def step_2_review_and_customize():
+    st.header("Step 2: Review & Customize")
+    if st.session_state.transcription_data is None:
+        st.error("No transcription data found. Please go back to Step 1.")
+        if st.button("← Back to Step 1"):
+            st.session_state.step = 1
+            st.rerun()
+        return
+    col1, col2 = st.columns([3, 2])
+    with col1:
+        st.subheader("📝 Text Editor")
+        edited_text = st.text_area(
+            "Transcribed Text",
+            value=st.session_state.edited_text,
+            height=300
+        )
+        st.session_state.edited_text = edited_text
+        st.caption(f"Word count: {len(edited_text.split())}")
+    with col2:
+        st.subheader("🎨 Video Style Customization")
+        st.session_state.video_style['animation_style'] = st.selectbox("Animation Style", ["Karaoke Style", "Pop-up Word"])
+        st.session_state.video_style['text_color'] = st.color_picker("Text Color", st.session_state.video_style['text_color'])
+        st.session_state.video_style['highlight_color'] = st.color_picker("Highlight Color", st.session_state.video_style['highlight_color'])
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col1:
+        if st.button("← Back to Upload"):
+            st.session_state.step = 1
+            st.rerun()
+    with col3:
+        if st.button("Continue to Export →", type="primary"):
+            st.session_state.step = 3
+            st.rerun()
+def step_3_export():
+    st.header("Step 3: Export Your Synchronized Media")
+    if st.session_state.transcription_data is None:
+        st.error("No data found. Please go back to Step 1.")
+        if st.button("← Back to Step 1"):
+            st.session_state.step = 1
+            st.rerun()
+        return
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("🎵 MP3 Export")
+        st.markdown("Export MP3 with embedded synchronized lyrics (SYLT).")
+        if st.button("📱 Export MP3 with Lyrics", type="primary", use_container_width=True):
+            export_mp3()
+    with col2:
+        st.subheader("🎬 MP4 Video Export")
+        st.markdown("Create an animated video with synchronized text.")
+        if st.button("🎥 Generate Video Summary", type="primary", use_container_width=True):
+            export_mp4()
+    st.divider()
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col1:
+        if st.button("← Back to Customize"):
+            st.session_state.step = 2
+            st.rerun()
+    with col3:
+        if st.button("🔄 Start Over"):
+            reset_session()
+            st.rerun()
+def export_mp3():
+    """Export MP3 file and log diagnostics to the browser console and Streamlit UI.
+    Reads the audio path, word timestamps and edited text from session state,
+    asks MP3Embedder to embed them, then shows the embedder's log messages, an
+    audio preview, a tag inspection, and a download button. Any exception is
+    reported in the UI and its traceback is sent to the browser console.
+    """
+    try:
+        with st.spinner("Embedding lyrics into MP3..."):
+            embedder = MP3Embedder()
+            word_timestamps = st.session_state.transcription_data['word_timestamps']
+            audio_path = st.session_state.transcription_data['audio_path']
+            # Output name is derived from the uploaded file's stem.
+            output_filename = f"synced_{Path(st.session_state.audio_file.name).stem}.mp3"
+            st.info("🔄 بدء عملية دمج النصوص...")
+            output_path, log_messages = embedder.embed_sylt_lyrics(
+                audio_path,
+                word_timestamps,
+                st.session_state.edited_text,
+                output_filename
+            )
+            log_to_browser_console(log_messages)
+            # Show the logs in Streamlit, using a severity that matches each message
+            st.subheader("📝 تفاصيل العملية:")
+            for log in log_messages:
+                if "ERROR" in log:
+                    st.error(log)
+                elif "WARNING" in log:
+                    st.warning(log)
+                else:
+                    st.info(log)
+        st.subheader("✅ Export Complete")
+        if os.path.exists(output_path):
+            with open(output_path, 'rb') as audio_file:
+                audio_bytes = audio_file.read()
+            st.audio(audio_bytes, format='audio/mp3')
+            # --- Inspect the tags immediately after embedding ---
+            from mutagen.mp3 import MP3
+            from mutagen.id3 import ID3, SYLT, USLT
+            audio_file_obj = MP3(output_path, ID3=ID3)
+            # A file without tags yields empty frame lists rather than an error.
+            sylt_frames = audio_file_obj.tags.getall('SYLT') if audio_file_obj.tags else []
+            uslt_frames = audio_file_obj.tags.getall('USLT') if audio_file_obj.tags else []
+            st.write(f"SYLT frames after export: {len(sylt_frames)}")
+            st.write(f"USLT frames after export: {len(uslt_frames)}")
+            if sylt_frames:
+                st.write("SYLT frame sample:", sylt_frames[0])
+            if uslt_frames:
+                st.write("USLT frame sample:", uslt_frames[0])
+            # --- End of inspection ---
+            verification = embedder.verify_sylt_embedding(output_path)
+            st.json(verification)
+            if verification['has_sylt']:
+                st.success(f"Successfully embedded {verification['sylt_entries']} synchronized words!")
+            else:
+                st.warning("Warning: Could not verify SYLT embedding. The lyrics may not be synchronized.")
+            st.download_button(
+                label="Download Synced MP3",
+                data=audio_bytes,
+                file_name=output_filename,
+                mime="audio/mpeg",
+                use_container_width=True
+            )
+        else:
+            st.error("Failed to create the MP3 file. Check the browser console for logs.")
+    except Exception as e:
+        st.error(f"An error occurred during MP3 export: {e}")
+        log_to_browser_console([f"--- FATAL ERROR in export_mp3: {traceback.format_exc()} ---"])
+def export_mp4():
+    st.info("MP4 export functionality is not yet implemented with console logging.")
+def get_audio_duration_seconds(audio_path: str) -> float:
+    try:
+        audio_info = get_audio_info(audio_path)
+        return audio_info.get('duration', 0)
+    except:
+        return 0
+def get_audio_duration_formatted(audio_path: str) -> str:
+    duration = get_audio_duration_seconds(audio_path)
+    minutes = int(duration // 60)
+    seconds = int(duration % 60)
+    return f"{minutes}:{seconds:02d}"
+def reset_session():
+    for key in list(st.session_state.keys()):
+        if key not in ['step']:
+            del st.session_state[key]
+    st.session_state.step = 1
+    st.session_state.audio_file = None
+    st.session_state.transcription_data = None
+    st.session_state.edited_text = ""
+    st.session_state.video_style = {
+        'animation_style': 'Karaoke Style',
+        'text_color': '#FFFFFF',
+        'highlight_color': '#FFD700',
+        'background_color': '#000000',
+        'font_family': 'Arial',
+        'font_size': 48
+    }
+# Render the app only when this file is executed as the main script.
+if __name__ == "__main__":
+    main()

audio_processor.py ADDED Viewed

	@@ -0,0 +1,253 @@

+import os
+from dotenv import load_dotenv
+import tempfile
+from typing import List, Dict, Optional
+import json
+import librosa
+import numpy as np
+from google import genai
+from google.genai import types
+class AudioProcessor:
+    """Handles audio transcription and word-level timestamp extraction using Gemini AI"""
+    def __init__(self):
+        """Initialize the audio processor with Gemini client"""
+        self.client = None
+        self._initialize_gemini()
+    def _initialize_gemini(self):
+        """Initialize the Gemini client"""
+        try:
+            # Load environment variables from a .env file if present
+            load_dotenv()
+            # Obtain API key from environment variables
+            api_key = os.getenv("GEMINI_API_KEY")
+            if not api_key:
+                raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in a .env file.")
+            self.client = genai.Client(api_key=api_key)
+        except Exception as e:
+            print(f"Warning: Failed to initialize Gemini client: {str(e)}")
+            self.client = None
+    def transcribe_audio(self, audio_file_path: str) -> Optional[str]:
+        """
+        Transcribe audio file to text using Gemini AI
+        Args:
+            audio_file_path: Path to the audio file
+        Returns:
+            Transcribed text or None if failed
+        """
+        try:
+            if not os.path.exists(audio_file_path):
+                raise FileNotFoundError(f"Audio file not found: {audio_file_path}")
+            if not self.client:
+                # Fallback to sample text if Gemini is not available
+                return "Please edit this text to match your audio content. Gemini transcription is not available."
+            # Read audio file as bytes
+            with open(audio_file_path, 'rb') as f:
+                audio_bytes = f.read()
+            # Determine MIME type based on file extension
+            file_ext = os.path.splitext(audio_file_path)[1].lower()
+            mime_type_map = {
+                '.mp3': 'audio/mpeg',
+                '.wav': 'audio/wav',
+                '.m4a': 'audio/mp4',
+                '.flac': 'audio/flac',
+                '.ogg': 'audio/ogg'
+            }
+            mime_type = mime_type_map.get(file_ext, 'audio/mpeg')
+            # Transcribe with Gemini
+            response = self.client.models.generate_content(
+                model="gemini-2.5-flash",
+                contents=[
+                    types.Part.from_bytes(
+                        data=audio_bytes,
+                        mime_type=mime_type,
+                    ),
+                    "Please transcribe this audio file accurately. Provide only the spoken text without any additional commentary, formatting, or explanations. Just return the pure transcribed text."
+                ],
+            )
+            if response and response.text:
+                return response.text.strip()
+            else:
+                return "Please edit this text to match your audio content. Transcription failed."
+        except Exception as e:
+            print(f"Error transcribing audio: {str(e)}")
+            return "Please edit this text to match your audio content. An error occurred during transcription."
+    def get_word_timestamps(self, audio_file_path: str) -> List[Dict]:
+        """
+        Create word-level timestamps from transcribed text and audio duration
+        Args:
+            audio_file_path: Path to the audio file
+        Returns:
+            List of dictionaries with word, start, and end timestamps
+        """
+        try:
+            if not os.path.exists(audio_file_path):
+                raise FileNotFoundError(f"Audio file not found: {audio_file_path}")
+            # First get the transcription
+            transcription = self.transcribe_audio(audio_file_path)
+            if not transcription:
+                return []
+            # Get audio duration
+            audio_duration = self.get_audio_duration(audio_file_path)
+            if audio_duration <= 0:
+                return []
+            # Split transcription into words
+            words = transcription.split()
+            if not words:
+                return []
+            # Calculate timing for each word
+            word_timestamps = []
+            total_words = len(words)
+            for i, word in enumerate(words):
+                # Distribute words evenly across the audio duration
+                # Leave some silence at the beginning and end
+                start_offset = 0.5  # 0.5 seconds at start
+                end_offset = 0.5    # 0.5 seconds at end
+                usable_duration = audio_duration - start_offset - end_offset
+                if total_words == 1:
+                    start_time = start_offset
+                    end_time = audio_duration - end_offset
+                else:
+                    # Calculate word timing
+                    word_duration = usable_duration / total_words
+                    start_time = start_offset + (i * word_duration)
+                    end_time = start_offset + ((i + 1) * word_duration)
+                # Add some variation to make it more natural
+                if i > 0:
+                    # Small gap between words
+                    start_time += 0.05
+                word_data = {
+                    'word': word.strip(),
+                    'start': round(start_time, 3),
+                    'end': round(end_time, 3)
+                }
+                word_timestamps.append(word_data)
+            return word_timestamps
+        except Exception as e:
+            print(f"Error creating word timestamps: {str(e)}")
+            return []
+    def get_audio_duration(self, audio_file_path: str) -> float:
+        """
+        Get the duration of the audio file in seconds
+        Args:
+            audio_file_path: Path to the audio file
+        Returns:
+            Duration in seconds
+        """
+        try:
+            audio_data, sample_rate = librosa.load(audio_file_path)
+            duration = len(audio_data) / sample_rate
+            return duration
+        except Exception as e:
+            print(f"Error getting audio duration: {str(e)}")
+            return 0.0
+    def validate_timestamps(self, word_timestamps: List[Dict], audio_duration: float) -> List[Dict]:
+        """
+        Validate and clean word timestamps
+        Args:
+            word_timestamps: List of word timestamp dictionaries
+            audio_duration: Total duration of audio in seconds
+        Returns:
+            Cleaned list of word timestamps
+        """
+        cleaned_timestamps = []
+        for word_data in word_timestamps:
+            # Ensure start and end times are valid
+            start_time = max(0, word_data.get('start', 0))
+            end_time = min(audio_duration, word_data.get('end', start_time + 0.1))
+            # Ensure end time is after start time
+            if end_time <= start_time:
+                end_time = start_time + 0.1
+            cleaned_word = {
+                'word': word_data.get('word', '').strip(),
+                'start': round(start_time, 3),
+                'end': round(end_time, 3)
+            }
+            if cleaned_word['word']:
+                cleaned_timestamps.append(cleaned_word)
+        return cleaned_timestamps
+    def create_sentence_timestamps(self, word_timestamps: List[Dict], max_words_per_line: int = 8) -> List[Dict]:
+        """
+        Group words into sentences/lines for better video display
+        Args:
+            word_timestamps: List of word timestamp dictionaries
+            max_words_per_line: Maximum words per line
+        Returns:
+            List of sentence/line dictionaries with timestamps
+        """
+        if not word_timestamps:
+            return []
+        sentences = []
+        current_sentence = []
+        for word_data in word_timestamps:
+            current_sentence.append(word_data)
+            # Check if we should end this sentence
+            word = word_data.get('word', '')
+            if (len(current_sentence) >= max_words_per_line or
+                word.endswith('.') or word.endswith('!') or word.endswith('?')):
+                if current_sentence:
+                    sentence_data = {
+                        'text': ' '.join([w.get('word', '') for w in current_sentence]).strip(),
+                        'start': current_sentence[0].get('start', 0),
+                        'end': current_sentence[-1].get('end', 0),
+                        'words': current_sentence.copy()
+                    }
+                    sentences.append(sentence_data)
+                    current_sentence = []
+        # Add remaining words as final sentence
+        if current_sentence:
+            sentence_data = {
+                'text': ' '.join([w.get('word', '') for w in current_sentence]).strip(),
+                'start': current_sentence[0].get('start', 0),
+                'end': current_sentence[-1].get('end', 0),
+                'words': current_sentence.copy()
+            }
+            sentences.append(sentence_data)
+        return sentences

mp3_embedder.py ADDED Viewed

	@@ -0,0 +1,323 @@

+from mutagen.mp3 import MP3
+from mutagen.id3 import ID3, SYLT, USLT, Encoding
+import os
+import tempfile
+import shutil
+import subprocess
+from typing import List, Dict, Tuple
+# --- Helper function to check for ffmpeg ---
+def is_ffmpeg_available():
+    """Check if ffmpeg is installed and accessible in the system's PATH."""
+    return shutil.which("ffmpeg") is not None
+class MP3Embedder:
+    """Handles embedding SYLT synchronized lyrics into MP3 files with robust error handling."""
+    def __init__(self):
+        """Initialize the MP3 embedder."""
+        self.temp_dir = "/tmp/audio_sync"
+        os.makedirs(self.temp_dir, exist_ok=True)
+        self.ffmpeg_available = is_ffmpeg_available()
+    def embed_sylt_lyrics(self, audio_path: str, word_timestamps: List[Dict],
+                         text: str, output_filename: str) -> Tuple[str, List[str]]:
+        """
+        Embeds SYLT synchronized lyrics into an MP3 file and returns logs.
+        Returns:
+            A tuple containing:
+            - The path to the output MP3 file.
+            - A list of log messages detailing the process.
+        """
+        log_messages = []
+        def log_and_print(message):
+            log_messages.append(message)
+            print(f"MP3_EMBEDDER: {message}")
+        log_and_print(f"--- MP3Embedder initialized. ffmpeg available: {self.ffmpeg_available} ---")
+        log_and_print(f"--- Starting SYLT embedding for: {os.path.basename(audio_path)} ---")
+        output_path = os.path.join(self.temp_dir, output_filename)
+        try:
+            # --- Step 1: Ensure the file is in MP3 format ---
+            if not audio_path.lower().endswith('.mp3'):
+                if self.ffmpeg_available:
+                    log_and_print(f"'{os.path.basename(audio_path)}' is not an MP3. Converting with ffmpeg...")
+                    try:
+                        subprocess.run(
+                            ['ffmpeg', '-i', audio_path, '-codec:a', 'libmp3lame', '-q:a', '2', output_path],
+                            check=True, capture_output=True, text=True
+                        )
+                        log_and_print("--- ffmpeg conversion successful. ---")
+                    except subprocess.CalledProcessError as e:
+                        log_and_print("--- ERROR: ffmpeg conversion failed. ---")
+                        log_and_print(f"--- ffmpeg stderr: {e.stderr} ---")
+                        log_and_print("--- Fallback: Copying original file without conversion. ---")
+                        shutil.copy2(audio_path, output_path)
+                else:
+                    log_and_print("--- WARNING: ffmpeg is not available. Cannot convert non-MP3 file. Copying directly. ---")
+                    shutil.copy2(audio_path, output_path)
+            else:
+                log_and_print("--- Audio is already MP3. Copying to temporary location. ---")
+                shutil.copy2(audio_path, output_path)
+            # --- Step 2: Create SYLT data ---
+            log_and_print("--- Creating SYLT data from timestamps... ---")
+            sylt_data = self._create_sylt_data(word_timestamps)
+            if not sylt_data:
+                log_and_print("--- WARNING: No SYLT data could be created. Skipping embedding. ---")
+                return output_path, log_messages
+            log_and_print(f"--- Created {len(sylt_data)} SYLT entries. ---")
+            # --- Step 3: Embed data into the MP3 file ---
+            try:
+                log_and_print("--- Loading MP3 file with mutagen... ---")
+                audio_file = MP3(output_path, ID3=ID3)
+                if audio_file.tags is None:
+                    log_and_print("--- No ID3 tags found. Creating new ones. ---")
+                    audio_file.add_tags()
+                # --- Embed SYLT (Synchronized Lyrics) ---
+                log_and_print("--- Creating and adding SYLT frame... ---")
+                sylt_frame = SYLT(
+                    encoding=Encoding.UTF8,
+                    lang='eng',
+                    format=2,
+                    type=1,
+                    text=sylt_data
+                )
+                audio_file.tags.delall('SYLT')
+                audio_file.tags.add(sylt_frame)
+                # --- Embed USLT (Unsynchronized Lyrics) as a fallback ---
+                log_and_print("--- Creating and adding USLT frame... ---")
+                uslt_frame = USLT(
+                    encoding=Encoding.UTF8,
+                    lang='eng',
+                    desc='',
+                    text=text
+                )
+                audio_file.tags.delall('USLT')
+                audio_file.tags.add(uslt_frame)
+                audio_file.save()
+                log_and_print("--- Successfully embedded SYLT and USLT frames. ---")
+            except Exception as e:
+                log_and_print(f"--- ERROR: Failed to embed SYLT/USLT: {e} ---")
+            return output_path, log_messages
+        except Exception as e:
+            log_and_print(f"--- ERROR: Unexpected error in embed_sylt_lyrics: {e} ---")
+            return output_path, log_messages
+    def _create_sylt_data(self, word_timestamps: List[Dict]) -> List[tuple]:
+        """
+        Create SYLT data format from word timestamps
+        Args:
+            word_timestamps: List of word timestamp dictionaries
+        Returns:
+            List of tuples (text, timestamp_in_milliseconds)
+        """
+        # Debug print to check incoming data
+        print(f"DEBUG: word_timestamps received in _create_sylt_data: {word_timestamps}")
+        try:
+            sylt_data = []
+            for word_data in word_timestamps:
+                word = word_data.get('word', '').strip()
+                start_time = word_data.get('start', 0)
+                if word:
+                    # Convert seconds to milliseconds
+                    timestamp_ms = int(start_time * 1000)
+                    sylt_data.append((word, timestamp_ms))
+            return sylt_data
+        except Exception as e:
+            print(f"Error creating SYLT data: {str(e)}")
+            return []
+    def _create_line_based_sylt_data(self, word_timestamps: List[Dict], max_words_per_line: int = 6) -> List[tuple]:
+        """
+        Create line-based SYLT data (alternative approach)
+        Args:
+            word_timestamps: List of word timestamp dictionaries
+            max_words_per_line: Maximum words per line
+        Returns:
+            List of tuples (line_text, timestamp_in_milliseconds)
+        """
+        try:
+            sylt_data = []
+            current_line = []
+            for word_data in word_timestamps:
+                current_line.append(word_data)
+                # Check if we should end this line
+                if len(current_line) >= max_words_per_line:
+                    if current_line:
+                        line_text = ' '.join([w.get('word', '') for w in current_line]).strip()
+                        start_time = current_line[0].get('start', 0)
+                        timestamp_ms = int(start_time * 1000)
+                        if line_text:
+                            sylt_data.append((line_text, timestamp_ms))
+                        current_line = []
+            # Add remaining words as final line
+            if current_line:
+                line_text = ' '.join([w.get('word', '') for w in current_line]).strip()
+                start_time = current_line[0].get('start', 0)
+                timestamp_ms = int(start_time * 1000)
+                if line_text:
+                    sylt_data.append((line_text, timestamp_ms))
+            return sylt_data
+        except Exception as e:
+            print(f"Error creating line-based SYLT data: {str(e)}")
+            return []
+    def verify_sylt_embedding(self, mp3_path: str) -> Dict:
+        """
+        Verify that SYLT lyrics are properly embedded
+        Args:
+            mp3_path: Path to the MP3 file
+        Returns:
+            Dictionary with verification results
+        """
+        try:
+            audio_file = MP3(mp3_path)
+            result = {
+                'has_sylt': False,
+                'has_uslt': False,
+                'sylt_entries': 0,
+                'error': None
+            }
+            if audio_file.tags:
+                # Check for SYLT
+                sylt_frames = audio_file.tags.getall('SYLT')
+                if sylt_frames:
+                    result['has_sylt'] = True
+                    result['sylt_entries'] = len(sylt_frames[0].text) if sylt_frames[0].text else 0
+                # Check for USLT (fallback)
+                uslt_frames = audio_file.tags.getall('USLT')
+                if uslt_frames:
+                    result['has_uslt'] = True
+            return result
+        except Exception as e:
+            return {
+                'has_sylt': False,
+                'has_uslt': False,
+                'sylt_entries': 0,
+                'error': str(e)
+            }
+    def extract_sylt_lyrics(self, mp3_path: str) -> List[Dict]:
+        """
+        Extract SYLT lyrics from an MP3 file (for debugging)
+        Args:
+            mp3_path: Path to the MP3 file
+        Returns:
+            List of dictionaries with text and timestamp
+        """
+        try:
+            audio_file = MP3(mp3_path)
+            lyrics_data = []
+            if audio_file.tags:
+                sylt_frames = audio_file.tags.getall('SYLT')
+                for frame in sylt_frames:
+                    if frame.text:
+                        for text, timestamp_ms in frame.text:
+                            lyrics_data.append({
+                                'text': text,
+                                'timestamp': timestamp_ms / 1000.0  # Convert to seconds
+                            })
+            return lyrics_data
+        except Exception as e:
+            print(f"Error extracting SYLT lyrics: {str(e)}")
+            return []
+    def create_lrc_file(self, word_timestamps: List[Dict], output_path: str) -> str:
+        """
+        Create an LRC (lyrics) file as an additional export option
+        Args:
+            word_timestamps: List of word timestamp dictionaries
+            output_path: Path for the output LRC file
+        Returns:
+            Path to the created LRC file
+        """
+        try:
+            lrc_lines = []
+            # Group words into lines
+            current_line = []
+            for word_data in word_timestamps:
+                current_line.append(word_data)
+                if len(current_line) >= 8:  # 8 words per line
+                    if current_line:
+                        line_text = ' '.join([w.get('word', '') for w in current_line])
+                        start_time = current_line[0].get('start', 0)
+                        # Format timestamp as [mm:ss.xx]
+                        minutes = int(start_time // 60)
+                        seconds = start_time % 60
+                        timestamp_str = f"[{minutes:02d}:{seconds:05.2f}]"
+                        lrc_lines.append(f"{timestamp_str}{line_text}")
+                        current_line = []
+            # Add remaining words
+            if current_line:
+                line_text = ' '.join([w.get('word', '') for w in current_line])
+                start_time = current_line[0].get('start', 0)
+                minutes = int(start_time // 60)
+                seconds = start_time % 60
+                timestamp_str = f"[{minutes:02d}:{seconds:05.2f}]"
+                lrc_lines.append(f"{timestamp_str}{line_text}")
+            # Write LRC file
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write('\n'.join(lrc_lines))
+            return output_path
+        except Exception as e:
+            raise Exception(f"Error creating LRC file: {str(e)}")
+    def __del__(self):
+        """Clean up temporary files"""
+        import shutil
+        if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+            try:
+                shutil.rmtree(self.temp_dir)
+            except:
+                pass

package-lock.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "name": "syncmaster",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "syncmaster",
+      "version": "0.1.0"
+    }
+  }
+}

package.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "name": "syncmaster",
+  "version": "0.1.0",
+  "private": true,
+  "description": "AI Audio-Text Synchronization Platform – convenience wrapper for Streamlit dev server",
+"scripts": {
+  "dev": "streamlit run app.py --server.port 5050 --server.address localhost"
+}
+}

packages.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+ffmpeg
+libavcodec-extra
+libavformat-dev
+libavutil-dev
+libmp3lame0

pyproject.toml ADDED Viewed

	@@ -0,0 +1,15 @@

+[project]
+name = "repl-nix-workspace"
+version = "0.1.0"
+description = "Add your description here"
+requires-python = ">=3.11"
+dependencies = [
+    "google-genai>=1.23.0",
+    "librosa>=0.11.0",
+    "moviepy>=2.2.1",
+    "mutagen>=1.47.0",
+    "numpy>=2.2.6",
+    "openai>=1.93.0",
+    "sift-stack-py>=0.7.0",
+    "streamlit>=1.46.1",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+google-genai==1.23.0
+librosa==0.11.0
+moviepy==2.2.1
+mutagen==1.47.0
+numpy==1.26.4
+openai==1.93.0
+streamlit==1.39.0
+altair==5.0.1
+python-dotenv==1.0.1

utils.py ADDED Viewed

	@@ -0,0 +1,355 @@

+import os
+import mimetypes
+import tempfile
+from pathlib import Path
+from typing import Optional, List, Dict
+import librosa
+import numpy as np
+def format_timestamp(seconds: float) -> str:
+    """
+    Format seconds into MM:SS.mmm format
+    Args:
+        seconds: Time in seconds
+    Returns:
+        Formatted timestamp string
+    """
+    minutes = int(seconds // 60)
+    remaining_seconds = seconds % 60
+    return f"{minutes:02d}:{remaining_seconds:06.3f}"
+def validate_audio_file(file_path: str) -> bool:
+    """
+    Validate if the file is a supported audio format
+    Args:
+        file_path: Path to the audio file
+    Returns:
+        True if valid, False otherwise
+    """
+    try:
+        if not os.path.exists(file_path):
+            return False
+        # Check file extension
+        supported_extensions = ['.mp3', '.wav', '.m4a', '.flac', '.ogg']
+        file_extension = Path(file_path).suffix.lower()
+        if file_extension not in supported_extensions:
+            return False
+        # Check MIME type
+        mime_type, _ = mimetypes.guess_type(file_path)
+        if mime_type and not mime_type.startswith('audio/'):
+            return False
+        # Try to load with librosa to verify it's a valid audio file
+        try:
+            librosa.load(file_path, duration=1.0)  # Load just 1 second for validation
+            return True
+        except:
+            return False
+    except Exception:
+        return False
+def get_audio_info(file_path: str) -> Dict:
+    """
+    Get information about the audio file
+    Args:
+        file_path: Path to the audio file
+    Returns:
+        Dictionary with audio information
+    """
+    try:
+        # Load audio file
+        y, sr = librosa.load(file_path)
+        duration = len(y) / sr
+        return {
+            'duration': duration,
+            'sample_rate': sr,
+            'channels': 1 if len(y.shape) == 1 else y.shape[0],
+            'file_size': os.path.getsize(file_path),
+            'format': Path(file_path).suffix.lower()
+        }
+    except Exception as e:
+        return {
+            'error': str(e),
+            'duration': 0,
+            'sample_rate': 0,
+            'channels': 0,
+            'file_size': 0,
+            'format': 'unknown'
+        }
+def clean_text(text: str) -> str:
+    """
+    Clean and normalize text for better processing
+    Args:
+        text: Input text
+    Returns:
+        Cleaned text
+    """
+    if not text:
+        return ""
+    # Remove extra whitespace
+    text = ' '.join(text.split())
+    # Remove common transcription artifacts
+    text = text.replace('[Music]', '')
+    text = text.replace('[Applause]', '')
+    text = text.replace('[Laughter]', '')
+    text = text.replace('(Music)', '')
+    text = text.replace('(Applause)', '')
+    text = text.replace('(Laughter)', '')
+    # Clean up extra spaces
+    text = ' '.join(text.split())
+    return text.strip()
+def split_text_into_chunks(text: str, max_chars_per_chunk: int = 100) -> List[str]:
+    """
+    Split text into chunks suitable for video display
+    Args:
+        text: Input text
+        max_chars_per_chunk: Maximum characters per chunk
+    Returns:
+        List of text chunks
+    """
+    if not text:
+        return []
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_length = 0
+    for word in words:
+        word_length = len(word) + 1  # +1 for space
+        if current_length + word_length > max_chars_per_chunk and current_chunk:
+            # Add current chunk and start new one
+            chunks.append(' '.join(current_chunk))
+            current_chunk = [word]
+            current_length = len(word)
+        else:
+            current_chunk.append(word)
+            current_length += word_length
+    # Add final chunk
+    if current_chunk:
+        chunks.append(' '.join(current_chunk))
+    return chunks
+def convert_color_hex_to_rgb(hex_color: str) -> tuple:
+    """
+    Convert hex color to RGB tuple
+    Args:
+        hex_color: Hex color string (e.g., '#FF0000')
+    Returns:
+        RGB tuple (r, g, b)
+    """
+    hex_color = hex_color.lstrip('#')
+    if len(hex_color) != 6:
+        return (255, 255, 255)  # Default to white
+    try:
+        r = int(hex_color[0:2], 16)
+        g = int(hex_color[2:4], 16)
+        b = int(hex_color[4:6], 16)
+        return (r, g, b)
+    except ValueError:
+        return (255, 255, 255)  # Default to white
+def convert_rgb_to_hex(r: int, g: int, b: int) -> str:
+    """
+    Convert RGB values to hex color string
+    Args:
+        r, g, b: RGB color values (0-255)
+    Returns:
+        Hex color string
+    """
+    return f"#{r:02x}{g:02x}{b:02x}"
+def estimate_video_file_size(duration: float, resolution: tuple = (1280, 720),
+                           bitrate_kbps: int = 2000) -> int:
+    """
+    Estimate the file size of a video based on duration and quality
+    Args:
+        duration: Video duration in seconds
+        resolution: Video resolution tuple (width, height)
+        bitrate_kbps: Video bitrate in kbps
+    Returns:
+        Estimated file size in bytes
+    """
+    # Simple estimation: bitrate * duration / 8 (to convert bits to bytes)
+    estimated_size = (bitrate_kbps * 1000 * duration) / 8
+    return int(estimated_size)
+def create_safe_filename(filename: str) -> str:
+    """
+    Create a safe filename by removing/replacing invalid characters
+    Args:
+        filename: Original filename
+    Returns:
+        Safe filename
+    """
+    import re
+    # Remove or replace invalid characters
+    safe_filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
+    # Remove extra underscores and spaces
+    safe_filename = re.sub(r'[_\s]+', '_', safe_filename)
+    # Trim leading/trailing underscores
+    safe_filename = safe_filename.strip('_')
+    # Ensure filename is not empty
+    if not safe_filename:
+        safe_filename = "output"
+    return safe_filename
+def format_file_size(size_bytes: int) -> str:
+    """
+    Format file size in human-readable format
+    Args:
+        size_bytes: File size in bytes
+    Returns:
+        Formatted file size string
+    """
+    if size_bytes == 0:
+        return "0 B"
+    size_names = ["B", "KB", "MB", "GB"]
+    i = int(np.floor(np.log(size_bytes) / np.log(1024)))
+    p = np.power(1024, i)
+    s = round(size_bytes / p, 2)
+    return f"{s} {size_names[i]}"
+def validate_word_timestamps(word_timestamps: List[Dict]) -> List[Dict]:
+    """
+    Validate and clean word timestamps data
+    Args:
+        word_timestamps: List of word timestamp dictionaries
+    Returns:
+        Cleaned and validated word timestamps
+    """
+    validated_timestamps = []
+    for word_data in word_timestamps:
+        # Ensure required fields exist
+        if not isinstance(word_data, dict):
+            continue
+        word = word_data.get('word', '').strip()
+        start = word_data.get('start', 0)
+        end = word_data.get('end', 0)
+        # Skip empty words
+        if not word:
+            continue
+        # Ensure numeric timestamps
+        try:
+            start = float(start)
+            end = float(end)
+        except (ValueError, TypeError):
+            continue
+        # Ensure logical timestamp order
+        if start < 0:
+            start = 0
+        if end <= start:
+            end = start + 0.1  # Minimum duration
+        validated_timestamps.append({
+            'word': word,
+            'start': round(start, 3),
+            'end': round(end, 3)
+        })
+    return validated_timestamps
+def merge_overlapping_timestamps(word_timestamps: List[Dict],
+                               overlap_threshold: float = 0.05) -> List[Dict]:
+    """
+    Merge overlapping or very close word timestamps
+    Args:
+        word_timestamps: List of word timestamp dictionaries
+        overlap_threshold: Threshold for merging close timestamps (seconds)
+    Returns:
+        List with merged timestamps
+    """
+    if not word_timestamps:
+        return []
+    merged_timestamps = []
+    current_group = [word_timestamps[0]]
+    for word_data in word_timestamps[1:]:
+        last_end = current_group[-1]['end']
+        current_start = word_data['start']
+        # Check if words should be merged
+        if current_start - last_end <= overlap_threshold:
+            current_group.append(word_data)
+        else:
+            # Merge current group and start new one
+            if len(current_group) == 1:
+                merged_timestamps.append(current_group[0])
+            else:
+                # Merge multiple words
+                merged_word = {
+                    'word': ' '.join([w['word'] for w in current_group]),
+                    'start': current_group[0]['start'],
+                    'end': current_group[-1]['end']
+                }
+                merged_timestamps.append(merged_word)
+            current_group = [word_data]
+    # Handle final group
+    if len(current_group) == 1:
+        merged_timestamps.append(current_group[0])
+    else:
+        merged_word = {
+            'word': ' '.join([w['word'] for w in current_group]),
+            'start': current_group[0]['start'],
+            'end': current_group[-1]['end']
+        }
+        merged_timestamps.append(merged_word)
+    return merged_timestamps

video_generator.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# START OF video_generator.py
+import os
+import tempfile
+import shutil
+from typing import List, Dict
+class VideoGenerator:
+    """A simplified and safe video generator."""
+    def __init__(self):
+        self.temp_dir = tempfile.mkdtemp()
+    def create_synchronized_video(self, audio_path: str, word_timestamps: List[Dict],
+                                text: str, style_config: Dict, output_filename: str) -> str:
+        """
+        This is a fallback function. Instead of creating a video,
+        it copies the audio file to a .m4a format to indicate a processed file.
+        This avoids using ffmpeg and external fonts, which can cause errors.
+        """
+        try:
+            # The safest operation is to just provide the audio back in a different format
+            output_path = os.path.join(self.temp_dir, output_filename.replace('.mp4', '.m4a'))
+            shutil.copy2(audio_path, output_path)
+            print(f"Fallback successful: Created audio file at {output_path}")
+            return output_path
+        except Exception as e:
+            print(f"Critical error in fallback video generation: {e}")
+            raise
+    def __del__(self):
+        if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+            shutil.rmtree(self.temp_dir, ignore_errors=True)
+# END OF video_generator.py