insta-maker-3-api

Runtime error

App Files Files Community

hivecorp committed on Mar 19, 2025

Commit

c2e3966

verified ·

1 Parent(s): 3b10a63

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -132

app.py CHANGED Viewed

@@ -6,8 +6,9 @@ import asyncio
 import uuid
 import re
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Tuple
 import math
 class TimingManager:
     def __init__(self):
@@ -30,164 +31,136 @@ def format_time_ms(milliseconds):
     hrs, mins = divmod(mins, 60)
     return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
-def smart_text_split(text, words_per_line, lines_per_segment):
-    # Define natural break patterns
-    end_sentence = r'[.!?]+'
-    mid_sentence = r'[,;:]+'
-    # First split by major punctuation
-    sentences = []
-    current = ""
-    # Clean the text and ensure proper spacing after punctuation
-    text = re.sub(r'([.!?,;:])\s*', r'\1 ', text).strip()
-    # Split into initial chunks by strong punctuation
-    chunks = re.split(f'({end_sentence})', text)
-    temp_sentences = []
-    for i in range(0, len(chunks)-1, 2):
-        if i+1 < len(chunks):
-            temp_sentences.append(chunks[i] + chunks[i+1])
-        else:
-            temp_sentences.append(chunks[i])
-    # Further process each sentence
-    for sentence in temp_sentences:
-        # Split by mid-sentence punctuation if sentence is too long
-        if len(sentence.split()) > words_per_line * 2:
-            sub_chunks = re.split(f'({mid_sentence})', sentence)
-            for i in range(0, len(sub_chunks)-1, 2):
-                if i+1 < len(sub_chunks):
-                    sentences.append(sub_chunks[i] + sub_chunks[i+1])
-                else:
-                    sentences.append(sub_chunks[i])
-        else:
-            sentences.append(sentence)
-    # Process sentences into lines and segments
-    segments = []
-    current_segment = []
-    current_line = []
-    for sentence in sentences:
-        words = sentence.strip().split()
-        while words:
-            # Determine natural break point
-            break_point = min(words_per_line, len(words))
-            # Look for natural breaks
-            for i in range(break_point-1, 0, -1):
-                if any(words[i-1].endswith(p) for p in '.!?,;:') or \
-                   any(words[i].startswith(p) for p in '([{'):
-                    break_point = i
-                    break
-            current_line = words[:break_point]
-            words = words[break_point:]
-            current_segment.append(' '.join(current_line))
-            if len(current_segment) >= lines_per_segment:
-                segments.append('\n'.join(current_segment))
-                current_segment = []
-    # Handle remaining content
-    if current_segment:
-        segments.append('\n'.join(current_segment))
-    return segments
-async def process_segment(segment: str, idx: int, voice: str, rate: str, pitch: str, timing_mgr: TimingManager) -> Tuple[str, AudioSegment]:
-    """Process a single segment with accurate timing"""
-    audio_file = f"temp_segment_{idx}_{uuid.uuid4()}.wav"
     try:
-        tts = edge_tts.Communicate(segment, voice, rate=rate, pitch=pitch)
         await tts.save(audio_file)
-        segment_audio = AudioSegment.from_file(audio_file)
-        segment_duration = len(segment_audio)
-        # Get timing from manager
-        start_time, end_time = timing_mgr.get_timing(segment_duration)
-        # Format SRT entry
-        srt_content = (
-            f"{idx}\n"
-            f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
-            f"{segment}\n\n"
-        )
-        return srt_content, segment_audio
     finally:
         if os.path.exists(audio_file):
             os.remove(audio_file)
-async def process_chunk_parallel(chunks: List[str], start_idx: int, voice: str, rate: str, pitch: str, timing_mgr: TimingManager) -> Tuple[str, AudioSegment]:
-    """Process chunks with sequential timing"""
-    combined_audio = AudioSegment.empty()
-    srt_content = ""
-    # Process segments sequentially to maintain timing
-    for i, segment in enumerate(chunks, start_idx):
-        srt_part, audio_part = await process_segment(segment, i, voice, rate, pitch, timing_mgr)
-        srt_content += srt_part
-        combined_audio += audio_part
-    return srt_content, combined_audio
-async def generate_accurate_srt(text, voice, rate, pitch, words_per_line, lines_per_segment):
-    segments = smart_text_split(text, words_per_line, lines_per_segment)
-    timing_mgr = TimingManager()
-    # Process in smaller chunks
-    chunk_size = 5
-    chunks = [segments[i:i + chunk_size] for i in range(0, len(segments), chunk_size)]
-    final_srt = ""
     final_audio = AudioSegment.empty()
-    current_index = 1
-    # Process chunks in parallel but maintain sequential timing
-    chunk_tasks = []
-    for i, chunk in enumerate(chunks):
-        start_idx = current_index + (i * chunk_size)
-        task = process_chunk_parallel(chunk, start_idx, voice, rate, pitch, timing_mgr)
-        chunk_tasks.append(task)
-    # Gather results in order
-    chunk_results = await asyncio.gather(*chunk_tasks)
-    # Combine results
-    for srt_content, audio_content in chunk_results:
-        final_srt += srt_content
-        final_audio += audio_content
-    # Export final files
     unique_id = uuid.uuid4()
     audio_path = f"final_audio_{unique_id}.mp3"
     srt_path = f"final_subtitles_{unique_id}.srt"
     final_audio.export(audio_path, format="mp3", bitrate="320k")
     with open(srt_path, "w", encoding='utf-8') as f:
-        f.write(final_srt)
     return srt_path, audio_path
 async def process_text(text, pitch, rate, voice, words_per_line, lines_per_segment):
-    # Set default pitch and rate strings that work well
-    pitch_str = "+0Hz"  # neutral pitch
-    rate_str = "+0%"    # neutral rate
-    # Only modify if user has changed values
-    if pitch != 0:
-        pitch_str = f"{pitch:+d}Hz"
-    if rate != 0:
-        rate_str = f"{rate:+d}%"
     srt_path, audio_path = await generate_accurate_srt(
-        text,
         voice_options[voice],
         rate_str,
         pitch_str,

 import uuid
 import re
 from concurrent.futures import ThreadPoolExecutor
+from typing import List, Tuple, Optional
 import math
+from dataclasses import dataclass
 class TimingManager:
     def __init__(self):
     hrs, mins = divmod(mins, 60)
     return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
+@dataclass
+class Segment:
+    """One subtitle segment plus the audio and timing attached to it during processing."""
+    # Sequential identifier; TextProcessor numbers segments from 1 and it is written as the SRT entry index
+    id: int
+    # Subtitle text; its lines are joined with '\n'
+    text: str
+    # Start/end offsets on the output timeline; rendered with format_time_ms, so presumably milliseconds
+    start_time: int = 0
+    end_time: int = 0
+    # len() of `audio`, set by process_segment_with_timing (presumably milliseconds — TODO confirm)
+    duration: int = 0
+    # Synthesized speech for this segment; stays None until process_segment_with_timing fills it in
+    audio: Optional[AudioSegment] = None
+class TextProcessor:
+    """Splits raw text into numbered subtitle segments of bounded line length and line count."""
+    def __init__(self, words_per_line: int, lines_per_segment: int):
+        self.words_per_line = words_per_line
+        self.lines_per_segment = lines_per_segment
+        # Punctuation classes by break strength; exposed as an attribute but not consulted by split_into_segments
+        self.break_patterns = {
+            'strong': r'[.!?]+',
+            'medium': r'[,;:]',
+            'weak': r'[\s]+'
+        }
+    def split_into_segments(self, text: str) -> List[Segment]:
+        """Return the segments of `text`; a line ends at sentence punctuation or at words_per_line words."""
+        # Collapse whitespace runs, then force a single space after every punctuation mark
+        normalized = re.sub(r'\s+', ' ', text.strip())
+        normalized = re.sub(r'([.!?,;:])\s*', r'\1 ', normalized)
+        # Pass 1: cut the word stream into lines
+        lines = []
+        pending_words = []
+        for token in normalized.split():
+            pending_words.append(token)
+            line_is_full = len(pending_words) >= self.words_per_line
+            if token.endswith(('.', '!', '?')) or line_is_full:
+                lines.append(' '.join(pending_words))
+                pending_words = []
+        if pending_words:
+            # Trailing words that never reached a break still form a final line
+            lines.append(' '.join(pending_words))
+        # Pass 2: group consecutive lines into segments, numbering them from 1
+        segments = []
+        bucket = []
+        for line in lines:
+            bucket.append(line)
+            if len(bucket) >= self.lines_per_segment:
+                segments.append(Segment(id=len(segments) + 1, text='\n'.join(bucket)))
+                bucket = []
+        if bucket:
+            # Last segment may hold fewer lines than lines_per_segment
+            segments.append(Segment(id=len(segments) + 1, text='\n'.join(bucket)))
+        return segments
+async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
+    """Synthesize speech for one segment and record its audio and duration.
+    The text is rendered with edge_tts into a uniquely named temporary file, loaded
+    back as an AudioSegment, and stored on `segment` (which is mutated in place)
+    together with its length. The temporary file is removed whether or not
+    synthesis succeeds. Returns the same `segment` object that was passed in.
+    """
+    # The uuid4 suffix keeps tasks that run concurrently from colliding on the file name
+    audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
     try:
+        tts = edge_tts.Communicate(segment.text, voice, rate=rate, pitch=pitch)
         await tts.save(audio_file)
+        segment.audio = AudioSegment.from_file(audio_file)
+        # len() of the loaded audio is taken as the duration (presumably milliseconds — TODO confirm)
+        segment.duration = len(segment.audio)
+        return segment
     finally:
         if os.path.exists(audio_file):
             os.remove(audio_file)
+async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int) -> Tuple[str, str]:
+    """Synthesize `text` and write a combined MP3 plus a matching SRT subtitle file.
+    The text is split into segments by TextProcessor, every segment is synthesized
+    concurrently, and the results are laid out back to back with a fixed pause
+    between consecutive segments. The same pause is inserted into the audio as
+    silence so that subtitle timestamps stay aligned with the exported sound.
+    Returns a tuple (srt_path, audio_path) of the two files written to the
+    current working directory.
+    """
+    gap_ms = 100  # pause between consecutive segments, on both the SRT timeline and the audio
+    processor = TextProcessor(words_per_line, lines_per_segment)
+    segments = processor.split_into_segments(text)
+    # Synthesize all segments concurrently; gather() yields results in the order the tasks were passed
+    processed_segments = await asyncio.gather(*(
+        process_segment_with_timing(segment, voice, rate, pitch)
+        for segment in segments
+    ))
+    current_time = 0
     final_audio = AudioSegment.empty()
+    srt_entries = []
+    for position, segment in enumerate(processed_segments):
+        if position:
+            # Without real silence here the timeline would run gap_ms ahead of the
+            # audio for every segment already emitted, and subtitles would drift late
+            final_audio += AudioSegment.silent(duration=gap_ms, frame_rate=segment.audio.frame_rate)
+            current_time += gap_ms
+        segment.start_time = current_time
+        segment.end_time = current_time + segment.duration
+        srt_entries.append(
+            f"{segment.id}\n"
+            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
+            f"{segment.text}\n\n"
+        )
+        final_audio += segment.audio
+        current_time = segment.end_time
+    # Unique suffix so files from different requests never overwrite each other
     unique_id = uuid.uuid4()
     audio_path = f"final_audio_{unique_id}.mp3"
     srt_path = f"final_subtitles_{unique_id}.srt"
     final_audio.export(audio_path, format="mp3", bitrate="320k")
     with open(srt_path, "w", encoding='utf-8') as f:
+        f.write("".join(srt_entries))
     return srt_path, audio_path
 async def process_text(text, pitch, rate, voice, words_per_line, lines_per_segment):
+    # Format pitch and rate strings
+    pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
+    rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
     srt_path, audio_path = await generate_accurate_srt(
+        text,
         voice_options[voice],
         rate_str,
         pitch_str,