"""Maya: Gradio front-end for reading comics/documents aloud in Tamil.

The module wires OCR, translation and TTS helpers from ``pipeline`` into a
Gradio UI (comic reader, plain text-to-speech, dubbing tab) and mounts that UI
on a FastAPI application next to the dubbing backend.
"""

import gradio as gr
import psutil
import logging
import os
import asyncio
import nest_asyncio


# --- NEURAL SILENCER: Fix for persistent "Invalid file descriptor: -1" logs ---
def _silence_asyncio_ghosts():
    """Wrap ``BaseEventLoop.__del__`` so cleanup-time errors are ignored.

    Only ``ValueError``, ``AttributeError`` and ``RuntimeError`` raised by the
    original finalizer are suppressed; anything else still propagates.
    """
    from asyncio.base_events import BaseEventLoop

    original_del = BaseEventLoop.__del__

    def patched_del(self):
        try:
            if original_del:
                original_del(self)
        except (ValueError, AttributeError, RuntimeError):
            pass  # Silently ignore cleanup artifacts

    BaseEventLoop.__del__ = patched_del


# Must run before the pipeline modules are imported below.
_silence_asyncio_ghosts()
# nest_asyncio.apply()

from pipeline.ocr import extract_text_from_image
from pipeline.translation import translate_to_tamil
from pipeline.tts import generate_tamil_speech
from pipeline.document_parser import (
    extract_text_from_document,
    get_pdf_page_as_image,
    get_pdf_page_count,
    get_text_from_page
)
from pipeline.maya_chat_engine import get_maya_response
import threading
from concurrent.futures import ThreadPoolExecutor
import re
import numpy as np

# Zero-width split point in front of every "[Panel N]" marker (case-insensitive).
_PANEL_SPLIT_RE = re.compile(r'(?=\[Panel\s*\d+\])', re.IGNORECASE)
# Separates a leading "[Panel N]" header from the text that follows it.
_PANEL_HEADER_RE = re.compile(r'(\[Panel\s*\d+\])\s*(.*)', re.DOTALL | re.IGNORECASE)
# Seconds of silence inserted between the audio of consecutive panels.
_PANEL_PAUSE_SECONDS = 1.5


def run_cinematic_pipeline(extracted_text, emotion_choice, spicy_mode):
    """Translate text panel by panel and stitch the per-panel speech together.

    The text is split in front of every ``[Panel N]`` marker. Each panel body
    is translated with ``translate_to_tamil`` and voiced with
    ``generate_tamil_speech``; panel headers are kept untranslated.

    Args:
        extracted_text: Source text, optionally containing ``[Panel N]`` markers.
        emotion_choice: Voice style forwarded to ``generate_tamil_speech``.
        spicy_mode: Forwarded to ``translate_to_tamil`` as ``spicy``.

    Returns:
        ``(tamil_text, audio)`` where ``audio`` is ``(sample_rate, samples)``
        or ``None`` when no speech was produced. On any exception a fixed
        apology string and ``None`` are returned instead of raising.
    """
    final_tamil_text = []
    final_audio_chunks = []
    master_sample_rate = None

    try:
        # Splitting unconditionally keeps marker detection consistent with the
        # case-insensitive pattern; text without markers comes back as one item.
        raw_panels = _PANEL_SPLIT_RE.split(extracted_text)

        for p_text in raw_panels:
            p_text = p_text.strip()
            if not p_text:
                continue

            panel_header = ""
            content_to_translate = p_text
            match = _PANEL_HEADER_RE.match(p_text)
            if match:
                panel_header = match.group(1)
                content_to_translate = match.group(2)

            if not content_to_translate.strip():
                # Header-only panel: keep the marker, nothing to translate or voice.
                if panel_header:
                    final_tamil_text.append(panel_header)
                continue

            p_tamil = translate_to_tamil(content_to_translate, spicy=spicy_mode)
            if panel_header:
                final_tamil_text.append(f"{panel_header}\n{p_tamil}")
            else:
                final_tamil_text.append(p_tamil)

            sr, a_data = generate_tamil_speech(p_tamil, emotion_choice)
            if sr and a_data is not None:
                master_sample_rate = sr
                final_audio_chunks.append(a_data)

        tamil_translation = "\n\n".join(final_tamil_text)

        if not (master_sample_rate and final_audio_chunks):
            return tamil_translation, None

        pause_samples = int(master_sample_rate * _PANEL_PAUSE_SECONDS)
        silence_array = np.zeros(pause_samples, dtype=np.float32)

        # Interleave the chunks with silence, without a trailing pause.
        spliced_audio = []
        for i, chunk in enumerate(final_audio_chunks):
            if i:
                spliced_audio.append(silence_array)
            spliced_audio.append(chunk)

        return tamil_translation, (master_sample_rate, np.concatenate(spliced_audio))
    except Exception as e:
        logging.error(f"CINEMATIC PIPELINE ERROR: {e}")
        return "Maya is having trouble with the cinematic flow.", None


# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Global Cache for Prefetched Pages
# Key: (pdf_path, page_num, voice_style, spicy) -> Value: (original, tamil, audio)
PAGE_CACHE = {}
PAGE_CACHE_MAX_ENTRIES = 10
CACHE_LOCK = threading.Lock()
PREFETCH_EXECUTOR = ThreadPoolExecutor(max_workers=1)


def _cache_store(cache_key, value):
    """Insert ``value`` into ``PAGE_CACHE`` and evict the oldest entries over the limit."""
    with CACHE_LOCK:
        PAGE_CACHE[cache_key] = value
        # dicts preserve insertion order, so the first key is the oldest entry.
        while len(PAGE_CACHE) > PAGE_CACHE_MAX_ENTRIES:
            PAGE_CACHE.pop(next(iter(PAGE_CACHE)))


def check_resources():
    """Log the available RAM, warn below 1 GB, and return the amount in GB."""
    mem = psutil.virtual_memory()
    available_gb = mem.available / (1024**3)
    logging.info(f"System Resources: {available_gb:.2f} GB RAM available.")
    if available_gb < 1.0:
        logging.warning("EXTREMELY LOW MEMORY DETECTED! Application may crash.")
    return available_gb


# Expressive Voice Styles
# Background Atmosphere Sounds
BGM_LINKS = {
    "None": "",
    "Soft Rain 🌧️": "https://www.soundjay.com/nature/sounds/rain-07.mp3",
    "Romantic Piano 🎹": "https://www.soundjay.com/misc/sounds/music-box-1.mp3",
    "Midnight Jazz 🎷": "https://www.soundjay.com/misc/sounds/bell-ringing-05.mp3",
    "Summer Night 🌙": "https://www.soundjay.com/nature/sounds/cricket-chirping-01.mp3",
    "Heartbeat 💓": "https://www.soundjay.com/misc/sounds/heartbeat-01.mp3"
}

VOICE_STYLES = [
    "Cheerful (Maya)",
    "Excited (Maya)",
    "Sad & Emotional (Sita)",
    "Dramatic Narrator (Sita)",
    "Old Wise Woman",
    "Playful Child",
    "Brave Heroine",
    "Deep & Serious",
    "Calm Storyteller",
    "Professional News"
]


def process_standard_pipeline(image, document, input_text, emotion_choice):
    """Translate and voice text gathered from a document, an image and/or typed input.

    Args:
        image: Optional image passed to ``extract_text_from_image``.
        document: Optional document passed to ``extract_text_from_document``.
        input_text: Optional text typed by the user.
        emotion_choice: Voice style forwarded to ``generate_tamil_speech``.

    Returns:
        ``(source_text, tamil_text, audio)`` where ``audio`` is
        ``(sample_rate, samples)`` or ``None`` when no speech is available.
    """
    text_to_translate = ""
    if document is not None:
        text_to_translate += extract_text_from_document(document) + "\n"
    if image is not None:
        text_to_translate += extract_text_from_image(image, is_comic=False) + " "
    if input_text:
        text_to_translate += input_text

    text_to_translate = text_to_translate.strip()
    if not text_to_translate:
        return "No text detected", "", None

    tamil_translation = translate_to_tamil(text_to_translate)
    sample_rate, audio_data = generate_tamil_speech(tamil_translation, emotion_choice)
    # Same guard as run_cinematic_pipeline: never hand back a (None, None) pair.
    audio = (sample_rate, audio_data) if sample_rate and audio_data is not None else None
    return text_to_translate, tamil_translation, audio


def load_comic_page(pdf_path, page_num):
    """Render one page and build its "Page X of Y" status line.

    Returns:
        ``(image_path, status, page_num)``; ``(None, "Upload a PDF first", 0)``
        when no document is loaded.
    """
    if not pdf_path:
        return None, "Upload a PDF first", 0
    img_path = get_pdf_page_as_image(pdf_path, page_num)
    total_pages = get_pdf_page_count(pdf_path)
    status = f"Page {page_num + 1} of {total_pages}"
    return img_path, status, page_num


def prefetch_pages(pdf_path, current_page, voice_style, spicy=False, count=5):
    """Background worker to process upcoming pages.

    Processes up to ``count`` pages after ``current_page`` and stores each
    result in ``PAGE_CACHE``. Pages already cached are skipped; a failure on
    one page is logged and does not stop the remaining pages.
    """
    total_pages = get_pdf_page_count(pdf_path)
    for offset in range(1, count + 1):
        target_page = current_page + offset
        if target_page >= total_pages:
            break

        cache_key = (pdf_path, target_page, voice_style, spicy)
        with CACHE_LOCK:
            if cache_key in PAGE_CACHE:
                continue

        try:
            logging.info(f"PREFETCH: Processing Page {target_page+1} in background...")
            img_path = get_pdf_page_as_image(pdf_path, target_page)
            if not img_path:
                continue

            # Prefer the embedded text layer; fall back to OCR when it is (nearly) empty.
            text = get_text_from_page(pdf_path, target_page)
            if not text or len(text.strip()) < 5:
                text = extract_text_from_image(img_path)

            if text and text.strip():
                tam, aud = run_cinematic_pipeline(text, voice_style, spicy)
                _cache_store(cache_key, (text, tam, aud))
        except Exception as e:
            logging.error(f"PREFETCH ERROR on Page {target_page+1}: {e}")


def process_comic_page(pdf_path, page_num, emotion_choice, heat_level):
    """Extract, translate and voice one comic page, using the cache when possible.

    Args:
        pdf_path: Path of the loaded document.
        page_num: Zero-based page index.
        emotion_choice: Voice style; also part of the cache key.
        heat_level: Slider value; values above 70 enable spicy translation.

    Returns:
        ``(original_text, tamil_text, audio)``. Failures are reported through
        the first element instead of being raised.
    """
    try:
        if not pdf_path:
            return "No page loaded", "", None

        spicy_mode = heat_level > 70
        cache_key = (pdf_path, page_num, emotion_choice, spicy_mode)

        # Check the cache before rendering so a hit costs no extra page render.
        with CACHE_LOCK:
            cached = PAGE_CACHE.get(cache_key)
        if cached is not None:
            return cached

        img_path = get_pdf_page_as_image(pdf_path, page_num)
        if not img_path:
            return "Failed to render image", "", None

        # --- STAGE 1: OCR ---
        try:
            extracted_text = get_text_from_page(pdf_path, page_num)
            if not extracted_text or len(extracted_text.strip()) < 5:
                extracted_text = extract_text_from_image(img_path)
        except Exception as e:
            logging.error(f"OCR ERROR: {e}")
            extracted_text = f"Maya couldn't read the text. (Error: {e})"

        if not extracted_text or not extracted_text.strip():
            extracted_text = "No text found on this page."

        # --- CINEMATIC STAGE 2 & 3: Translation & Audio ---
        tamil_translation, audio_tuple = run_cinematic_pipeline(extracted_text, emotion_choice, spicy_mode)

        result = (extracted_text, tamil_translation, audio_tuple)
        _cache_store(cache_key, result)

        PREFETCH_EXECUTOR.submit(prefetch_pages, pdf_path, page_num, emotion_choice, spicy_mode)
        return result
    except Exception as e:
        # logging.exception records the traceback together with the message.
        logging.exception(f"GLOBAL PROCESS ERROR: {e}")
        return f"CRITICAL CRASH: {e}", "", None


# Custom Premium CSS
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&family=Outfit:wght@500;700&display=swap');
:root { --primary: #6366f1; --secondary: #a855f7; --bg-dark: #0f172a; --panel-bg: rgba(30, 41, 59, 0.7); }
body { background-color: var(--bg-dark); color: #f1f5f9; font-family: 'Inter', sans-serif; }
.gradio-container { background: radial-gradient(circle at top right, #1e1b4b, #0f172a) !important; }
h1 { font-family: 'Outfit', sans-serif; background: linear-gradient(to right, #818cf8, #c084fc); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700; }
.glass { background: var(--panel-bg) !important; backdrop-filter: blur(12px); border: 1px solid rgba(255, 255, 255, 0.1) !important; border-radius: 16px !important; box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1); transition: all 0.3s ease; }
.glass:hover { border: 1px solid rgba(255, 255, 255, 0.2) !important; box-shadow: 0 8px 32px rgba(99, 102, 241, 0.2); }
#maya_chat_log { border-radius: 12px; padding: 12px; background: rgba(99, 102, 241, 0.1); border: 1px solid rgba(99, 102, 241, 0.2); margin-bottom: 10px; animation: fadeIn 0.5s ease-out; }
@keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } }
#main_comic img { border-radius: 12px; box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.3); transition: transform 0.5s cubic-bezier(0.4, 0, 0.2, 1); }
#main_comic img:hover { transform: scale(1.02); }
.gr-button-primary { background: linear-gradient(135deg, var(--primary), var(--secondary)) !important; border: none !important; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); transition: all 0.3s ease !important; font-weight: 600 !important; }
.gr-button-primary:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(99, 102, 241, 0.4); }
#maya_chat_log::before { content: "Maya is thinking..."; display: block; font-size: 0.8em; color: var(--primary); margin-bottom: 5px; opacity: 0.7; }
#maya_audio_player { display: none; }
.boss-active { display: block !important; }
/* Fix for Audio Timeline Visibility */
#comic_audio_player .track { background-color: rgba(0, 0, 0, 0.4) !important; border-radius: 4px; }
#comic_audio_player .time { color: #818cf8 !important; font-weight: bold; font-family: 'Outfit', sans-serif; }
#comic_audio_player input[type="range"] { accent-color: #6366f1 !important; }
"""

# UI
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened copy of this file - confirm it against the intended layout.
with gr.Blocks(title="Maya: Immersive Manga AI", css=CUSTOM_CSS) as demo:
    gr.Markdown("# 🎭 Maya: Immersive Tamil Manga AI")
    gr.Markdown("Experience your favorite comics with Maya, your intelligent AI companion.")

    current_page = gr.State(0)
    comic_pdf_path = gr.State(None)

    with gr.Tabs():
        with gr.Tab("📖 Comic Reader Mode"):
            with gr.Row():
                with gr.Column(scale=5, min_width=300, elem_classes=["glass"]):
                    comic_display = gr.Image(label="Comic Page", type="filepath", height=600, elem_id="main_comic")
                    with gr.Row():
                        prev_btn = gr.Button("⬅️ Prev", scale=1)
                        page_status = gr.Label(value="Upload PDF", scale=2)
                        next_btn = gr.Button("Next ➡️", scale=1)

                with gr.Column(scale=4, min_width=300, elem_classes=["glass"]):
                    with gr.Group():
                        gr.Markdown("### ⚙️ Master Settings")
                        comic_upload = gr.File(label="Upload (PDF/EPUB)", file_types=[".pdf", ".epub"], height=80)
                        voice_style_comic = gr.Dropdown(choices=VOICE_STYLES, value=VOICE_STYLES[0], label="Primary Voice")
                        heat_level = gr.Slider(minimum=0, maximum=100, value=50, label="🌶️ Translation Heat Level")

                        share_btn = gr.Button("🔗 Share with Friends", variant="secondary", size="sm")
                        share_status = gr.Markdown("")
                        share_btn.click(None, None, None, js="""
                        () => {
                            const url = "https://huggingface.co/spaces/ranaspark/voice";
                            navigator.clipboard.writeText(url);
                            alert("Link Copied! Share it with your friends: " + url);
                        }
                        """)

                        auto_play = gr.Checkbox(label="🔄 Auto-Play Next Page", value=False)
                        read_page_btn = gr.Button("🔊 Read This Page", variant="primary")

                    with gr.Accordion("🎭 Character Memory", open=False):
                        char_a_voice = gr.Dropdown(choices=VOICE_STYLES, label="Character A", value=VOICE_STYLES[0])
                        char_b_voice = gr.Dropdown(choices=VOICE_STYLES, label="Character B", value=VOICE_STYLES[0])

                    bgm_choice = gr.Dropdown(choices=list(BGM_LINKS.keys()), value="None", label="Background Atmosphere")
                    bgm_player = gr.HTML(value="")

                    # Boss Key & Vibration JS
                    # NOTE(review): this markup is empty in the copy that was reviewed.
                    gr.HTML("""
""")

                    comic_text = gr.Textbox(label="Original", lines=3)
                    comic_tamil = gr.Textbox(label="Tamil", lines=3)
                    comic_audio = gr.Audio(label="Speech", elem_id="comic_audio_player")

        with gr.Tab("✍️ Text to Speech"):
            with gr.Row():
                with gr.Column():
                    input_text = gr.Textbox(lines=10, label="✍️ Paste or Type your story here", placeholder="Enter English text...")
                    voice_style_std = gr.Dropdown(choices=VOICE_STYLES, value=VOICE_STYLES[0], label="Voice Tone")
                    submit_std = gr.Button("🚀 Generate Tamil Speech", variant="primary")
                with gr.Column():
                    out_text = gr.Textbox(label="Original Text (Cleaned)", lines=5)
                    out_tamil = gr.Textbox(label="Tamil Translation", lines=5)
                    out_audio = gr.Audio(label="Audio Output")

        with gr.Tab("🎥 Video Dubbing Studio"):
            gr.Markdown("### 🎬 Cinematic AI Video Dubbing")
            gr.Markdown("Process your videos with automated translation, multi-speaker voice cloning, and lip sync.")
            gr.HTML('')

    # --- Dynamic Temperature & Heartbeat Speed Logic ---
    def update_mood(level, bgm):
        """Placeholder handler: returns a no-op update."""
        # JS to update color and potentially heartbeat speed if possible
        return gr.update()

    # NOTE(review): "updateTemp" is not defined anywhere in the visible source;
    # presumably it comes from page-level script - confirm.
    heat_level.change(None, inputs=[heat_level], js="updateTemp")

    # --- BGM Logic ---
    def update_bgm(choice, level):
        """Return the player markup for the selected background track."""
        link = BGM_LINKS.get(choice, "")
        if not link:
            return ""
        # If heartbeat, adjust playback rate based on level
        speed = 1.0 + (level / 100.0)  # 1.0x to 2.0x speed
        # NOTE(review): the returned markup is empty in the copy that was
        # reviewed, so `link` and `speed` are currently unused.
        return f''

    bgm_choice.change(update_bgm, inputs=[bgm_choice, heat_level], outputs=[bgm_player])

    # Comic Logic
    def start_comic(file):
        """Load the first page of a newly uploaded file."""
        if not file:
            return None, "No file", 0, None
        img, status, page = load_comic_page(file.name, 0)
        return img, status, page, file.name

    comic_upload.change(start_comic, inputs=[comic_upload], outputs=[comic_display, page_status, current_page, comic_pdf_path])

    def go_next(pdf, page):
        """Show the following page, stopping at the last page of the document."""
        new_page = page + 1
        if pdf:
            # Clamp so the page state never points past the end of the document.
            last_page = max(get_pdf_page_count(pdf) - 1, 0)
            new_page = min(new_page, last_page)
        return load_comic_page(pdf, new_page)

    def go_prev(pdf, page):
        """Show the preceding page, stopping at the first page."""
        new_page = max(0, page - 1)
        return load_comic_page(pdf, new_page)

    # Navigation logic...
    next_btn.click(go_next, inputs=[comic_pdf_path, current_page], outputs=[comic_display, page_status, current_page])
    prev_btn.click(go_prev, inputs=[comic_pdf_path, current_page], outputs=[comic_display, page_status, current_page])

    read_page_btn.click(
        process_comic_page,
        inputs=[comic_pdf_path, current_page, voice_style_comic, heat_level],
        outputs=[comic_text, comic_tamil, comic_audio]
    )

    # --- Auto-Play Logic (JS Listener) ---
    hidden_auto_next = gr.Button("Auto Next", visible=False, elem_id="hidden_auto_next")

    # This JS monitors the audio player and clicks the hidden button when it ends.
    # The window-level flag keeps repeated invocations (one per audio change)
    # from stacking up additional interval timers.
    js_listener = """
    function() {
        if (window.__mayaAutoNextTimer) { return; }
        window.__mayaAutoNextTimer = setInterval(function() {
            const audio = document.querySelector('#comic_audio_player audio');
            if (audio && !audio.onended) {
                audio.onended = function() {
                    const btn = document.querySelector('button#hidden_auto_next');
                    if (btn) btn.click();
                };
            }
        }, 1000);
    }
    """

    # Trigger the JS listener when audio is loaded
    comic_audio.change(None, None, None, js=js_listener)

    def handle_auto_play(is_enabled, pdf, page, voice, heat_level):
        """Advance to the next page and read it aloud when auto-play is enabled.

        Returns six values matching the outputs wired below: image, status,
        page number, original text, Tamil text and audio.
        """
        try:
            if not is_enabled or not pdf:
                return tuple(gr.update() for _ in range(6))

            # 1. Go to next page
            new_page = page + 1
            img, status, p_num = load_comic_page(pdf, new_page)
            if not img:  # End of book
                # Keep the stored page on the last page that actually rendered.
                return gr.update(), status, page, gr.update(), gr.update(), gr.update()

            # 2. Process the new page (Using Hybrid Mode)
            txt, tam, aud = process_comic_page(pdf, p_num, voice, heat_level)
            return img, status, p_num, txt, tam, aud
        except Exception as e:
            logging.error(f"AUTO-PLAY ERROR: {e}")
            return gr.update(), f"Auto-Play Error: {e}", page, f"CRASH: {e}", "", None

    # The hidden button triggers the actual logic
    hidden_auto_next.click(
        handle_auto_play,
        inputs=[auto_play, comic_pdf_path, current_page, voice_style_comic, heat_level],
        outputs=[comic_display, page_status, current_page, comic_text, comic_tamil, comic_audio]
    )

    # Trigger JS listener on app start too
    demo.load(None, None, None, js=js_listener)

    # Standard Logic (Text Only)
    submit_std.click(
        process_standard_pipeline,
        inputs=[gr.State(None), gr.State(None), input_text, voice_style_std],
        outputs=[out_text, out_tamil, out_audio]
    )

if __name__ == "__main__":
    check_resources()

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
import os
import gradio as gr
from dubbing_backend.main import app as api_app

app = FastAPI()

# Mount backend API
app.mount("/api", api_app)

# Mount React UI
if os.path.exists("dist"):
    app.mount("/dubbing-ui", StaticFiles(directory="dist", html=True))

# Mount Gradio at root
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn

    # Serve the already-built app object: an "app:app" import string would make
    # uvicorn import this module a second time and depends on the file name.
    uvicorn.run(app, host="0.0.0.0", port=7860)