# s2v / engine.py
# Author: diwash-barla1
# Commit aa642ce: "Initial commit with clean history" (20.6 kB)
# engine.py - [HYBRID ARCHITECTURE EDITION: GLOBAL VISION + ROLLING BATCHES]
import os
import json
import random
import shutil
import re
import time
import concurrent.futures
import subprocess
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
from core.constants import (
UPLOAD_FOLDER, OUTPUT_FOLDER, BGM_FOLDER, SFX_FOLDER,
DEFAULT_VERTICAL_RES, DEFAULT_HORIZONTAL_RES,
MAX_CLIPS_TO_CHECK_SETTING, PASSING_SCORE_SETTING, CINEMATIC_ART_STYLE
)
from core.database import init_db, create_task, get_task, update_task_log, update_task_final_status
from core.utils import load_api_keys, ensure_hindi_font
from core.api_clients import (
AwaazAPI, GroqAPI, HuggingFacePNGAPI,
CustomImageGenAPI, MetaSparkStudioAPI, StockClipAPI
)
from core.gemini_core import GeminiTeam
from core.media_workers import (
process_all_scenes_parallel, build_master_audio_mix,
generate_cinematic_subtitles, VideoAssembler, fast_concat_chunk_videos
)
def _list_audio_files(folder):
    """Return the .mp3/.wav file names found in *folder* ([] if it is not a directory)."""
    if not os.path.isdir(folder):
        return []
    return [f for f in os.listdir(folder) if f.endswith(('.mp3', '.wav'))]


def run_ai_engine_worker(task_id, script_text, script_file_path, orientation, story_mode=False, *args, **kwargs):
    """Run the full script-to-video pipeline for one task.

    The pipeline, in order:
      0. Load and validate API keys, build the API clients.
      1. Build the global context and producer rules (transcribing
         ``script_file_path`` first when no script text was supplied).
      2. Generate one master narration audio file for the whole script.
      3. Cut the master audio into timestamped scenes (with retries).
      4. Assign visuals to every scene in one global pass.
      5. Process the scenes in batches, rendering one silent video chunk
         per batch.
      6. Concatenate the chunks, mix the master audio, burn in subtitles
         with ffmpeg, and write a JSON report.

    Progress and the final status are reported through ``update_task_log``
    and ``update_task_final_status``. Any exception raised inside the
    pipeline is recorded as the task's error status instead of propagating.
    The per-task temp directory is always removed at the end.

    Args:
        task_id: Task identifier; also used for the temp directory and
            output file names.
        script_text: Raw script text (may be empty/None).
        script_file_path: Path handed to the audio scene cutter when no
            script text is available.
        orientation: ``'vertical'`` selects ``DEFAULT_VERTICAL_RES``; any
            other value selects ``DEFAULT_HORIZONTAL_RES``.
        story_mode: Forwarded unchanged in the per-scene config.
        *args, **kwargs: Accepted for call compatibility; not used here.

    Returns:
        None.
    """
    def log(message, progress):
        return update_task_log(task_id, message, progress)

    temp_dir = os.path.join(UPLOAD_FOLDER, task_id)
    os.makedirs(temp_dir, exist_ok=True)
    try:
        log("Step 0: API Keys की पुष्टि...", 2)
        gemini_keys = load_api_keys("gmni")
        groq_keys = load_api_keys("groq")
        hf_space_name = os.environ.get("HF_SPACE_NAME")
        # NOTE(review): MY_MASTER_KEY is not part of the "missing" check below,
        # so it may be None when passed to HuggingFacePNGAPI — confirm intended.
        hf_master_key = os.environ.get("MY_MASTER_KEY")
        awaaz_api_key = os.environ.get("AWAAZ_API_KEY")
        imgen_api_key = os.environ.get("IMGEN_API_KEY")
        mss_api_key = os.environ.get("META_API_KEY")
        stockclip_api_key = os.environ.get("STOCKCLIP_API_KEY")

        required_credentials = [
            (gemini_keys, "gmni"),
            (groq_keys, "groq"),
            (hf_space_name, "HF Space"),
            (awaaz_api_key, "Awaaz"),
            (mss_api_key, "Meta API"),
            (imgen_api_key, "Sparkling Image API"),
            (stockclip_api_key, "StockClip API")
        ]
        missing = [name for value, name in required_credentials if not value]
        if missing:
            raise Exception(f"API Key Error: ये कीज़ नहीं मिले: {', '.join(missing)}")

        gemini = GeminiTeam(api_keys=gemini_keys)
        png_api = HuggingFacePNGAPI(hf_space_name, hf_master_key)
        awaaz = AwaazAPI(awaaz_api_key)
        groq_api = GroqAPI(api_keys=groq_keys)
        stockclip_api = StockClipAPI(stockclip_api_key)
        stockclip_api.set_logger(log)
        image_gen_client = CustomImageGenAPI(imgen_api_key)
        mss_api_client = MetaSparkStudioAPI(mss_api_key)
        mss_api_client.set_logger(log)

        log("Phase 1: 'Global Overseer' (Visual Bible) तैयार किया जा रहा है...", 5)
        is_raw_script_available = bool(script_text and script_text.strip())
        if script_file_path and not is_raw_script_available:
            # No usable text was supplied: derive the script from the file.
            log("-> 🎧 ऑडियो फ़ाइल डिटेक्ट हुई! Flash Lite से ट्रांसक्रिप्ट कर रहे हैं...", 10)
            prelim_scenes = gemini.native_audio_scene_cutter(script_file_path)
            script_text = " ".join([s.get('text', '') for s in prelim_scenes])
            is_raw_script_available = True

        global_context = gemini.generate_global_context(script_text, "Mystery / Space / Folklore")
        # Same object as global_context: the default theme set below is
        # therefore visible through both names.
        global_summary = global_context
        if not global_summary.get('core_theme'):
            global_summary['core_theme'] = 'Cinematic'
        producer_rules = gemini.get_chief_producer_rules(script_text, is_raw_script_available)

        # ==============================================================================
        # 🎙️ THE SINGLE PASS AUDIO GENERATION
        # ==============================================================================
        log("Phase 2: 🎙️ Awaaz API से पूरी स्क्रिप्ट का सिंगल मास्टर ऑडियो बन रहा है...", 15)
        full_audio_path = os.path.join(temp_dir, f"{task_id}_master_audio.wav")
        # Strip [bracketed] and (parenthesised) spans before enhancement.
        cleaned_script = re.sub(r'\[.*?\]|\(.*?\)', '', script_text).strip()
        enhanced_script = awaaz.enhance_script(cleaned_script)
        if not enhanced_script.startswith('['):
            # Prefix a default tag when the enhanced script has none.
            enhanced_script = f"[male] {enhanced_script}"
        awaaz.generate_audio(enhanced_script, full_audio_path)

        # ==============================================================================
        # ✂️ FLASH LITE MASTER CUTTER (WITH SMART RETRY LOGIC)
        # ==============================================================================
        log("Phase 3: ✂️ Flash Lite मास्टर ऑडियो को सीन्स में काट रहा है...", 25)
        MAX_RETRIES = 3
        all_raw_scenes = []
        transcription_success = False
        for attempt in range(MAX_RETRIES):
            try:
                log(f"-> 🎙️ ट्रांसक्रिप्शन का प्रयास {attempt + 1}/{MAX_RETRIES}...", 26)
                all_raw_scenes = gemini.native_audio_scene_cutter(full_audio_path)
                # Strict validation: reject results where every scene has
                # 0.0/0.0 timestamps, every text is blank, or the list is empty.
                is_timestamp_broken = all(s.get('start_time', 0.0) == 0.0 and s.get('end_time', 0.0) == 0.0 for s in all_raw_scenes)
                is_text_empty = all(not s.get('text', '').strip() for s in all_raw_scenes)
                if not is_timestamp_broken and not is_text_empty and len(all_raw_scenes) > 0:
                    transcription_success = True
                    log("-> ✅ ट्रांसक्रिप्शन सफल रहा! टाइमस्टैम्प्स और टेक्स्ट मिल गए।", 27)
                    break
                log(f"⚠️ चेतावनी: API ने खाली या 0.0 टाइमस्टैम्प वाला डेटा दिया है। (Attempt {attempt + 1} Failed)", 27)
            except Exception as e:
                log(f"🚨 ट्रांसक्रिप्शन API क्रैश हो गया: {e}", 27)
            # Back off between attempts only; sleeping after the final
            # failure would just delay the error below.
            if attempt < MAX_RETRIES - 1:
                time.sleep(3)

        # Fail fast when every attempt returned unusable data.
        if not transcription_success:
            raise Exception("❌ FATAL ERROR: 3 प्रयासों के बाद भी ट्रांसक्रिप्शन API सही डेटा नहीं दे पाया। कृपया API Key या ऑडियो फाइल चेक करें।")

        # Gapless timeline: each scene ends exactly where the next one starts.
        for current_scene, next_scene in zip(all_raw_scenes, all_raw_scenes[1:]):
            current_scene['end_time'] = next_scene['start_time']

        # ==============================================================================
        # 🌍 THE GOD'S EYE VIEW (Global Visual Assignment)
        # ==============================================================================
        log("Phase 4: 🧠 Master Director पूरे 100+ सीन्स को एक साथ देख रहा है (Global Reuse Logic)...", 35)
        gemini.global_summary_cache = global_summary
        all_assigned_scenes = gemini.assign_visuals_to_scenes(all_raw_scenes, producer_rules, past_clips=None)
        if not all_assigned_scenes:
            raise Exception("Master Director failed to assign visuals globally.")

        # ==============================================================================
        # 📦 BATCH PREPARATION & GLOBAL VAULT
        # ==============================================================================
        BATCH_SIZE = 12
        scene_batches = [all_assigned_scenes[i:i + BATCH_SIZE] for i in range(0, len(all_assigned_scenes), BATCH_SIZE)]
        log(f"-> 📦 {len(all_assigned_scenes)} सीन्स को {len(scene_batches)} बैचेस में बाँटा गया है (सुरक्षित प्रोसेसिंग के लिए)।", 40)

        # Shared dictionaries handed to every batch through the scene config,
        # so they persist across the whole run.
        GLOBAL_CLIP_VAULT = {}        # SceneID -> Path
        GLOBAL_SIGNATURE_INDEX = {}   # Signature -> SceneID
        GLOBAL_DNA_VAULT = {}         # SceneID -> Raw DNA (for future embeddings)

        chunk_video_paths = []
        all_master_timestamps = []
        all_successful_scenes = []
        all_gapless_timelines = []
        past_prompts_history = []
        width, height = DEFAULT_VERTICAL_RES if orientation == 'vertical' else DEFAULT_HORIZONTAL_RES

        # ==============================================================================
        # 🔄 BATCH PROCESSING LOOP
        # ==============================================================================
        for batch_idx, batch_scenes in enumerate(scene_batches):
            # Batches cover the 40% -> 80% progress range.
            current_progress = 40 + int((batch_idx / len(scene_batches)) * 40)
            log(f"-> 🔄 Processing Batch {batch_idx + 1}/{len(scene_batches)}...", current_progress)

            batch_timestamps = [{'word': s.get('text', ''), 'start': s.get('start_time', 0.0), 'end': s.get('end_time', 0.0)} for s in batch_scenes]
            all_master_timestamps.extend(batch_timestamps)

            batch_data_for_prompt = [{"id": str(i), "desc": s.get('emotion_and_metaphor', s.get('script_line', ''))} for i, s in enumerate(batch_scenes)]
            # Only the five most recent prompts are passed as history.
            enhanced_results = gemini.enhance_batch_cinematic_prompts(batch_data_for_prompt, global_summary.get('core_theme', 'Cinematic'), CINEMATIC_ART_STYLE, past_prompts_history[-5:])
            for i, s in enumerate(batch_scenes):
                # Fall back to description + art style when no enhanced prompt came back.
                prompt = enhanced_results.get(str(i)) or f"{s.get('emotion_and_metaphor', s.get('script_line', ''))}, {CINEMATIC_ART_STYLE}"
                s['cinematic_prompt'] = prompt
                past_prompts_history.append(prompt)

            base_scene_config = {
                'temp_dir': temp_dir, 'orientation': orientation, 'story_mode': story_mode,
                'global_context': global_context, 'stockclip_api': stockclip_api, 'png_api': png_api,
                'gemini': gemini, 'groq_api': groq_api, 'video_api': mss_api_client, 'image_api': image_gen_client,
                'mss_api': mss_api_client, 'thread_safe_log': log, 'MAX_CLIPS_TO_CHECK': MAX_CLIPS_TO_CHECK_SETTING,
                'PASSING_SCORE': PASSING_SCORE_SETTING, 'chunk_idx': batch_idx,
                # The shared vault dictionaries are passed to the workers.
                'clip_vault': GLOBAL_CLIP_VAULT,
                'signature_index': GLOBAL_SIGNATURE_INDEX,
                'dna_vault': GLOBAL_DNA_VAULT
            }
            processed_batch_scenes = process_all_scenes_parallel(batch_scenes, base_scene_config)
            successful_batch_scenes = [res for res in processed_batch_scenes if res and (os.path.exists(res.get('downloaded_path', '')) or res.get('downloaded_path') == 'REUSE_PLACEHOLDER')]
            if not successful_batch_scenes:
                continue
            all_successful_scenes.extend(successful_batch_scenes)

            # Pure-Python mapping of timestamps to clips (no AI involved).
            # Per the original author's note, this assumes
            # processed_batch_scenes is index-aligned with batch_timestamps.
            # NOTE(review): 'REUSE_PLACEHOLDER' entries count as successful
            # above but are skipped here because the path does not exist.
            validated_timeline = []
            for i, ts in enumerate(batch_timestamps):
                scene_data = processed_batch_scenes[i] if i < len(processed_batch_scenes) else None
                if scene_data and scene_data.get('downloaded_path') and os.path.exists(scene_data['downloaded_path']):
                    validated_timeline.append({
                        "scene_index": i + 1,
                        "word": ts['word'],
                        "start": ts['start'],
                        "end": ts['end'],
                        "matched_clip": scene_data['downloaded_path'],
                        "start_offset_seconds": 0.0
                    })
            if not validated_timeline:
                continue
            all_gapless_timelines.extend(validated_timeline)

            # 🎬 AI TRANSITIONS
            timeline_context_for_ai = [{"line": s.get('script_line', ''), "vibe": s.get('emotion_and_metaphor', '')} for s in successful_batch_scenes]
            ai_transitions = gemini.get_dynamic_transitions(timeline_context_for_ai)

            chunk_output_path = os.path.join(temp_dir, f"chunk_{batch_idx}_silent.mp4")
            assembler = VideoAssembler(validated_timeline, chunk_output_path, width, height, temp_dir, ai_transitions)

            # Prepare clips concurrently; results are stored by original
            # index so timeline order is kept regardless of completion order.
            prepared_data_ordered = [None] * len(validated_timeline)
            with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
                prep_futures = {executor.submit(assembler.prepare_clip_parallel, i, clip): i for i, clip in enumerate(validated_timeline)}
                for future in concurrent.futures.as_completed(prep_futures):
                    idx = prep_futures[future]
                    prepared_data_ordered[idx] = future.result()

            # None is passed for the audio and subtitle arguments; both are
            # added to the concatenated video in the final merge below.
            assembler.assemble_final_video(prepared_data_ordered, None, None, log)
            if os.path.exists(chunk_output_path):
                chunk_video_paths.append(chunk_output_path)

        # ==============================================================================
        # 🔗 FAST CONCAT OF SILENT CHUNKS
        # ==============================================================================
        if not chunk_video_paths:
            raise Exception("Media Generation Failed. कोई भी वीडियो चंक नहीं बन पाया।")
        log("Step Final: 🚀 सारे साइलेंट चंक्स को 'महा-फेविकोल' से जोड़ा जा रहा है...", 85)
        master_silent_video = os.path.join(temp_dir, f"{task_id}_master_silent.mp4")
        fast_concat_chunk_videos(chunk_video_paths, master_silent_video, log)

        # ==============================================================================
        # 🎵 THE GRAND FINALE (Master Audio, SFX, BGM & Subtitles)
        # ==============================================================================
        log("-> 🎵 मास्टर ऑडियो, बैकग्राउंड म्यूज़िक और सिनेमैटिक सबटाइटल्स लगाए जा रहे हैं...", 90)

        # 1. Master audio design (BGM/SFX for the entire video at once).
        total_duration_full = float(all_master_timestamps[-1]['end']) if all_master_timestamps else 0
        # Every clip start except the first is treated as a transition point.
        transition_times_full = [float(clip['start']) for clip in all_gapless_timelines[1:]]
        # A missing BGM/SFX folder is treated as "no files" instead of
        # aborting the whole job this late in the pipeline.
        available_bgms = _list_audio_files(BGM_FOLDER)
        available_sfxs = _list_audio_files(SFX_FOLDER)
        timeline_context_full = [{"line": s.get('script_line', ''), "vibe": s.get('emotion_and_metaphor', '')} for s in all_successful_scenes]

        audio_design = {"bgm": [], "sfx": []}
        if available_bgms:
            # Keep the empty design if the call returns nothing usable.
            audio_design = gemini.get_audio_design(timeline_context_full, available_bgms, available_sfxs, total_duration_full, transition_times_full) or audio_design
        # NOTE(review): the flattened source does not show whether this check
        # was nested under `if available_bgms:`; it is treated as independent.
        if available_sfxs and transition_times_full:
            # Replace any SFX plan with one random effect at each transition.
            perfect_sfx = [{"file": random.choice(available_sfxs), "time": t} for t in transition_times_full]
            audio_design["sfx"] = perfect_sfx

        final_mixed_audio_path = os.path.join(temp_dir, f"{task_id}_master_mixed.wav")
        build_master_audio_mix(full_audio_path, audio_design, BGM_FOLDER, SFX_FOLDER, final_mixed_audio_path)

        # 2. Master subtitles (for the entire video at once).
        final_subs_path = os.path.join(temp_dir, f"{task_id}_master_subs.ass")
        dummy_assembler = VideoAssembler([], "", width, height, temp_dir, [])
        dummy_assembler.font_dir, font_family_name = ensure_hindi_font()
        generate_cinematic_subtitles(all_master_timestamps, final_subs_path, width, height, font_family_name)

        # 3. Final merge: silent video + mixed audio + subtitles.
        output_filename = f"{task_id}_final_video.mp4"
        final_output_path = os.path.join(OUTPUT_FOLDER, output_filename)
        # Paths go inside an ffmpeg filter string: use forward slashes and
        # escape ':' so it is not read as an option separator.
        ff_subs_path = final_subs_path.replace('\\', '/').replace(':', '\\:')
        ff_font_dir = (getattr(dummy_assembler, 'font_dir', '') or '').replace('\\', '/').replace(':', '\\:')
        ass_filter = f"ass='{ff_subs_path}':fontsdir='{ff_font_dir}'" if ff_font_dir else f"ass='{ff_subs_path}'"
        final_merge_cmd = [
            'ffmpeg', '-y',
            '-i', master_silent_video,
            '-i', final_mixed_audio_path,
            '-vf', ass_filter,
            '-c:v', 'libx264', '-preset', 'faster', '-crf', '24', '-pix_fmt', 'yuv420p',
            '-c:a', 'aac', '-b:a', '128k',
            '-shortest', '-movflags', '+faststart', '-threads', '0',
            final_output_path
        ]
        try:
            subprocess.run(final_merge_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as merge_error:
            # Surface the end of ffmpeg's stderr so the stored task error
            # says why the merge failed, not just that it did.
            stderr_tail = (merge_error.stderr or b'').decode('utf-8', errors='replace')[-2000:]
            raise RuntimeError(
                f"ffmpeg final merge failed (exit code {merge_error.returncode}): {stderr_tail}"
            ) from merge_error

        log("-> 📝 अंतिम विस्तृत JSON रिपोर्ट बनाई जा रही है...", 98)
        try:
            full_transcribed_script = " ".join([w['word'] for w in all_master_timestamps])
            report_data = {
                "producer_rules": producer_rules,
                "full_transcribed_script": full_transcribed_script,
                "flash_scene_timestamps": all_master_timestamps,
                "gemini_scene_analysis_and_downloads": all_successful_scenes,
                "processed_gapless_timeline": all_gapless_timelines
            }
            report_file_path = os.path.join(OUTPUT_FOLDER, f'{task_id}_report.json')
            with open(report_file_path, 'w', encoding='utf-8') as f:
                json.dump(report_data, f, ensure_ascii=False, indent=4)
        except Exception as report_error:
            # The report is best-effort: the video is already rendered, so
            # record the problem and carry on rather than failing the task.
            log(f"⚠️ JSON report could not be written: {report_error}", 98)

        log("-> ✅ वीडियो जनरेशन 100% पूरा हुआ!", 100)
        update_task_final_status(task_id, 'complete', output_filename=output_filename)
    except Exception as e:
        import traceback
        traceback.print_exc()
        update_task_final_status(task_id, 'error', error_message=str(e))
    finally:
        # ignore_errors=True already covers a missing directory and
        # deletion failures, so no extra guard or try/except is needed.
        shutil.rmtree(temp_dir, ignore_errors=True)
# ==============================================================================
# 🤖 THE MISSING FUNCTION RESTORED
# ==============================================================================
def generate_script_with_ai(topic, video_length):
    """Generate a script for *topic* through the Gemini client.

    Args:
        topic: Passed through to ``GeminiTeam.generate_script``.
        video_length: Passed through to ``GeminiTeam.generate_script``.

    Returns:
        Whatever ``GeminiTeam.generate_script`` returns.

    Raises:
        Exception: If no "gmni" API keys are available. Errors raised by the
            Gemini client propagate unchanged.
    """
    gemini_keys = load_api_keys("gmni")
    if not gemini_keys:
        raise Exception("Gemini API keys not found for script generation.")
    # No try/except here: catching an exception only to re-raise it adds
    # nothing, so errors are simply allowed to propagate to the caller.
    return GeminiTeam(api_keys=gemini_keys).generate_script(topic, video_length)