Update app.py
Browse files
app.py
CHANGED
|
@@ -5,38 +5,55 @@ import os
|
|
| 5 |
import cv2
|
| 6 |
import subprocess
|
| 7 |
|
| 8 |
-
# ---
|
| 9 |
try:
|
| 10 |
nlp = spacy.load("en_core_web_sm")
|
| 11 |
except OSError:
|
| 12 |
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
|
| 15 |
-
# ---
|
| 16 |
ASSET_MAP = {
|
| 17 |
"man": "assets/characters/man.png",
|
| 18 |
"woman": "assets/characters/woman.png",
|
| 19 |
"dog": "assets/characters/dog.png",
|
| 20 |
"park": "assets/backgrounds/park.jpg",
|
| 21 |
-
"office": "assets/backgrounds/office.jpg"
|
|
|
|
|
|
|
|
|
|
| 22 |
}
|
| 23 |
|
| 24 |
FRAME_FOLDER = "frames"
|
| 25 |
VIDEO_OUTPUT = "generated_video.mp4"
|
| 26 |
|
| 27 |
-
# --- Extract characters
|
| 28 |
def extract_entities(prompt):
|
| 29 |
doc = nlp(prompt)
|
| 30 |
characters = []
|
| 31 |
scenes = []
|
|
|
|
|
|
|
| 32 |
for ent in doc.ents:
|
| 33 |
if ent.label_ in ["PERSON", "ORG"]:
|
| 34 |
characters.append(ent.text.lower())
|
| 35 |
elif ent.label_ in ["LOC", "GPE", "FAC"]:
|
| 36 |
scenes.append(ent.text.lower())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return characters, scenes
|
| 38 |
|
| 39 |
-
# --- Compose a single frame ---
|
| 40 |
def compose_frame(background_path, character_paths, output_path, char_positions=None):
|
| 41 |
bg = Image.open(background_path).convert('RGBA')
|
| 42 |
for idx, char_path in enumerate(character_paths):
|
|
@@ -45,7 +62,7 @@ def compose_frame(background_path, character_paths, output_path, char_positions=
|
|
| 45 |
bg.paste(char_img, pos, char_img)
|
| 46 |
bg.save(output_path)
|
| 47 |
|
| 48 |
-
# --- Create video from frames ---
|
| 49 |
def create_video_from_frames(frame_folder, output_path, fps=24):
|
| 50 |
images = sorted([img for img in os.listdir(frame_folder) if img.endswith(".png")])
|
| 51 |
if not images:
|
|
@@ -62,30 +79,29 @@ def create_video_from_frames(frame_folder, output_path, fps=24):
|
|
| 62 |
# --- Main function triggered by Gradio ---
|
| 63 |
def generate_video(prompt):
|
| 64 |
characters, scenes = extract_entities(prompt)
|
| 65 |
-
|
| 66 |
-
if not scenes:
|
| 67 |
-
return None, "No scene detected! Please include a place in your prompt."
|
| 68 |
|
| 69 |
os.makedirs(FRAME_FOLDER, exist_ok=True)
|
| 70 |
|
| 71 |
bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
|
| 72 |
char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]
|
| 73 |
|
| 74 |
-
total_frames = 48 #
|
| 75 |
for i in range(total_frames):
|
| 76 |
-
positions = [(100 + i*2, 200) for _ in char_paths]
|
| 77 |
frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
|
| 78 |
compose_frame(bg_path, char_paths, frame_path, char_positions=positions)
|
| 79 |
|
| 80 |
create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
# --- Gradio interface ---
|
| 84 |
iface = gr.Interface(
|
| 85 |
fn=generate_video,
|
| 86 |
inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
|
| 87 |
outputs=[gr.Video(), gr.Textbox()],
|
| 88 |
-
title="Text to Video AI App"
|
| 89 |
)
|
| 90 |
|
| 91 |
if __name__ == "__main__":
|
|
|
|
| 5 |
import cv2
|
| 6 |
import subprocess
|
| 7 |
|
| 8 |
+
# --- Load the spaCy model, downloading it on first run ---
# Downloading at runtime (instead of at build time) avoids image-build
# failures on hosts that block network access during the build step.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the *current* interpreter rather than whatever "python" resolves
    # to on PATH, and fail loudly (check=True) if the download itself
    # fails — otherwise the retry below would raise a confusing OSError.
    import sys
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
|
| 14 |
|
| 15 |
+
# --- Asset lookup: maps an entity/keyword to an image file on disk ---
# Character entries are PNG sprites pasted onto the background each frame;
# the remaining entries are JPG backgrounds (first detected scene wins).
ASSET_MAP: dict[str, str] = {
    # Characters
    "man": "assets/characters/man.png",
    "woman": "assets/characters/woman.png",
    "dog": "assets/characters/dog.png",
    # Backgrounds
    "park": "assets/backgrounds/park.jpg",
    "office": "assets/backgrounds/office.jpg",
    "home": "assets/backgrounds/home.jpg",
    "school": "assets/backgrounds/school.jpg",
    "street": "assets/backgrounds/street.jpg"
}

# Working folder for rendered PNG frames, and the encoded video output path.
FRAME_FOLDER = "frames"
VIDEO_OUTPUT = "generated_video.mp4"
|
| 29 |
|
| 30 |
+
# --- Extract characters and scenes from the prompt ---
def extract_entities(prompt):
    """Pull character and scene keywords out of a free-text prompt.

    Returns a ``(characters, scenes)`` pair of lowercase strings.
    ``scenes`` is never empty: spaCy NER is tried first, then plain
    keyword matching against the known background names, then a
    ``"park"`` default.
    """
    doc = nlp(prompt)
    characters = []
    scenes = []

    # Named Entity Recognition: people/orgs become characters,
    # locations/facilities become scenes.
    for ent in doc.ents:
        if ent.label_ in ["PERSON", "ORG"]:
            characters.append(ent.text.lower())
        elif ent.label_ in ["LOC", "GPE", "FAC"]:
            scenes.append(ent.text.lower())

    # Fallback 1: keyword matching against the known background names.
    # Lowercase the prompt once (the original re-lowercased per keyword),
    # and iterate only the scene keywords instead of every ASSET_MAP key.
    if not scenes:
        lowered = prompt.lower()
        for keyword in ("park", "office", "home", "school", "street"):
            # NOTE(review): substring match — "parking" also triggers
            # "park". Kept for compatibility; word-boundary matching
            # would be stricter if that matters.
            if keyword in ASSET_MAP and keyword in lowered:
                scenes.append(keyword)
                break

    # Fallback 2: default scene so generate_video always has a background.
    if not scenes:
        scenes.append("park")

    return characters, scenes
|
| 55 |
|
| 56 |
+
# --- Compose a single image frame ---
|
| 57 |
def compose_frame(background_path, character_paths, output_path, char_positions=None):
|
| 58 |
bg = Image.open(background_path).convert('RGBA')
|
| 59 |
for idx, char_path in enumerate(character_paths):
|
|
|
|
| 62 |
bg.paste(char_img, pos, char_img)
|
| 63 |
bg.save(output_path)
|
| 64 |
|
| 65 |
+
# --- Create a video from image frames ---
|
| 66 |
def create_video_from_frames(frame_folder, output_path, fps=24):
|
| 67 |
images = sorted([img for img in os.listdir(frame_folder) if img.endswith(".png")])
|
| 68 |
if not images:
|
|
|
|
| 79 |
# --- Main function triggered by Gradio ---
def generate_video(prompt):
    """Render a short video for *prompt*; return ``(video_path, details)``.

    Pipeline: extract entities -> pick assets -> render frames -> encode.
    Unknown scenes/characters fall back to the "park"/"man" defaults.
    """
    characters, scenes = extract_entities(prompt)

    os.makedirs(FRAME_FOLDER, exist_ok=True)

    # Remove frames left over from a previous run: an earlier, longer
    # render would otherwise leak stale frames into this video, because
    # create_video_from_frames sweeps up every *.png in the folder.
    for leftover in os.listdir(FRAME_FOLDER):
        if leftover.endswith(".png"):
            os.remove(os.path.join(FRAME_FOLDER, leftover))

    bg_path = ASSET_MAP.get(scenes[0], ASSET_MAP["park"])
    char_paths = [ASSET_MAP.get(char, ASSET_MAP["man"]) for char in characters]

    total_frames = 48  # 2 sec @ 24fps; increase to 2880 for 2 min
    for i in range(total_frames):
        # Simple animation: every character drifts 2 px right per frame.
        positions = [(100 + i * 2, 200) for _ in char_paths]
        frame_path = os.path.join(FRAME_FOLDER, f"frame_{i:03d}.png")
        compose_frame(bg_path, char_paths, frame_path, char_positions=positions)

    create_video_from_frames(FRAME_FOLDER, VIDEO_OUTPUT)

    details = f"Characters detected: {characters if characters else 'default'}, Scene: {scenes[0]}"
    return VIDEO_OUTPUT, details
|
| 98 |
|
| 99 |
+
# --- Gradio interface: one prompt textbox in, video + status text out ---
iface = gr.Interface(
    fn=generate_video,
    title="Text to Video AI App (with fallback scenes)",
    inputs=gr.Textbox(lines=3, placeholder="Describe your scene here..."),
    outputs=[gr.Video(), gr.Textbox()],
)
|
| 106 |
|
| 107 |
if __name__ == "__main__":
|