Mayankuttam commited on
Commit
f80e6a6
·
verified ·
1 Parent(s): 348ccc5

Update model_pipeline.py

Browse files
Files changed (1) hide show
  1. model_pipeline.py +14 -12
model_pipeline.py CHANGED
@@ -1,4 +1,4 @@
1
- from transformers import LlavaForConditionalGeneration, LlavaProcessor, pipeline, AutoProcessor, AutoModelForVision2Seq
2
  from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
3
  from gtts import gTTS
4
  import torch
@@ -11,11 +11,11 @@ import numpy as np
11
  model_id = "may-ur08/llava-commentary-gen"
12
  processor = AutoProcessor.from_pretrained(model_id)
13
 
 
14
  model = AutoModelForVision2Seq.from_pretrained(
15
  model_id,
16
- device_map="auto",
17
- torch_dtype=torch.float16,
18
- load_in_4bit=True
19
  )
20
 
21
  def run_model_on_video(video_path):
@@ -45,14 +45,16 @@ def run_model_on_video(video_path):
45
 
46
  for i, frame_path in enumerate(frames):
47
  image = Image.open(frame_path).convert("RGB")
48
- prompt = ("<image>\n"
49
- "USER: Analyze this image from a live cricket match.\n"
50
- "Identify two things:\n"
51
- "1. What specific type of cricket shot is being played?\n"
52
- "2. What is the likely outcome?\n"
53
- "Only use proper cricket terminology — avoid any football, baseball, or non-cricket references. "
54
- "Now write a short, exciting cricket-style commentary line as if it's being broadcast on TV.\n"
55
- "ASSISTANT:")
 
 
56
  inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
57
  output = model.generate(**inputs, max_new_tokens=50)
58
  caption = processor.decode(output[0], skip_special_tokens=True).strip()
 
1
+ from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
2
  from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
3
  from gtts import gTTS
4
  import torch
 
11
  model_id = "may-ur08/llava-commentary-gen"
12
  processor = AutoProcessor.from_pretrained(model_id)
13
 
14
+ use_gpu = torch.cuda.is_available()
15
  model = AutoModelForVision2Seq.from_pretrained(
16
  model_id,
17
+ device_map="auto" if use_gpu else None,
18
+ torch_dtype=torch.float16 if use_gpu else torch.float32
 
19
  )
20
 
21
  def run_model_on_video(video_path):
 
45
 
46
  for i, frame_path in enumerate(frames):
47
  image = Image.open(frame_path).convert("RGB")
48
+ prompt = (
49
+ "<image>\n"
50
+ "USER: Analyze this image from a live cricket match.\n"
51
+ "Identify two things:\n"
52
+ "1. What specific type of cricket shot is being played?\n"
53
+ "2. What is the likely outcome?\n"
54
+ "Only use proper cricket terminology — avoid any football, baseball, or non-cricket references. "
55
+ "Now write a short, exciting cricket-style commentary line as if it's being broadcast on TV.\n"
56
+ "ASSISTANT:"
57
+ )
58
  inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
59
  output = model.generate(**inputs, max_new_tokens=50)
60
  caption = processor.decode(output[0], skip_special_tokens=True).strip()