Spaces:

ignitariumcloud
/

Apollo_GenAI

Runtime error

App Files Files Community

VishalD1234 committed on Dec 11, 2024

Commit

3f60308

verified ·

1 Parent(s): c33b13b

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -10

app.py CHANGED Viewed

@@ -10,6 +10,13 @@ MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 # Delay Reasons for Each Manufacturing Step
 DELAY_REASONS = {
     "Step 1": ["Delay in Bead Insertion", "Lack of raw material"],
@@ -100,15 +107,14 @@ def get_step_info(step_number):
 def load_video(video_data, strategy='chat'):
     """Loads and processes video data into a format suitable for model input."""
     bridge.set_bridge('torch')
-    num_frames = 24
     if isinstance(video_data, str):
         decord_vr = VideoReader(video_data, ctx=cpu(0))
     else:
         decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
     total_frames = len(decord_vr)
-    if total_frames < num_frames:
         raise ValueError("Uploaded video is too short for meaningful analysis.")
     timestamps = [i[0] for i in decord_vr.get_frame_timestamp(np.arange(total_frames))]
@@ -119,7 +125,7 @@ def load_video(video_data, strategy='chat'):
         closest_num = min(timestamps, key=lambda x: abs(x - second))
         index = timestamps.index(closest_num)
         frame_id_list.append(index)
-        if len(frame_id_list) >= num_frames:
             break
     video_data = decord_vr.get_batch(frame_id_list)
@@ -148,7 +154,10 @@ def load_model():
 def predict(prompt, video_data, temperature, model, tokenizer):
     """Generates predictions based on the video and textual prompt."""
-    video = load_video(video_data, strategy='chat')
     inputs = model.build_conversation_input_ids(
         tokenizer=tokenizer,
@@ -166,12 +175,12 @@ def predict(prompt, video_data, temperature, model, tokenizer):
     }
     gen_kwargs = {
-        "max_new_tokens": 2048,
         "pad_token_id": tokenizer.pad_token_id,
-        "top_k": 1,
         "do_sample": False,
-        "top_p": 0.1,
-        "temperature": 0.3,
     }
     with torch.no_grad():
@@ -208,5 +217,5 @@ Potential Delay Reasons:
 Task: Analyze the provided video to identify the delay reason. Use the following format:
 1. **Selected Reason:** [Choose the most likely reason from the list above]
-2. **Visual Evidence:** [Describe specific visual cues from the
 """

 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
+# Configurable constants
+NUM_FRAMES = 24  # Default number of frames to extract
+MAX_NEW_TOKENS = 2048
+TOP_K = 1
+TOP_P = 0.1
+DEFAULT_TEMPERATURE = 1.0
 # Delay Reasons for Each Manufacturing Step
 DELAY_REASONS = {
     "Step 1": ["Delay in Bead Insertion", "Lack of raw material"],
 def load_video(video_data, strategy='chat'):
     """Loads and processes video data into a format suitable for model input."""
     bridge.set_bridge('torch')
     if isinstance(video_data, str):
         decord_vr = VideoReader(video_data, ctx=cpu(0))
     else:
         decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
     total_frames = len(decord_vr)
+    if total_frames < NUM_FRAMES:
         raise ValueError("Uploaded video is too short for meaningful analysis.")
     timestamps = [i[0] for i in decord_vr.get_frame_timestamp(np.arange(total_frames))]
         closest_num = min(timestamps, key=lambda x: abs(x - second))
         index = timestamps.index(closest_num)
         frame_id_list.append(index)
+        if len(frame_id_list) >= NUM_FRAMES:
             break
     video_data = decord_vr.get_batch(frame_id_list)
 def predict(prompt, video_data, temperature, model, tokenizer):
     """Generates predictions based on the video and textual prompt."""
+    try:
+        video = load_video(video_data, strategy='chat')
+    except ValueError as e:
+        return f"Error loading video: {str(e)}"
     inputs = model.build_conversation_input_ids(
         tokenizer=tokenizer,
     }
     gen_kwargs = {
+        "max_new_tokens": MAX_NEW_TOKENS,
         "pad_token_id": tokenizer.pad_token_id,
+        "top_k": TOP_K,
         "do_sample": False,
+        "top_p": TOP_P,
+        "temperature": temperature or DEFAULT_TEMPERATURE,
     }
     with torch.no_grad():
 Task: Analyze the provided video to identify the delay reason. Use the following format:
 1. **Selected Reason:** [Choose the most likely reason from the list above]
+2. **Visual Evidence:** [Describe specific visual cues from the video that support your analysis.]
 """