Update app.py
Browse files
app.py
CHANGED
|
@@ -14,45 +14,57 @@ import spaces
|
|
| 14 |
from huggingface_hub import login
|
| 15 |
import os
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Add login function at the start
|
| 18 |
def init_huggingface_auth():
|
| 19 |
# Get token from environment variable or set it directly
|
| 20 |
token = os.getenv("HUGGINGFACE_TOKEN")
|
| 21 |
if token:
|
| 22 |
login(token=token)
|
|
|
|
| 23 |
else:
|
| 24 |
-
|
| 25 |
|
| 26 |
# Load both models and their processors/tokenizers
|
| 27 |
def load_models():
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
vision_model, vision_processor, code_model, code_tokenizer = load_models()
|
| 58 |
|
|
@@ -189,11 +201,38 @@ def process_for_code(vision_description):
|
|
| 189 |
|
| 190 |
@spaces.GPU
|
| 191 |
def process_content(video, transcribed_text):
|
| 192 |
-
|
| 193 |
-
|
|
|
|
| 194 |
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
# Gradio interface
|
| 199 |
iface = gr.Interface(
|
|
@@ -207,7 +246,9 @@ iface = gr.Interface(
|
|
| 207 |
gr.Code(label="Fixed Code", language="python")
|
| 208 |
],
|
| 209 |
title="Vision Code Debugger",
|
| 210 |
-
description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues."
|
|
|
|
|
|
|
| 211 |
)
|
| 212 |
|
| 213 |
if __name__ == "__main__":
|
|
|
|
| 14 |
from huggingface_hub import login
|
| 15 |
import os
|
| 16 |
|
| 17 |
# Quota-management constants (both in seconds).
# NOTE(review): neither constant is referenced in the visible code — presumably
# consumed by rate-limiting logic elsewhere in the file; confirm before removing.
MAX_GPU_TIME_PER_REQUEST = 30  # seconds
COOLDOWN_PERIOD = 300  # 5 minutes in seconds
| 21 |
# Add login function at the start
def init_huggingface_auth():
    """Authenticate this process with the Hugging Face Hub.

    Reads the token from the HUGGINGFACE_TOKEN environment variable and
    calls huggingface_hub.login with it.

    Raises:
        ValueError: if HUGGINGFACE_TOKEN is unset (or empty).
    """
    token = os.getenv("HUGGINGFACE_TOKEN")
    # Guard clause: fail fast when no token is configured.
    if not token:
        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
    login(token=token)
    print("Successfully authenticated with Hugging Face")
| 30 |
|
| 31 |
# Load both models and their processors/tokenizers
def load_models():
    """Load the vision model/processor and the code model/tokenizer.

    Authenticates with the Hugging Face Hub first, then downloads/loads both
    Qwen checkpoints in float16 with automatic device placement.

    Returns:
        (vision_model, vision_processor, code_model, code_tokenizer)

    Raises:
        Exception: re-raises any failure after logging it, including the
            ValueError from init_huggingface_auth when no token is configured.
    """
    try:
        # Initialize HF auth before loading models
        init_huggingface_auth()

        # Vision model
        # Fix: `use_auth_token` is deprecated in transformers' from_pretrained
        # (removed in newer releases); the replacement keyword is `token`.
        vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2-VL-2B-Instruct",
            torch_dtype=torch.float16,
            device_map="auto",
            token=True,  # use the token stored by login()
        )
        vision_processor = AutoProcessor.from_pretrained(
            "Qwen/Qwen2-VL-2B-Instruct",
            token=True,
        )

        # Code model
        code_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            torch_dtype=torch.float16,
            device_map="auto",
            token=True,
        )
        code_tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            token=True,
        )

        # Free up CUDA memory after loading
        torch.cuda.empty_cache()

        return vision_model, vision_processor, code_model, code_tokenizer
    except Exception as e:
        # Boundary logging: record the failure, then propagate to the caller.
        print(f"Error loading models: {str(e)}")
        raise
| 68 |
|
| 69 |
# Eagerly load both models at import time so every request reuses them.
vision_model, vision_processor, code_model, code_tokenizer = load_models()
|
| 70 |
|
|
|
|
| 201 |
|
| 202 |
@spaces.GPU
def process_content(video, transcribed_text):
    """Analyze a video of buggy code plus its transcribed audio.

    Args:
        video: uploaded video file object (expects a `.name` path attribute);
            may be None when the user submitted nothing.
        transcribed_text: transcription of the accompanying audio.

    Returns:
        A 2-tuple of strings: (vision/analysis output, fixed-code output).
        On any failure both slots are filled — the first with a user-facing
        error message, the second with "".
    """
    try:
        if video is None:
            return "Please upload a video file of code with errors.", ""

        # Add GPU memory management
        torch.cuda.empty_cache()

        # Check available GPU memory.
        # Bug fix: get_device_properties(0).total_memory is the card's total
        # capacity, not what is currently free, so the old check could never
        # trigger on a >=1GB GPU. mem_get_info() returns (free, total) bytes.
        if torch.cuda.is_available():
            free_memory, _total = torch.cuda.mem_get_info()
            if free_memory < 1e9:  # Less than 1GB available
                raise RuntimeError("Insufficient GPU memory available")

        vision_output, code_output = process_video_for_code(
            video.name,
            transcribed_text,
            max_frames=8  # Reduced from 16 to lower GPU usage
        )

        return vision_output, code_output

    except spaces.zero.gradio.HTMLError as e:
        if "exceeded your GPU quota" in str(e):
            return (
                "GPU quota exceeded. Please try again later or consider upgrading to a paid plan.",
                ""
            )
        # Bug fix: the quota-mismatch path previously fell through with no
        # return, making the function yield None instead of two outputs.
        return f"Error processing content: {str(e)}", ""
    except Exception as e:
        return f"Error processing content: {str(e)}", ""
    finally:
        # Clean up GPU memory
        torch.cuda.empty_cache()
| 236 |
|
| 237 |
# Gradio interface
|
| 238 |
iface = gr.Interface(
|
|
|
|
| 246 |
gr.Code(label="Fixed Code", language="python")
|
| 247 |
],
|
| 248 |
title="Vision Code Debugger",
|
| 249 |
+
description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues.",
|
| 250 |
+
allow_flagging="never", # Disable flagging to reduce overhead
|
| 251 |
+
cache_examples=True # Enable caching to reduce GPU usage
|
| 252 |
)
|
| 253 |
|
| 254 |
if __name__ == "__main__":
|