Update app.py
Browse files
app.py
CHANGED
|
@@ -14,45 +14,57 @@ import spaces
|
|
| 14 |
from huggingface_hub import login
|
| 15 |
import os
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Add login function at the start
|
| 18 |
def init_huggingface_auth():
|
| 19 |
# Get token from environment variable or set it directly
|
| 20 |
token = os.getenv("HUGGINGFACE_TOKEN")
|
| 21 |
if token:
|
| 22 |
login(token=token)
|
|
|
|
| 23 |
else:
|
| 24 |
-
|
| 25 |
|
| 26 |
# Load both models and their processors/tokenizers
|
| 27 |
def load_models():
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
vision_model, vision_processor, code_model, code_tokenizer = load_models()
|
| 58 |
|
|
@@ -189,11 +201,38 @@ def process_for_code(vision_description):
|
|
| 189 |
|
| 190 |
@spaces.GPU
|
| 191 |
def process_content(video, transcribed_text):
|
| 192 |
-
|
| 193 |
-
|
|
|
|
| 194 |
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
# Gradio interface
|
| 199 |
iface = gr.Interface(
|
|
@@ -207,7 +246,9 @@ iface = gr.Interface(
|
|
| 207 |
gr.Code(label="Fixed Code", language="python")
|
| 208 |
],
|
| 209 |
title="Vision Code Debugger",
|
| 210 |
-
description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues."
|
|
|
|
|
|
|
| 211 |
)
|
| 212 |
|
| 213 |
if __name__ == "__main__":
|
|
|
|
| 14 |
from huggingface_hub import login
|
| 15 |
import os
|
| 16 |
|
| 17 |
# Quota-management constants (both in seconds).
# NOTE(review): neither constant is referenced in the visible code — presumably
# consumed by rate-limiting logic elsewhere in the file; confirm before removing.
MAX_GPU_TIME_PER_REQUEST = 30  # seconds
COOLDOWN_PERIOD = 300  # 5 minutes in seconds
| 21 |
# Add login function at the start
def init_huggingface_auth():
    """Authenticate this process with the Hugging Face Hub.

    Reads the token from the HUGGINGFACE_TOKEN environment variable and
    calls huggingface_hub.login with it.

    Raises:
        ValueError: if HUGGINGFACE_TOKEN is unset (or empty).
    """
    token = os.getenv("HUGGINGFACE_TOKEN")
    # Guard clause: fail fast when no token is configured.
    if not token:
        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
    login(token=token)
    print("Successfully authenticated with Hugging Face")
| 30 |
|
| 31 |
# Load both models and their processors/tokenizers
def load_models():
    """Load the vision model/processor and the code model/tokenizer.

    Authenticates with the Hugging Face Hub first, then downloads/loads both
    Qwen checkpoints in float16 with automatic device placement.

    Returns:
        (vision_model, vision_processor, code_model, code_tokenizer)

    Raises:
        Exception: re-raises any failure after logging it, including the
            ValueError from init_huggingface_auth when no token is configured.
    """
    try:
        # Initialize HF auth before loading models
        init_huggingface_auth()

        # Vision model
        # Fix: `use_auth_token` is deprecated in transformers' from_pretrained
        # (removed in newer releases); the replacement keyword is `token`.
        vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2-VL-2B-Instruct",
            torch_dtype=torch.float16,
            device_map="auto",
            token=True,  # use the token stored by login()
        )
        vision_processor = AutoProcessor.from_pretrained(
            "Qwen/Qwen2-VL-2B-Instruct",
            token=True,
        )

        # Code model
        code_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            torch_dtype=torch.float16,
            device_map="auto",
            token=True,
        )
        code_tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            token=True,
        )

        # Free up CUDA memory after loading
        torch.cuda.empty_cache()

        return vision_model, vision_processor, code_model, code_tokenizer
    except Exception as e:
        # Boundary logging: record the failure, then propagate to the caller.
        print(f"Error loading models: {str(e)}")
        raise
| 68 |
|
| 69 |
# Eagerly load both models at import time so every request reuses them.
vision_model, vision_processor, code_model, code_tokenizer = load_models()
|
| 70 |
|
|
|
|
| 201 |
|
| 202 |
@spaces.GPU
def process_content(video, transcribed_text):
    """Analyze a video of buggy code plus its transcribed audio.

    Args:
        video: uploaded video file object (expects a `.name` path attribute);
            may be None when the user submitted nothing.
        transcribed_text: transcription of the accompanying audio.

    Returns:
        A 2-tuple of strings: (vision/analysis output, fixed-code output).
        On any failure both slots are filled — the first with a user-facing
        error message, the second with "".
    """
    try:
        if video is None:
            return "Please upload a video file of code with errors.", ""

        # Add GPU memory management
        torch.cuda.empty_cache()

        # Check available GPU memory.
        # Bug fix: get_device_properties(0).total_memory is the card's total
        # capacity, not what is currently free, so the old check could never
        # trigger on a >=1GB GPU. mem_get_info() returns (free, total) bytes.
        if torch.cuda.is_available():
            free_memory, _total = torch.cuda.mem_get_info()
            if free_memory < 1e9:  # Less than 1GB available
                raise RuntimeError("Insufficient GPU memory available")

        vision_output, code_output = process_video_for_code(
            video.name,
            transcribed_text,
            max_frames=8  # Reduced from 16 to lower GPU usage
        )

        return vision_output, code_output

    except spaces.zero.gradio.HTMLError as e:
        if "exceeded your GPU quota" in str(e):
            return (
                "GPU quota exceeded. Please try again later or consider upgrading to a paid plan.",
                ""
            )
        # Bug fix: the quota-mismatch path previously fell through with no
        # return, making the function yield None instead of two outputs.
        return f"Error processing content: {str(e)}", ""
    except Exception as e:
        return f"Error processing content: {str(e)}", ""
    finally:
        # Clean up GPU memory
        torch.cuda.empty_cache()
| 236 |
|
| 237 |
# Gradio interface
|
| 238 |
iface = gr.Interface(
|
|
|
|
| 246 |
gr.Code(label="Fixed Code", language="python")
|
| 247 |
],
|
| 248 |
title="Vision Code Debugger",
|
| 249 |
+
description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues.",
|
| 250 |
+
allow_flagging="never", # Disable flagging to reduce overhead
|
| 251 |
+
cache_examples=True # Enable caching to reduce GPU usage
|
| 252 |
)
|
| 253 |
|
| 254 |
if __name__ == "__main__":
|