Spaces:
Runtime error
Runtime error
Fix torch_compile parameter error
Browse files
app.py
CHANGED
|
@@ -20,7 +20,23 @@ TITLE = """
|
|
| 20 |
|
| 21 |
print("π Loading reliable JoyCaption system...")
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
@torch.no_grad()
|
| 25 |
def caption_image_optimized(image, style, length):
|
| 26 |
"""Ultra-optimized JoyCaption that won't get stuck"""
|
|
@@ -31,23 +47,7 @@ def caption_image_optimized(image, style, length):
|
|
| 31 |
start_time = time.time()
|
| 32 |
|
| 33 |
try:
|
| 34 |
-
print(f"
|
| 35 |
-
|
| 36 |
-
# Load with maximum optimization
|
| 37 |
-
processor = AutoProcessor.from_pretrained(
|
| 38 |
-
MODEL_PATH,
|
| 39 |
-
low_cpu_mem_usage=True
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
model = LlavaForConditionalGeneration.from_pretrained(
|
| 43 |
-
MODEL_PATH,
|
| 44 |
-
torch_dtype=torch.bfloat16,
|
| 45 |
-
device_map="auto",
|
| 46 |
-
low_cpu_mem_usage=True
|
| 47 |
-
)
|
| 48 |
-
model.eval()
|
| 49 |
-
|
| 50 |
-
print(f"✅ Model loaded at {time.time() - start_time:.1f}s")
|
| 51 |
|
| 52 |
# Optimized prompts based on length
|
| 53 |
if length == "Short":
|
|
@@ -125,8 +125,8 @@ def caption_image_optimized(image, style, length):
|
|
| 125 |
result = result.split(split_marker)[-1].strip()
|
| 126 |
break
|
| 127 |
|
| 128 |
-
# Clean up
|
| 129 |
-
del
|
| 130 |
torch.cuda.empty_cache()
|
| 131 |
gc.collect()
|
| 132 |
|
|
@@ -141,10 +141,10 @@ def caption_image_optimized(image, style, length):
|
|
| 141 |
except Exception as e:
|
| 142 |
# Emergency cleanup
|
| 143 |
try:
|
| 144 |
-
if '
|
| 145 |
-
del
|
| 146 |
-
if '
|
| 147 |
-
del
|
| 148 |
torch.cuda.empty_cache()
|
| 149 |
gc.collect()
|
| 150 |
except:
|
|
|
|
| 20 |
|
| 21 |
print("π Loading reliable JoyCaption system...")
|
| 22 |
|
| 23 |
+
# Load model and processor at startup (ONCE)
|
| 24 |
+
print("📦 Loading model and processor at startup...")
|
| 25 |
+
processor = AutoProcessor.from_pretrained(
|
| 26 |
+
MODEL_PATH,
|
| 27 |
+
low_cpu_mem_usage=True
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
model = LlavaForConditionalGeneration.from_pretrained(
|
| 31 |
+
MODEL_PATH,
|
| 32 |
+
torch_dtype=torch.bfloat16,
|
| 33 |
+
device_map="auto",
|
| 34 |
+
low_cpu_mem_usage=True
|
| 35 |
+
)
|
| 36 |
+
model.eval()
|
| 37 |
+
print("✅ Model loaded and ready!")
|
| 38 |
+
|
| 39 |
+
@spaces.GPU(duration=30) # Shorter duration since no model loading
|
| 40 |
@torch.no_grad()
|
| 41 |
def caption_image_optimized(image, style, length):
|
| 42 |
"""Ultra-optimized JoyCaption that won't get stuck"""
|
|
|
|
| 47 |
start_time = time.time()
|
| 48 |
|
| 49 |
try:
|
| 50 |
+
print(f"🎯 Starting generation at {time.time() - start_time:.1f}s...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Optimized prompts based on length
|
| 53 |
if length == "Short":
|
|
|
|
| 125 |
result = result.split(split_marker)[-1].strip()
|
| 126 |
break
|
| 127 |
|
| 128 |
+
# Clean up inputs and output (but NOT the global model/processor)
|
| 129 |
+
del inputs, output
|
| 130 |
torch.cuda.empty_cache()
|
| 131 |
gc.collect()
|
| 132 |
|
|
|
|
| 141 |
except Exception as e:
|
| 142 |
# Emergency cleanup
|
| 143 |
try:
|
| 144 |
+
if 'inputs' in locals():
|
| 145 |
+
del inputs
|
| 146 |
+
if 'output' in locals():
|
| 147 |
+
del output
|
| 148 |
torch.cuda.empty_cache()
|
| 149 |
gc.collect()
|
| 150 |
except:
|