Update app.py
app.py CHANGED
@@ -408,8 +408,8 @@ def process_image_with_gradcam(image, model, device, pred_class):
 def load_blip_model():
     with st.spinner("Loading BLIP captioning model..."):
         try:
-            processor = BlipProcessor.from_pretrained("
-            model = BlipForConditionalGeneration.from_pretrained("
+            processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
+            model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large")
             return processor, model
         except Exception as e:
             st.error(f"Error loading BLIP model: {str(e)}")
@@ -418,40 +418,31 @@ def load_blip_model():
 # Function to generate image caption using BLIP's VQA approach for GradCAM
 def generate_gradcam_caption(image, processor, model, max_length=60):
     """
-    Generate a detailed analysis of GradCAM visualization using
+    Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
     """
     try:
         # Check for available GPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = model.to(device)

-        #
-        questions = [
-            "What facial features are highlighted by the red and yellow areas in this heatmap?",
-            "What does this facial heat map visualization show?",
-            "What patterns do you see in this facial heatmap visualization?"
-        ]
+        # Process image with BLIP
+        inputs = processor(image, return_tensors="pt").to(device)

-        #
-        answers = []
-        for question in questions:
-            inputs = processor(image, text=question, return_tensors="pt").to(device)
-            with torch.no_grad():
-                output = model.generate(**inputs, max_length=max_length, num_beams=5)
-            answer = processor.decode(output[0], skip_special_tokens=True)
-            answers.append(answer)
+        # Generate caption
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=max_length, num_beams=5)

-        #
+        # Decode the output
+        caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # Format into structured analysis
         structured_output = f"""
-        **
-
-        **High Activation Regions**: The red/yellow areas highlight {answers[0]}
-
-        **Medium Activation Regions**: The green/cyan areas correspond to regions of medium importance in the detection process, typically including parts of the face and surrounding areas.
-
-        **Low Activation Regions**: The blue/dark blue areas represent features that have less impact on the model's decision, usually the background and peripheral elements.
+        **Heatmap Analysis**: {caption}

-        **
+        **Key Observations**:
+        - The red/yellow regions indicate areas of high importance in the detection process
+        - The green/cyan areas show regions of medium importance
+        - The blue/dark blue regions represent features with lower impact on the model's decision
         """
         return structured_output.strip()
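For a quick sanity check outside the Streamlit app, a minimal standalone sketch of the single-caption flow this commit introduces. The model name is taken from the diff above; the overlay image path is a placeholder, not something this commit defines.

# Standalone sketch of the new captioning flow (placeholder image path).
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large").to(device)

# Placeholder: a GradCAM overlay saved as an image by the upstream app code
image = Image.open("gradcam_overlay.png").convert("RGB")
inputs = processor(image, return_tensors="pt").to(device)

# Same generation settings as the app: beam search with a capped length
with torch.no_grad():
    output = model.generate(**inputs, max_length=60, num_beams=5)

print(processor.decode(output[0], skip_special_tokens=True))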