Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -360,39 +360,65 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
| 360 |
Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
|
| 361 |
"""
|
| 362 |
try:
|
|
|
|
| 363 |
# Process image first
|
| 364 |
inputs = processor(image, return_tensors="pt")
|
| 365 |
|
| 366 |
# Check for available GPU and move model and inputs
|
| 367 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 368 |
model = model.to(device)
|
| 369 |
inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
|
| 370 |
|
| 371 |
# Generate caption
|
|
|
|
| 372 |
with torch.no_grad():
|
| 373 |
output = model.generate(**inputs, max_length=max_length, num_beams=5)
|
| 374 |
|
| 375 |
# Decode the output
|
| 376 |
caption = processor.decode(output[0], skip_special_tokens=True)
|
|
|
|
| 377 |
|
| 378 |
-
#
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
if low_match:
|
| 390 |
-
formatted_text += f"**Low activation**:\n{low_match.strip()}"
|
| 391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
return formatted_text.strip()
|
| 393 |
|
| 394 |
except Exception as e:
|
| 395 |
st.error(f"Error analyzing GradCAM: {str(e)}")
|
|
|
|
|
|
|
| 396 |
return "Error analyzing GradCAM visualization"
|
| 397 |
|
| 398 |
# Function to generate caption for original image
|
|
@@ -928,12 +954,18 @@ def main():
|
|
| 928 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
| 929 |
if st.session_state.blip_model_loaded and overlay:
|
| 930 |
with st.spinner("Analyzing GradCAM visualization..."):
|
|
|
|
| 931 |
gradcam_caption = generate_gradcam_caption(
|
| 932 |
overlay,
|
| 933 |
st.session_state.finetuned_processor,
|
| 934 |
st.session_state.finetuned_model
|
| 935 |
)
|
| 936 |
st.session_state.gradcam_caption = gradcam_caption
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 937 |
except Exception as e:
|
| 938 |
st.error(f"Error generating GradCAM: {str(e)}")
|
| 939 |
import traceback
|
|
@@ -957,6 +989,11 @@ def main():
|
|
| 957 |
# Image Analysis Summary section - AFTER Stage 2
|
| 958 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
| 959 |
with st.expander("Image Analysis Summary", expanded=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 960 |
# Display images and analysis in organized layout
|
| 961 |
col1, col2 = st.columns([1, 2])
|
| 962 |
|
|
@@ -979,6 +1016,8 @@ def main():
|
|
| 979 |
st.markdown("### GradCAM Analysis")
|
| 980 |
st.markdown(st.session_state.gradcam_caption)
|
| 981 |
st.markdown("---")
|
|
|
|
|
|
|
| 982 |
|
| 983 |
# LLM Analysis section - AFTER Image Analysis Summary
|
| 984 |
with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
|
|
|
|
def generate_gradcam_caption(image, processor, model, max_length=60):
    """
    Generate a detailed textual analysis of a GradCAM visualization using the
    fine-tuned BLIP captioning model.

    Parameters
    ----------
    image : PIL.Image.Image
        The GradCAM overlay image to describe.  # assumes a PIL image — TODO confirm against caller
    processor : transformers BLIP processor
        Prepares the image tensors and decodes generated token ids.
    model : transformers BLIP model
        The fine-tuned captioning model; moved to GPU when available.
    max_length : int, optional
        Maximum generated caption length in tokens (default 60).

    Returns
    -------
    str
        Markdown-formatted activation analysis, or a fallback error string if
        anything fails.  Never raises: all exceptions are reported via
        ``st.error`` and swallowed so the Streamlit app keeps running.
    """
    try:
        # NOTE(review): the st.write "Debug: ..." calls below render in the UI;
        # they are kept for behavioral parity but should be removed (or routed
        # to logging) once the pipeline is verified.
        st.write("Debug: Starting GradCAM caption generation")
        # Process image first
        inputs = processor(image, return_tensors="pt")

        # Check for available GPU and move model and inputs
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.write(f"Debug: Using device: {device}")
        model = model.to(device)
        # Some processor outputs are non-tensor metadata; only .to() what supports it.
        inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}

        # Generate caption (beam search for more coherent output)
        st.write("Debug: Generating caption...")
        with torch.no_grad():
            output = model.generate(**inputs, max_length=max_length, num_beams=5)

        # Decode the output
        caption = processor.decode(output[0], skip_special_tokens=True)
        st.write(f"Debug: Raw caption: {caption}")

        # Try to parse the caption based on different possible formats
        try:
            # Original fine-tuned format: "high activation : ... moderate activation : ... low activation : ..."
            formatted_text = ""
            if "high activation :" in caption:
                high_match = caption.split("high activation :")[1].split("moderate")[0]
                formatted_text += f"**High activation**:\n{high_match.strip()}\n\n"

            if "moderate activation :" in caption:
                moderate_match = caption.split("moderate activation :")[1].split("low")[0]
                formatted_text += f"**Moderate activation**:\n{moderate_match.strip()}\n\n"

            if "low activation :" in caption:
                low_match = caption.split("low activation :")[1]
                formatted_text += f"**Low activation**:\n{low_match.strip()}"

            # If nothing was extracted using the original format, try alternative formats
            if not formatted_text.strip():
                st.write("Debug: Trying alternative format parsing")

                # Newer single-label format, e.g. "analysis: <text>".
                # BUGFIX: split with maxsplit=1 so text after any additional
                # colons is preserved (plain split(":") dropped everything
                # past the second colon).  The ":" membership test already
                # guarantees two parts, so no length check is needed.
                if ":" in caption:
                    formatted_text = f"**GradCAM Analysis**:\n{caption.split(':', 1)[1].strip()}"
                else:
                    # As a fallback, just use the entire caption
                    formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
        except Exception as parsing_error:
            st.write(f"Debug: Error parsing caption format: {str(parsing_error)}")
            # Use the entire caption as is
            formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"

        st.write(f"Debug: Formatted caption complete. Length: {len(formatted_text)}")
        return formatted_text.strip()

    except Exception as e:
        # Surface the failure in the UI instead of crashing the Streamlit app.
        st.error(f"Error analyzing GradCAM: {str(e)}")
        import traceback
        st.error(traceback.format_exc())
        return "Error analyzing GradCAM visualization"
| 423 |
|
| 424 |
# Function to generate caption for original image
|
|
|
|
| 954 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
| 955 |
if st.session_state.blip_model_loaded and overlay:
|
| 956 |
with st.spinner("Analyzing GradCAM visualization..."):
|
| 957 |
+
st.write("Debug: Starting GradCAM analysis")
|
| 958 |
gradcam_caption = generate_gradcam_caption(
|
| 959 |
overlay,
|
| 960 |
st.session_state.finetuned_processor,
|
| 961 |
st.session_state.finetuned_model
|
| 962 |
)
|
| 963 |
st.session_state.gradcam_caption = gradcam_caption
|
| 964 |
+
st.write(f"Debug: Saved GradCAM caption to session state, length: {len(gradcam_caption) if gradcam_caption else 0}")
|
| 965 |
+
|
| 966 |
+
# Display the caption directly here as well for immediate feedback
|
| 967 |
+
st.markdown("### GradCAM Analysis (Direct)")
|
| 968 |
+
st.markdown(gradcam_caption)
|
| 969 |
except Exception as e:
|
| 970 |
st.error(f"Error generating GradCAM: {str(e)}")
|
| 971 |
import traceback
|
|
|
|
| 989 |
# Image Analysis Summary section - AFTER Stage 2
|
| 990 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
| 991 |
with st.expander("Image Analysis Summary", expanded=True):
|
| 992 |
+
st.write(f"Debug: Image caption exists: {hasattr(st.session_state, 'image_caption')}")
|
| 993 |
+
st.write(f"Debug: GradCAM caption exists: {hasattr(st.session_state, 'gradcam_caption')}")
|
| 994 |
+
if hasattr(st.session_state, 'gradcam_caption'):
|
| 995 |
+
st.write(f"Debug: GradCAM caption length: {len(st.session_state.gradcam_caption)}")
|
| 996 |
+
|
| 997 |
# Display images and analysis in organized layout
|
| 998 |
col1, col2 = st.columns([1, 2])
|
| 999 |
|
|
|
|
| 1016 |
st.markdown("### GradCAM Analysis")
|
| 1017 |
st.markdown(st.session_state.gradcam_caption)
|
| 1018 |
st.markdown("---")
|
| 1019 |
+
else:
|
| 1020 |
+
st.warning("GradCAM caption not found in session state.")
|
| 1021 |
|
| 1022 |
# LLM Analysis section - AFTER Image Analysis Summary
|
| 1023 |
with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
|