Update app.py
Browse files
app.py
CHANGED
|
@@ -403,17 +403,23 @@ def process_image_with_gradcam(image, model, device, pred_class):
|
|
| 403 |
|
| 404 |
# ----- BLIP Image Captioning -----
|
| 405 |
|
| 406 |
-
# Function to load BLIP captioning
|
| 407 |
@st.cache_resource
|
| 408 |
-
def
|
| 409 |
-
with st.spinner("Loading BLIP captioning
|
| 410 |
try:
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
except Exception as e:
|
| 415 |
-
st.error(f"Error loading BLIP
|
| 416 |
-
return None, None
|
| 417 |
|
| 418 |
# Function to generate image caption using BLIP's VQA approach for GradCAM
|
| 419 |
def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
@@ -452,7 +458,7 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
| 452 |
|
| 453 |
# Function to generate caption for original image
|
| 454 |
def generate_image_caption(image, processor, model, max_length=75, num_beams=5):
|
| 455 |
-
"""Generate a caption for the original image using BLIP model"""
|
| 456 |
try:
|
| 457 |
# Check for available GPU
|
| 458 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -594,8 +600,10 @@ def main():
|
|
| 594 |
|
| 595 |
if 'blip_model_loaded' not in st.session_state:
|
| 596 |
st.session_state.blip_model_loaded = False
|
| 597 |
-
st.session_state.
|
| 598 |
-
st.session_state.
|
|
|
|
|
|
|
| 599 |
|
| 600 |
# Initialize chat history
|
| 601 |
if 'chat_history' not in st.session_state:
|
|
@@ -625,17 +633,19 @@ def main():
|
|
| 625 |
with blip_col:
|
| 626 |
if not st.session_state.blip_model_loaded:
|
| 627 |
if st.button("π₯ Load BLIP for Captioning", type="primary"):
|
| 628 |
-
# Load BLIP
|
| 629 |
-
|
| 630 |
-
if
|
| 631 |
-
st.session_state.
|
| 632 |
-
st.session_state.
|
|
|
|
|
|
|
| 633 |
st.session_state.blip_model_loaded = True
|
| 634 |
-
st.success("β
BLIP captioning
|
| 635 |
else:
|
| 636 |
-
st.error("β Failed to load BLIP
|
| 637 |
else:
|
| 638 |
-
st.success("β
BLIP captioning
|
| 639 |
|
| 640 |
with llm_col:
|
| 641 |
if not st.session_state.llm_model_loaded:
|
|
@@ -672,8 +682,8 @@ def main():
|
|
| 672 |
with st.spinner("Generating image description..."):
|
| 673 |
caption = generate_image_caption(
|
| 674 |
image,
|
| 675 |
-
st.session_state.
|
| 676 |
-
st.session_state.
|
| 677 |
)
|
| 678 |
st.session_state.image_caption = caption
|
| 679 |
|
|
@@ -729,8 +739,8 @@ def main():
|
|
| 729 |
with st.spinner("Analyzing GradCAM visualization..."):
|
| 730 |
gradcam_caption = generate_gradcam_caption(
|
| 731 |
overlay,
|
| 732 |
-
st.session_state.
|
| 733 |
-
st.session_state.
|
| 734 |
)
|
| 735 |
st.session_state.gradcam_caption = gradcam_caption
|
| 736 |
|
|
|
|
| 403 |
|
| 404 |
# ----- BLIP Image Captioning -----
|
| 405 |
|
| 406 |
+
# Function to load BLIP captioning models
@st.cache_resource
def load_blip_models():
    """Load the two BLIP captioning models used by the app (Streamlit-cached).

    Returns:
        A 4-tuple ``(original_processor, original_model,
        finetuned_processor, finetuned_model)`` on success, or
        ``(None, None, None, None)`` if any download/load step fails
        (the error is surfaced via ``st.error``).
    """
    with st.spinner("Loading BLIP captioning models..."):
        try:
            # General-purpose BLIP model: captions the raw input image.
            original_processor = BlipProcessor.from_pretrained(
                "Salesforce/blip-image-captioning-large"
            )
            original_model = BlipForConditionalGeneration.from_pretrained(
                "Salesforce/blip-image-captioning-large"
            )

            # Deepfake-fine-tuned BLIP model: describes GradCAM overlays.
            finetuned_processor = BlipProcessor.from_pretrained(
                "saakshigupta/deepfake-blip-large"
            )
            finetuned_model = BlipForConditionalGeneration.from_pretrained(
                "saakshigupta/deepfake-blip-large"
            )

            return (
                original_processor,
                original_model,
                finetuned_processor,
                finetuned_model,
            )
        except Exception as e:
            # Keep the app running; callers check for the all-None sentinel.
            st.error(f"Error loading BLIP models: {str(e)}")
            return None, None, None, None
|
| 423 |
|
| 424 |
# Function to generate image caption using BLIP's VQA approach for GradCAM
|
| 425 |
def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
|
|
| 458 |
|
| 459 |
# Function to generate caption for original image
|
| 460 |
def generate_image_caption(image, processor, model, max_length=75, num_beams=5):
|
| 461 |
+
"""Generate a caption for the original image using the original BLIP model"""
|
| 462 |
try:
|
| 463 |
# Check for available GPU
|
| 464 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 600 |
|
| 601 |
if 'blip_model_loaded' not in st.session_state:
|
| 602 |
st.session_state.blip_model_loaded = False
|
| 603 |
+
st.session_state.original_processor = None
|
| 604 |
+
st.session_state.original_model = None
|
| 605 |
+
st.session_state.finetuned_processor = None
|
| 606 |
+
st.session_state.finetuned_model = None
|
| 607 |
|
| 608 |
# Initialize chat history
|
| 609 |
if 'chat_history' not in st.session_state:
|
|
|
|
| 633 |
with blip_col:
|
| 634 |
if not st.session_state.blip_model_loaded:
|
| 635 |
if st.button("π₯ Load BLIP for Captioning", type="primary"):
|
| 636 |
+
# Load BLIP models
|
| 637 |
+
original_processor, original_model, finetuned_processor, finetuned_model = load_blip_models()
|
| 638 |
+
if all([original_processor, original_model, finetuned_processor, finetuned_model]):
|
| 639 |
+
st.session_state.original_processor = original_processor
|
| 640 |
+
st.session_state.original_model = original_model
|
| 641 |
+
st.session_state.finetuned_processor = finetuned_processor
|
| 642 |
+
st.session_state.finetuned_model = finetuned_model
|
| 643 |
st.session_state.blip_model_loaded = True
|
| 644 |
+
st.success("β
BLIP captioning models loaded successfully!")
|
| 645 |
else:
|
| 646 |
+
st.error("β Failed to load BLIP models.")
|
| 647 |
else:
|
| 648 |
+
st.success("β
BLIP captioning models loaded and ready!")
|
| 649 |
|
| 650 |
with llm_col:
|
| 651 |
if not st.session_state.llm_model_loaded:
|
|
|
|
| 682 |
with st.spinner("Generating image description..."):
|
| 683 |
caption = generate_image_caption(
|
| 684 |
image,
|
| 685 |
+
st.session_state.original_processor,
|
| 686 |
+
st.session_state.original_model
|
| 687 |
)
|
| 688 |
st.session_state.image_caption = caption
|
| 689 |
|
|
|
|
| 739 |
with st.spinner("Analyzing GradCAM visualization..."):
|
| 740 |
gradcam_caption = generate_gradcam_caption(
|
| 741 |
overlay,
|
| 742 |
+
st.session_state.finetuned_processor,
|
| 743 |
+
st.session_state.finetuned_model
|
| 744 |
)
|
| 745 |
st.session_state.gradcam_caption = gradcam_caption
|
| 746 |
|