Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # In[5]: | |
| import streamlit as st | |
| from PIL import Image | |
| import torch | |
| import requests | |
| from transformers import BlipProcessor, BlipForQuestionAnswering,BlipImageProcessor, AutoProcessor | |
| from transformers import BlipConfig | |
| from datasets import load_dataset | |
| from torch.utils.data import DataLoader | |
| from tqdm.notebook import tqdm | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from IPython.display import display | |
# Checkpoint id that supplies the text processor and the image processor.
_BASE_CHECKPOINT = "Salesforce/blip-vqa-base"

# Processor used to tokenize questions and to decode generated answers.
text_processor = BlipProcessor.from_pretrained(_BASE_CHECKPOINT)

# Stand-alone image processor used to turn images into pixel tensors.
image_processor = BlipImageProcessor.from_pretrained(_BASE_CHECKPOINT)

# VQA model weights; "blip_model_v2_epo89" is presumably a local directory
# holding a fine-tuned checkpoint -- confirm against the deployment layout.
model = BlipForQuestionAnswering.from_pretrained("blip_model_v2_epo89")
def preprocess_image(image):
    """Turn one image into a pixel-value tensor without a batch axis.

    The image is resized to 128x128 via its own ``resize`` method and then
    run through the module-level ``image_processor``, which is also asked to
    resize to 128x128 and to return PyTorch tensors.

    Args:
        image: Image object exposing ``resize`` (the app passes a PIL image).

    Returns:
        The first entry of the processor's ``"pixel_values"`` output.
    """
    target_size = (128, 128)
    resized = image.resize(target_size)
    processed = image_processor(
        resized,
        do_resize=True,
        size=target_size,
        return_tensors="pt",
    )
    pixel_values = processed["pixel_values"]
    # Only one image was submitted, so take that single entry.
    return pixel_values[0]
def preprocess_text(text, max_length=32):
    """Tokenize a question into fixed-length tensors without a batch axis.

    The text is passed to the module-level ``text_processor`` with padding
    and truncation to ``max_length`` and PyTorch tensors as output.

    Args:
        text: The question string to encode.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        The processor's encoding, with the leading batch axis removed from
        each tensor it contains.
    """
    encoding = text_processor(
        None,  # no image here; pixels are prepared by preprocess_image
        text,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    # Remove only the leading batch axis. A bare ``squeeze()`` would also
    # collapse the sequence axis when ``max_length`` is 1, producing 0-d
    # tensors that ``unsqueeze(0)`` can no longer restore to (1, length).
    for key in list(encoding.keys()):
        encoding[key] = encoding[key].squeeze(0)
    return encoding
def predict(image, question):
    """Answer ``question`` about ``image`` using the module-level model.

    Args:
        image: Image accepted by ``preprocess_image``.
        question: Question text accepted by ``preprocess_text``.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # The preprocessing helpers strip the batch axis; add it back here.
    pixel_values = preprocess_image(image).unsqueeze(0)
    encoding = preprocess_text(question)
    input_ids = encoding["input_ids"].unsqueeze(0)
    # The question is padded to a fixed length, so forward the mask as well;
    # without it the text encoder treats the padding as real tokens.
    attention_mask = encoding["attention_mask"].unsqueeze(0)

    model.eval()
    outputs = model.generate(
        pixel_values=pixel_values,
        input_ids=input_ids,
        attention_mask=attention_mask,
    )
    return text_processor.decode(outputs[0], skip_special_tokens=True)
def main():
    """Render the PathoAgent page: upload an image, ask a question, predict.

    The uploaded image is shown as soon as it is available. Pressing
    "Predict" runs ``predict`` on the image and the question and displays
    both the question and the answer. If the image or the question is
    missing, a warning is shown instead of silently doing nothing.
    """
    st.title("PathoAgent")

    st.subheader("Upload Image")
    uploaded_file = st.file_uploader("Choose a file", type=["jpg", "png", "jpeg"])

    st.subheader("Input Question")
    text_input = st.text_area("Enter text here:")

    # Decode and preview the upload; ``image`` stays None until a file exists.
    image = None
    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image.", use_column_width=True)

    if st.button("Predict"):
        # Tell the user what is missing rather than ignoring the click.
        if image is None or not text_input.strip():
            st.warning("Please upload an image and enter a question before predicting.")
            return

        prediction_result = predict(image, text_input)

        st.subheader("Input Question:")
        st.write(text_input)

        st.subheader("Prediction Result:")
        st.write(prediction_result)
# Script entry point. Launch the app with:
#     streamlit run Streamlit.py
if __name__ == "__main__":
    main()