Spaces:

Sayed121
/

PathoAgent

Sleeping

App Files Files Community

PathoAgent / app.py

Sayed121

Rename Streamlit.py to app.py

800cf29 about 2 years ago

raw

history blame contribute delete

3.15 kB

	#!/usr/bin/env python
	# coding: utf-8

	# In[5]:


	import streamlit as st
	from PIL import Image
	import torch
	import requests
	from transformers import BlipProcessor, BlipForQuestionAnswering,BlipImageProcessor, AutoProcessor
	from transformers import BlipConfig
	from datasets import load_dataset
	from torch.utils.data import DataLoader
	from tqdm.notebook import tqdm

	import numpy as np
	import matplotlib.pyplot as plt
	from IPython.display import display

	# Streamlit re-executes this whole script on every widget interaction, so
	# the processors and the model are loaded through a cached factory instead
	# of being re-read from disk on each rerun.
	# st.cache_resource only exists in newer Streamlit releases; on older ones
	# fall back to an identity decorator (no caching, original behavior).
	_cache_resource = getattr(st, "cache_resource", lambda func: func)


	@_cache_resource
	def _load_blip_components():
	    """Load the BLIP processors and the local question-answering checkpoint.

	    Returns:
	        A ``(text_processor, image_processor, model)`` tuple built from the
	        ``Salesforce/blip-vqa-base`` processors and the local
	        ``blip_model_v2_epo89`` checkpoint.
	    """
	    return (
	        BlipProcessor.from_pretrained("Salesforce/blip-vqa-base"),
	        BlipImageProcessor.from_pretrained("Salesforce/blip-vqa-base"),
	        BlipForQuestionAnswering.from_pretrained(r"blip_model_v2_epo89"),
	    )


	# Module-level names kept unchanged: the functions below read them as globals.
	text_processor, image_processor, model = _load_blip_components()


	def preprocess_image(image):
	    """Convert an image into the pixel tensor passed to the model.

	    The image is first resized to 128x128 via its own ``resize`` method and
	    then run through the module-level ``image_processor``, which is also
	    asked to resize to 128x128 and to return PyTorch tensors.

	    Args:
	        image: Image object exposing ``resize`` (``main`` passes an RGB PIL image).

	    Returns:
	        The first entry of the processor's ``pixel_values`` output, i.e. the
	        tensor for this single image without the leading batch axis.
	    """
	    target_size = (128, 128)
	    resized = image.resize(target_size)
	    processed = image_processor(
	        resized,
	        do_resize=True,
	        size=target_size,
	        return_tensors="pt",
	    )
	    pixel_values = processed["pixel_values"]
	    return pixel_values[0]

	def preprocess_text(text, max_length=32):
	    """Tokenize a question with the module-level ``text_processor``.

	    The text is padded/truncated to ``max_length`` tokens and returned as
	    PyTorch tensors; no image is passed to the processor.

	    Args:
	        text: The question string to encode.
	        max_length: Target token length used for padding and truncation.

	    Returns:
	        The processor's encoding, with every tensor squeezed in place so
	        that size-1 dimensions (such as the batch axis) are removed.
	    """
	    tokenized = text_processor(
	        None,
	        text,
	        padding="max_length",
	        truncation=True,
	        max_length=max_length,
	        return_tensors="pt",
	    )

	    # Overwrite each entry with its squeezed version; the key set is
	    # snapshotted first so the container is not iterated while it is updated.
	    for field in list(tokenized.keys()):
	        tokenized[field] = tokenized[field].squeeze()
	    return tokenized

	def predict(image, question):
	    """Answer ``question`` about ``image`` using the loaded BLIP model.

	    Args:
	        image: Image handed to ``preprocess_image``.
	        question: Question text handed to ``preprocess_text``.

	    Returns:
	        The decoded text of the first generated sequence, with special
	        tokens stripped.
	    """
	    # Both helpers return un-batched tensors, so the batch axis is added back.
	    image_batch = preprocess_image(image).unsqueeze(0)
	    tokenized_question = preprocess_text(question)

	    # NOTE(review): the question is padded to max_length, but only input_ids
	    # (not the attention mask) are forwarded to generate() -- confirm that
	    # this matches how the checkpoint was trained.
	    model.eval()
	    generated = model.generate(
	        pixel_values=image_batch,
	        input_ids=tokenized_question['input_ids'].unsqueeze(0),
	    )

	    return text_processor.decode(generated[0], skip_special_tokens=True)

	def main():
	    """Render the PathoAgent page and run a prediction on request.

	    Shows an image uploader and a question box, previews the uploaded image,
	    and, when "Predict" is clicked, displays the question together with the
	    model's answer. If the image or the question is missing, a warning is
	    shown instead of silently doing nothing.
	    """
	    st.title("PathoAgent")

	    # Image upload
	    st.subheader("Upload Image")
	    uploaded_file = st.file_uploader("Choose a file", type=["jpg", "png", "jpeg"])

	    # Text input
	    st.subheader("Input Question")
	    text_input = st.text_area("Enter text here:")

	    # Display uploaded image
	    image = None
	    if uploaded_file is not None:
	        image = Image.open(uploaded_file).convert('RGB')
	        st.image(image, caption="Uploaded Image.", use_column_width=True)

	    # Predict button: nothing more to do until it is clicked.
	    if not st.button("Predict"):
	        return

	    # Tell the user which input is missing rather than ignoring the click.
	    if image is None:
	        st.warning("Please upload an image before predicting.")
	        return
	    if not text_input.strip():
	        # A whitespace-only question is treated the same as an empty one.
	        st.warning("Please enter a question before predicting.")
	        return

	    # Perform prediction
	    prediction_result = predict(image, text_input)

	    # Display input text
	    st.subheader("Input Question:")
	    st.write(text_input)
	    # Display prediction result
	    st.subheader("Prediction Result:")
	    st.write(prediction_result)


	# Build the UI only when this file is executed as the main script.
	if __name__ == "__main__":
	    main()


	# streamlit run app.py