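# Streamlit demo combining three Hugging Face models:
#  - Whisper large-v3 for automatic speech recognition
#  - an image-classification pipeline for food / produce images
#  - OWL-ViT for zero-shot, text-conditioned object detection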
import torch
import streamlit as st
import numpy as np
from PIL import Image
from transformers import pipeline, OwlViTProcessor, OwlViTForObjectDetection
from tempfile import NamedTemporaryFile
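# Models are loaded once at import time; Streamlit re-runs this script on every
# interaction, so in a larger app these loads would be wrapped in st.cache_resource.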
audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
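# Alternative classifiers, kept commented out for reference: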
#imagepipe = pipeline("image-classification", model="Kaludi/food-category-classification-v2.0")
#imagepipe = pipeline("image-classification", model="nateraw/food")
imagepipe = pipeline("image-classification", model="flatmoon102/fruits_and_vegetables_image_classification")
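# OWL-ViT: an open-vocabulary detector that localizes whatever text queries it is given.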
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
st.title('Upload an audio file for speech recognition')
uploaded_audio_file = st.file_uploader("Choose an audio file (wav)", type=["wav"])
if uploaded_audio_file is not None:
    # The ASR pipeline expects a file path, so spill the upload to a temp file.
    # Note the leading dot in the suffix, needed to produce a valid .wav name.
    with NamedTemporaryFile(suffix=".wav") as temp:
        temp.write(uploaded_audio_file.getvalue())
        temp.seek(0)
        result = audiopipe(temp.name)
        st.write(result)
st.title('Upload an image file for classification (food)')
uploaded_image_file = st.file_uploader("Choose an image file")
if uploaded_image_file is not None:
    # The image-classification pipeline also accepts a file path; PIL detects
    # the image format from the file contents, so no suffix is needed here.
    with NamedTemporaryFile() as temp:
        temp.write(uploaded_image_file.getvalue())
        temp.seek(0)
        result = imagepipe(temp.name)
        st.write(result)
st.title('Upload an image file for zero-shot object detection')
uploaded_image_zero_file = st.file_uploader("Choose an image file (zero)")
texts = st.text_input('tags (comma-separated)')
if uploaded_image_zero_file is not None:
    image = Image.open(uploaded_image_zero_file)
    outputImage = np.array(image)
    st.image(outputImage)
    if st.button('apply tags'):
        # OWL-ViT takes one list of text queries per image in the batch;
        # strip whitespace so "cat,dog" and "cat, dog" both work.
        tags = [[t.strip() for t in texts.split(",")]]
        inputs = processor(text=tags, images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        # Target size is (height, width), i.e. PIL's (width, height) reversed.
        target_sizes = torch.Tensor([image.size[::-1]])
        results = processor.post_process_object_detection(outputs=outputs, threshold=0.1, target_sizes=target_sizes)
        i = 0  # Retrieve predictions for the first (and only) image
        text = tags[i]  # the query list for this image, not the raw input string
        boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"]
        st.write(results)
        # Print detected objects and rescaled box coordinates to the server log
        for box, score, label in zip(boxes, scores, labels):
            box = [round(coord, 2) for coord in box.tolist()]
            print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")