import torch
import streamlit as st
import numpy as np
from PIL import Image
from transformers import pipeline
from transformers import OwlViTProcessor, OwlViTForObjectDetection
from tempfile import NamedTemporaryFile

# Speech recognition pipeline (Whisper large-v3)
audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")

# Image classification pipeline; two alternative food classifiers are kept for reference
#imagepipe = pipeline("image-classification", model="Kaludi/food-category-classification-v2.0")
#imagepipe = pipeline("image-classification", model="nateraw/food")
imagepipe = pipeline("image-classification", model="flatmoon102/fruits_and_vegetables_image_classification")

# OWL-ViT processor and model for zero-shot, text-conditioned object detection
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
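# On a GPU-backed Space the pipelines can be pinned to CUDA via the pipeline's
# standard device argument; a sketch (assumes a CUDA device is present):
# audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device=0)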
st.title('Upload an audio file for speech recognition')
uploaded_audio_file = st.file_uploader("Choose an audio file (wav)")
if uploaded_audio_file is not None:
    # Write the upload to a temporary .wav file so the pipeline can read it by path
    with NamedTemporaryFile(suffix=".wav") as temp:
        temp.write(uploaded_audio_file.getvalue())
        temp.seek(0)
        result = audiopipe(temp.name)
        st.write(result)
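        # The Whisper pipeline can also return segment timestamps; a sketch
        # using the ASR pipeline's standard return_timestamps kwarg:
        # result = audiopipe(temp.name, return_timestamps=True)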
st.title('Upload an image file for classification (food)')
uploaded_image_file = st.file_uploader("Choose an image file")
if uploaded_image_file is not None:
    # Write the upload to a temporary file; PIL detects the image format from the contents
    with NamedTemporaryFile() as temp:
        temp.write(uploaded_image_file.getvalue())
        temp.seek(0)
        result = imagepipe(temp.name)
        st.write(result)
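        # The image-classification pipeline also accepts a PIL image directly,
        # which would avoid the temp file; a sketch:
        # result = imagepipe(Image.open(uploaded_image_file))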
st.title('Upload an image file for zero-shot object detection')
uploaded_image_zero_file = st.file_uploader("Choose an image file (zero)")
texts = st.text_input('tags')
if uploaded_image_zero_file is not None:
    image = Image.open(uploaded_image_zero_file)
    outputImage = np.array(image)
    st.image(outputImage)
    if st.button('apply tag'):
        # OWL-ViT expects one list of text queries per image
        tags = [texts.split(", ")]
        inputs = processor(text=tags, images=image, return_tensors="pt")
        outputs = model(**inputs)
        # Target size is (height, width), so reverse PIL's (width, height)
        target_sizes = torch.Tensor([image.size[::-1]])
        results = processor.post_process_object_detection(outputs=outputs, threshold=0.1, target_sizes=target_sizes)
        i = 0  # Retrieve predictions for the first image for the corresponding text queries
        text = tags[i]
        boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"]
        st.write(results)
        # Print detected objects and rescaled box coordinates to the app log
        for box, score, label in zip(boxes, scores, labels):
            box = [round(coord, 2) for coord in box.tolist()]
            print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")