Spaces:

sergioska
/

image-classifier

Runtime error

App Files Files Community

sergioska committed on Dec 2, 2023

Commit

ab53b8d

1 Parent(s): 0a42478

trying zero object detection

Browse files

Files changed (1) hide show

app.py +27 -1

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 # Load model directly
-from transformers import pipeline
 from tempfile import NamedTemporaryFile
 audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
@@ -8,6 +9,9 @@ audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large
 #imagepipe = pipeline("image-classification", model="nateraw/food")
 imagepipe = pipeline("image-classification", model="flatmoon102/fruits_and_vegetables_image_classification")
 st.title('Upload an audio file for speech recognition')
 uploaded_audio_file = st.file_uploader("Choose an audio file (wav)")
@@ -27,3 +31,25 @@ if uploaded_image_file is not None:
         temp.seek(0)
         result = imagepipe(temp.name)
         st.write(result)

 import streamlit as st
+import torch
 # Load model directly
+from transformers import pipeline, OwlViTProcessor, OwlViTForObjectDetection
 from tempfile import NamedTemporaryFile
 audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
 #imagepipe = pipeline("image-classification", model="nateraw/food")
 imagepipe = pipeline("image-classification", model="flatmoon102/fruits_and_vegetables_image_classification")
+processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
+model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
 st.title('Upload an audio file for speech recognition')
 uploaded_audio_file = st.file_uploader("Choose an audio file (wav)")
         temp.seek(0)
         result = imagepipe(temp.name)
         st.write(result)
+# --- Zero-shot object detection (OWL-ViT) ---
+# The user uploads an image and types the object names to look for; the
+# module-level `processor` / `model` pair scores each query against the image
+# and the detections above the score threshold are listed on the page.
+st.title('Upload an image file to detection')
+uploaded_image_zero_file = st.file_uploader("Choose an image file (zero)")
+# text_input(label, default) returns ONE string, so several queries are
+# entered comma-separated and split into a list further down.
+texts = st.text_input('Objects to detect (comma-separated)', 'apple, eggs')
+if uploaded_image_zero_file is not None:
+    # Local import keeps this section self-contained; load_image ships with
+    # the transformers package the app already depends on.
+    from transformers.image_utils import load_image
+
+    queries = [query.strip() for query in texts.split(',') if query.strip()]
+    if not queries:
+        st.warning('Enter at least one object name to detect.')
+    else:
+        with NamedTemporaryFile() as temp:
+            temp.write(uploaded_image_zero_file.getvalue())
+            # Make sure the bytes are on disk before reopening the file by name.
+            temp.flush()
+            # The processor and the size lookup below need an image object,
+            # not the temp file's path string.
+            image = load_image(temp.name)
+            # One list of queries per image in the batch (a single image here).
+            inputs = processor(text=[queries], images=image, return_tensors="pt")
+            with torch.no_grad():  # inference only, no autograd graph needed
+                outputs = model(**inputs)
+            # image.size is (width, height); post-processing takes (height, width).
+            target_sizes = torch.tensor([image.size[::-1]])
+            results = processor.post_process_object_detection(
+                outputs=outputs, threshold=0.1, target_sizes=target_sizes
+            )
+            # Predictions for the first (and only) image in the batch.
+            detections = results[0]
+            boxes, scores, labels = detections["boxes"], detections["scores"], detections["labels"]
+            if len(boxes) == 0:
+                st.info('No objects detected.')
+            # Show each detected object with its rescaled box coordinates.
+            for box, score, label in zip(boxes, scores, labels):
+                box = [round(coord, 2) for coord in box.tolist()]
+                st.write(f"Detected {queries[int(label)]} with confidence {round(score.item(), 3)} at location {box}")