sergioska committed on
Commit
ab53b8d
·
1 Parent(s): 0a42478

Trying zero-shot object detection

Browse files
Files changed (1) hide show
  1. app.py +27 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
 
2
  # Load model directly
3
- from transformers import pipeline
4
  from tempfile import NamedTemporaryFile
5
 
6
  audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
@@ -8,6 +9,9 @@ audiopipe = pipeline("automatic-speech-recognition", model="openai/whisper-large
8
  #imagepipe = pipeline("image-classification", model="nateraw/food")
9
  imagepipe = pipeline("image-classification", model="flatmoon102/fruits_and_vegetables_image_classification")
10
 
 
 
 
11
  st.title('Upload an audio file for speech recognition')
12
 
13
  uploaded_audio_file = st.file_uploader("Choose an audio file (wav)")
@@ -27,3 +31,25 @@ if uploaded_image_file is not None:
27
  temp.seek(0)
28
  result = imagepipe(temp.name)
29
  st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import torch
3
  # Load model directly
4
+ from transformers import pipeline, OwlViTProcessor, OwlViTForObjectDetection
5
  from tempfile import NamedTemporaryFile
6
 
7
# Streamlit re-executes this whole script on every widget interaction.
# Without caching, each rerun would reload every model below (whisper-large-v3
# alone is several GB), so the loaders are wrapped in st.cache_resource and
# the heavy objects are built once per server process.
@st.cache_resource
def _load_pipeline(task, model_id):
    """Build a transformers pipeline for *task* using the checkpoint *model_id*.

    The result is cached by Streamlit, so repeated calls with the same
    arguments return the same pipeline object instead of reloading weights.
    """
    return pipeline(task, model=model_id)


@st.cache_resource
def _load_owlvit(checkpoint):
    """Return the ``(processor, model)`` pair for an OWL-ViT *checkpoint*.

    Cached by Streamlit so the checkpoint is only loaded once.
    """
    return (
        OwlViTProcessor.from_pretrained(checkpoint),
        OwlViTForObjectDetection.from_pretrained(checkpoint),
    )


# Speech-to-text pipeline used by the audio upload section.
audiopipe = _load_pipeline("automatic-speech-recognition", "openai/whisper-large-v3")

# Image classifier used by the image upload section.
# Alternative checkpoint that was tried previously: "nateraw/food".
imagepipe = _load_pipeline("image-classification", "flatmoon102/fruits_and_vegetables_image_classification")

# Zero-shot object detector (text-conditioned) and its pre/post-processor.
processor, model = _load_owlvit("google/owlvit-base-patch32")
14
+
15
  st.title('Upload an audio file for speech recognition')
16
 
17
  uploaded_audio_file = st.file_uploader("Choose an audio file (wav)")
 
31
  temp.seek(0)
32
  result = imagepipe(temp.name)
33
  st.write(result)
34
+
35
# --- Zero-shot object detection section -------------------------------------
# The user uploads an image and types what to look for; OWL-ViT returns a
# bounding box, score and query index for every match above the threshold.
st.title('Upload an image file to detection')

uploaded_image_zero_file = st.file_uploader("Choose an image file (zero)")
# NOTE(review): st.text_input takes the widget label first and the pre-filled
# value second, so 'apple' is rendered as the label and 'eggs' is the default
# query. Several queries may be entered separated by commas.
texts = st.text_input('apple', 'eggs')
if uploaded_image_zero_file is not None:
    # Local import keeps this section self-contained; transformers is already
    # a dependency of the app, so no new package is introduced.
    from transformers.image_utils import load_image

    # Turn the free-text field into a clean list of query strings.
    queries = [query.strip() for query in texts.split(",") if query.strip()]
    if not queries:
        st.warning("Enter at least one object name to look for.")
    else:
        with NamedTemporaryFile() as temp:
            temp.write(uploaded_image_zero_file.getvalue())
            # Make sure the bytes are on disk before reopening the file by name.
            temp.flush()
            # Decode into an image object: the processor and the target-size
            # computation below need pixel data and dimensions, not a path.
            image = load_image(temp.name)

        # The processor expects one list of text queries per image.
        inputs = processor(text=[queries], images=image, return_tensors="pt")
        # Inference only: skip gradient bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)

        # Post-processing wants (height, width); image.size is (width, height).
        target_sizes = torch.tensor([image.size[::-1]], dtype=torch.float32)
        results = processor.post_process_object_detection(
            outputs=outputs, threshold=0.1, target_sizes=target_sizes
        )
        st.write(results)

        # Only one image was submitted, so its predictions are at index 0.
        detections = results[0]
        # Show each detected object with its rescaled box coordinates on the
        # page (print() would only reach the server log).
        for box, score, label in zip(
            detections["boxes"], detections["scores"], detections["labels"]
        ):
            box = [round(coord, 2) for coord in box.tolist()]
            st.write(
                f"Detected {queries[int(label)]} with confidence "
                f"{round(score.item(), 3)} at location {box}"
            )