Vishal Singla committed on
Commit
252fb2b
·
1 Parent(s): e5e8791
Files changed (3) hide show
  1. app.py +312 -0
  2. requirements.txt +15 -0
  3. smartvision.ipynb +0 -0
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # from ultralytics import YOLO
3
+ # from PIL import Image
4
+ # import tempfile
5
+ # import cv2
6
+ # import av
7
+ # from streamlit_webrtc import webrtc_streamer, VideoProcessorBase
8
+ # import torch
9
+ # import torch.nn as nn
10
+ # from torchvision import models, transforms
11
+ # from PIL import Image
12
+ # import os
13
+ # from huggingface_hub import hf_hub_download
14
+
15
+ # cache_dir = "models_cache"
16
+ # os.makedirs(cache_dir, exist_ok=True)
17
+
18
+ # VGG16_best = hf_hub_download(
19
+ # repo_id="jgvghf/smartvision",
20
+ # filename="VGG16_best.pth",
21
+ # token=st.secrets["HuggingFace_token"],
22
+ # cache_dir=cache_dir
23
+ # )
24
+
25
+ # yolo_best = hf_hub_download(
26
+ # repo_id="jgvghf/smartvision",
27
+ # filename="best.pt",
28
+ # token=st.secrets["HuggingFace_token"],
29
+ # cache_dir=cache_dir
30
+ # )
31
+
32
+
33
+ # st.set_page_config(page_title="SmartVision AI", layout="centered")
34
+ # st.title("πŸ” SmartVision AI – Object Detection")
35
+
36
+ # # ---------------- LOAD MODEL ----------------
37
+ # model = YOLO(yolo_best)
38
+
39
+ # # ---------------- MODE SELECTION ----------------
40
+ # mode = st.radio("Select Mode", ["πŸ“ Image Upload", "πŸ“· Webcam","🧠 Image Classification"])
41
+
42
+ # # ==================================================
43
+ # # πŸ“ IMAGE UPLOAD MODE
44
+ # # ==================================================
45
+ # if mode == "πŸ“ Image Upload":
46
+
47
+ # uploaded_img = st.file_uploader(
48
+ # "Upload Image",
49
+ # type=["jpg", "jpeg", "png"]
50
+ # )
51
+
52
+ # if uploaded_img is not None:
53
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
54
+ # tmp.write(uploaded_img.read())
55
+ # image_path = tmp.name
56
+
57
+ # results = model(image_path, conf=0.35)
58
+ # res = results[0]
59
+
60
+ # annotated_img = res.plot()
61
+ # annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
62
+
63
+ # st.image(annotated_img, caption="Detected Objects", use_container_width=True)
64
+
65
+ # if res.boxes is not None:
66
+ # st.success(f"Detected Objects: {len(res.boxes)}")
67
+
68
+ # # ==================================================
69
+ # # πŸ“· WEBCAM MODE
70
+ # # ==================================================
71
+ # elif mode == "πŸ“· Webcam":
72
+
73
+ # class YOLOVideoProcessor(VideoProcessorBase):
74
+ # def recv(self, frame):
75
+ # img = frame.to_ndarray(format="bgr24")
76
+
77
+ # results = model(img, conf=0.35)
78
+ # res = results[0]
79
+
80
+ # annotated_frame = res.plot()
81
+ # return av.VideoFrame.from_ndarray(annotated_frame, format="bgr24")
82
+
83
+ # webrtc_streamer(
84
+ # key="yolo-webcam",
85
+ # video_processor_factory=YOLOVideoProcessor,
86
+ # media_stream_constraints={
87
+ # "video": True,
88
+ # "audio": False
89
+ # },
90
+ # async_processing=True
91
+ # )
92
+ # else:
93
+
94
+
95
+
96
+ # # =====================================================
97
+ # # PAGE CONFIG
98
+ # # =====================================================
99
+ # # st.set_page_config(page_title="SmartVision AI - Classification", layout="centered")
100
+ # st.title("🧠 SmartVision AI – Image Classification (VGG16)")
101
+
102
+ # # =====================================================
103
+ # # LOAD CLASS NAMES
104
+ # # =====================================================
105
+ # # CLASS_DIR = "smartvision_dataset/classification/train"
106
+ # class_names= ['airplane', 'bed', 'bench', 'bicycle', 'bird', 'bottle', 'bowl', 'bus', 'cake',
107
+ # 'car', 'cat', 'chair', 'couch', 'cow', 'cup', 'dog', 'elephant', 'horse', 'motorcycle',
108
+ # 'person', 'pizza', 'potted plant', 'stop sign', 'traffic light', 'truck']
109
+ # # class_names = sorted(os.listdir(CLASS_DIR))
110
+ # NUM_CLASSES = len(class_names)
111
+
112
+ # # =====================================================
113
+ # # LOAD VGG16 MODEL
114
+ # # =====================================================
115
+ # @st.cache_resource
116
+ # def load_vgg16():
117
+ # vggmodel = models.vgg16(pretrained=False)
118
+ # vggmodel.classifier[6] = nn.Linear(4096, NUM_CLASSES)
119
+
120
+ # vggmodel.load_state_dict(
121
+ # torch.load(VGG16_best, map_location="cpu")
122
+ # )
123
+
124
+ # vggmodel.eval()
125
+ # return vggmodel
126
+
127
+ # vggmodel = load_vgg16()
128
+
129
+ # # =====================================================
130
+ # # IMAGE TRANSFORMS
131
+ # # =====================================================
132
+ # transform = transforms.Compose([
133
+ # transforms.Resize((224, 224)),
134
+ # transforms.ToTensor(),
135
+ # transforms.Normalize(
136
+ # mean=[0.485, 0.456, 0.406],
137
+ # std=[0.229, 0.224, 0.225]
138
+ # )
139
+ # ])
140
+
141
+ # # =====================================================
142
+ # # IMAGE UPLOAD
143
+ # # =====================================================
144
+ # uploaded_img = st.file_uploader(
145
+ # "πŸ“ Upload an image for classification",
146
+ # type=["jpg", "jpeg", "png"]
147
+ # )
148
+
149
+ # if uploaded_img:
150
+
151
+ # image = Image.open(uploaded_img).convert("RGB")
152
+ # st.image(image, caption="Uploaded Image", use_container_width=True)
153
+
154
+ # input_tensor = transform(image).unsqueeze(0)
155
+
156
+ # with torch.no_grad():
157
+ # outputs = vggmodel(input_tensor)
158
+ # probs = torch.softmax(outputs, dim=1)
159
+ # confidence, predicted = torch.max(probs, 1)
160
+
161
+ # st.success(
162
+ # f"🧠 Prediction: **{class_names[predicted.item()]}**\n\n"
163
+ # f"🎯 Confidence: **{confidence.item()*100:.2f}%**"
164
+ # )
165
+
166
+
167
# ---------------- Imports: stdlib first, then third-party ----------------
import os
import tempfile

import av
import cv2
import streamlit as st
import torch
import torch.nn as nn
from huggingface_hub import hf_hub_download
from PIL import Image
from streamlit_webrtc import webrtc_streamer, VideoProcessorBase
from torchvision import models, transforms
from ultralytics import YOLO

# =====================================================
# PAGE CONFIG
# =====================================================
# set_page_config must run before any other Streamlit call in the script.
st.set_page_config(page_title="SmartVision AI", layout="centered")
st.title("🚀 SmartVision AI")
+
186
# =====================================================
# DOWNLOAD MODELS (HF HUB)
# =====================================================
@st.cache_resource
def download_models():
    """Fetch both model checkpoints from the Hugging Face Hub.

    Returns:
        (vgg_path, yolo_path): local paths to ``VGG16_best.pth`` and
        ``best.pt``. Files are cached on disk under ``models_cache`` and the
        whole call is memoised by Streamlit, so each file downloads at most
        once per process.
    """
    cache_dir = "models_cache"
    os.makedirs(cache_dir, exist_ok=True)

    def _fetch(filename):
        # NOTE(review): assumes the "HuggingFace_token" secret is configured
        # for this deployment; a missing secret raises KeyError here.
        return hf_hub_download(
            repo_id="jgvghf/smartvision",
            filename=filename,
            token=st.secrets["HuggingFace_token"],
            cache_dir=cache_dir,
        )

    return _fetch("VGG16_best.pth"), _fetch("best.pt")

VGG16_PATH, YOLO_PATH = download_models()
+
212
# =====================================================
# LOAD MODELS
# =====================================================
@st.cache_resource
def load_yolo():
    """Build the YOLO detector from the downloaded checkpoint (cached once)."""
    return YOLO(YOLO_PATH)
+
220
@st.cache_resource
def load_vgg16():
    """Build the 25-class VGG16 classifier and load its fine-tuned weights.

    Returns:
        (model, class_names): the model in eval mode on CPU, plus the label
        list whose order must match the training-time class order.
    """
    class_names = [
        'airplane', 'bed', 'bench', 'bicycle', 'bird', 'bottle', 'bowl', 'bus', 'cake',
        'car', 'cat', 'chair', 'couch', 'cow', 'cup', 'dog', 'elephant', 'horse',
        'motorcycle', 'person', 'pizza', 'potted plant', 'stop sign',
        'traffic light', 'truck'
    ]

    # Fix: `pretrained=` is deprecated since torchvision 0.13; `weights=None`
    # is the supported way to construct an uninitialised VGG16 (same result).
    model = models.vgg16(weights=None)
    # Replace the final 4096-wide FC layer with one sized for our classes.
    model.classifier[6] = nn.Linear(4096, len(class_names))
    model.load_state_dict(torch.load(VGG16_PATH, map_location="cpu"))
    model.eval()

    return model, class_names

yolo_model = load_yolo()
vgg_model, CLASS_NAMES = load_vgg16()

# =====================================================
# TABS (2 PAGE APP)
# =====================================================
tab1, tab2 = st.tabs(["🔍 Object Detection", "🧠 Image Classification"])
+
244
# =====================================================
# 🔍 OBJECT DETECTION PAGE
# =====================================================
with tab1:
    st.header("🔍 Object Detection (YOLO)")

    mode = st.radio("Select Mode", ["📁 Image Upload", "📷 Webcam"])

    if mode == "📁 Image Upload":
        img_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], key="detector_uploader")

        if img_file:
            # YOLO accepts a file path, so persist the upload to a temp file.
            # delete=False lets YOLO reopen the file after this block closes it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
                tmp.write(img_file.read())
                img_path = tmp.name

            try:
                results = yolo_model(img_path, conf=0.4)
            finally:
                # Fix: previously the temp file leaked on every upload.
                os.remove(img_path)

            # plot() returns an annotated BGR ndarray; convert for st.image (RGB).
            annotated = results[0].plot()
            annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

            st.image(annotated, caption="Detected Objects", use_container_width=True)
            # Fix: guard against boxes being None instead of crashing on len().
            n_boxes = len(results[0].boxes) if results[0].boxes is not None else 0
            st.success(f"Objects Detected: {n_boxes}")

    else:
        class YOLOProcessor(VideoProcessorBase):
            # Runs YOLO on each incoming WebRTC frame and returns the
            # annotated frame; plot() output is BGR, matching "bgr24".
            def recv(self, frame):
                img = frame.to_ndarray(format="bgr24")
                results = yolo_model(img, conf=0.4)
                return av.VideoFrame.from_ndarray(results[0].plot(), format="bgr24")

        webrtc_streamer(
            key="yolo-webcam",
            video_processor_factory=YOLOProcessor,
            media_stream_constraints={"video": True, "audio": False},
            async_processing=True
        )
+
281
# =====================================================
# 🧠 IMAGE CLASSIFICATION PAGE
# =====================================================
with tab2:
    st.header("🧠 Image Classification (VGG16)")

    # Standard ImageNet preprocessing: 224x224 input, per-channel normalisation
    # with the usual ImageNet mean/std (matches VGG16 training).
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    img_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], key="classifier_uploader")

    if img_file:
        pil_image = Image.open(img_file).convert("RGB")
        st.image(pil_image, use_container_width=True)

        # Add the leading batch dimension expected by the model.
        batch = transform(pil_image).unsqueeze(0)

        with torch.no_grad():
            logits = vgg_model(batch)
            probabilities = torch.softmax(logits, dim=1)
            conf, pred = torch.max(probabilities, 1)

        st.success(
            f"### 🧠 Prediction: **{CLASS_NAMES[pred.item()]}**\n"
            f"### 🎯 Confidence: **{conf.item()*100:.2f}%**"
        )
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ ultralytics
5
+ opencv-python
6
+ pillow
7
+ numpy
8
+ matplotlib
9
+ scikit-learn
10
+ datasets
11
+ tqdm
12
+ seaborn
13
+ av
14
+ streamlit_webrtc
15
+ huggingface_hub
smartvision.ipynb ADDED
The diff for this file is too large to render. See raw diff