Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
-
import cv2
|
| 4 |
import pytesseract
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 6 |
|
|
@@ -13,12 +12,10 @@ classification_model_name = "distilbert-base-uncased"
|
|
| 13 |
classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
|
| 14 |
classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
|
| 15 |
|
| 16 |
-
# Define function for OCR
|
| 17 |
def perform_ocr(image):
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
# Perform OCR
|
| 21 |
-
text = pytesseract.image_to_string(gray_image)
|
| 22 |
return text
|
| 23 |
|
| 24 |
# Define function to process video and predict
|
|
@@ -36,15 +33,12 @@ def predict_runner_status(video_file):
|
|
| 36 |
results_detection = model(frame)
|
| 37 |
|
| 38 |
# Logic for determining runner status using detected objects
|
| 39 |
-
# Example: if person detected, extract text and classify
|
| 40 |
objects = results_detection.pred[0][:, -1].numpy()
|
| 41 |
if 0 in objects: # 0 corresponds to person class
|
| 42 |
-
#
|
| 43 |
person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
|
| 44 |
person_bbox = person_bbox.astype(int)
|
| 45 |
person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
|
| 46 |
-
|
| 47 |
-
# Perform OCR on the cropped image
|
| 48 |
text = perform_ocr(person_img)
|
| 49 |
|
| 50 |
# Classification using text classification model
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
|
|
|
| 3 |
import pytesseract
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 5 |
|
|
|
|
| 12 |
classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
|
| 13 |
classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
|
| 14 |
|
| 15 |
+
# Define function for text recognition (OCR)
|
| 16 |
def perform_ocr(image):
    """Return the text that pytesseract recognizes in ``image``.

    The image is handed to ``pytesseract.image_to_string`` exactly as
    received; no preprocessing is applied in this function.
    """
    return pytesseract.image_to_string(image)
|
| 20 |
|
| 21 |
# Define function to process video and predict
|
|
|
|
| 33 |
results_detection = model(frame)
|
| 34 |
|
| 35 |
# Logic for determining runner status using detected objects
|
|
|
|
| 36 |
objects = results_detection.pred[0][:, -1].numpy()
|
| 37 |
if 0 in objects: # 0 corresponds to person class
|
| 38 |
+
# Perform OCR on the detected person
|
| 39 |
person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
|
| 40 |
person_bbox = person_bbox.astype(int)
|
| 41 |
person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
|
|
|
|
|
|
|
| 42 |
text = perform_ocr(person_img)
|
| 43 |
|
| 44 |
# Classification using text classification model
|