Commit ·
0dbfcb8
1
Parent(s): cd14c77
Added easyocr for video frames
Browse files
- handler.py +15 -0
- requirements.txt +2 -1
handler.py
CHANGED
|
@@ -14,6 +14,7 @@ from decord import VideoReader
|
|
| 14 |
from decord import cpu
|
| 15 |
|
| 16 |
import timeit
|
|
|
|
| 17 |
|
| 18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
|
|
@@ -32,6 +33,7 @@ class EndpointHandler:
|
|
| 32 |
|
| 33 |
logging.set_verbosity_debug()
|
| 34 |
self.logger = logging.get_logger(__name__)
|
|
|
|
| 35 |
# Check if CUDA (GPU support) is available
|
| 36 |
if torch.cuda.is_available():
|
| 37 |
self.logger.info("GPU is available for inference.")
|
|
@@ -184,6 +186,19 @@ class EndpointHandler:
|
|
| 184 |
self.logger.info(
|
| 185 |
f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
|
| 186 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
video_metadata["url"] = video_url
|
| 188 |
self.logger.info("Returning embeddings and metadata.")
|
| 189 |
return frame_embeddings, video_metadata
|
|
|
|
| 14 |
from decord import cpu
|
| 15 |
|
| 16 |
import timeit
|
| 17 |
+
import easyocr
|
| 18 |
|
| 19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 20 |
|
|
|
|
| 33 |
|
| 34 |
logging.set_verbosity_debug()
|
| 35 |
self.logger = logging.get_logger(__name__)
|
| 36 |
+
self.reader = easyocr.Reader(["de", "en"]) # Add more languages if needed
|
| 37 |
# Check if CUDA (GPU support) is available
|
| 38 |
if torch.cuda.is_available():
|
| 39 |
self.logger.info("GPU is available for inference.")
|
|
|
|
| 186 |
self.logger.info(
|
| 187 |
f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
|
| 188 |
)
|
| 189 |
+
# Extract text from each frame using EasyOCR
|
| 190 |
+
self.logger.info("Extracting text from frames.")
|
| 191 |
+
text_extraction_start_time = timeit.default_timer()
|
| 192 |
+
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
| 193 |
+
texts_set = set()
|
| 194 |
+
for text_list in frame_texts:
|
| 195 |
+
[texts_set.add(text) for text in text_list]
|
| 196 |
+
video_metadata["extracted_text"] = texts_set
|
| 197 |
+
text_extraction_end_time = timeit.default_timer()
|
| 198 |
+
self.logger.info(
|
| 199 |
+
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
video_metadata["url"] = video_url
|
| 203 |
self.logger.info("Returning embeddings and metadata.")
|
| 204 |
return frame_embeddings, video_metadata
|
requirements.txt
CHANGED
|
@@ -23,4 +23,5 @@ tqdm==4.66.1
|
|
| 23 |
transformers==4.27.2
|
| 24 |
typing_extensions==4.8.0
|
| 25 |
urllib3==2.0.7
|
| 26 |
-
decord==0.6.0
|
|
|
|
|
|
| 23 |
transformers==4.27.2
|
| 24 |
typing_extensions==4.8.0
|
| 25 |
urllib3==2.0.7
|
| 26 |
+
decord==0.6.0
|
| 27 |
+
easyocr==1.7.1
|