Commit
·
374a5b3
1
Parent(s):
f33eeea
Changed back to a set, using json.dumps for serialization
Browse files: handler.py (+11 -3)
handler.py
CHANGED
|
@@ -15,6 +15,7 @@ from decord import cpu
|
|
| 15 |
|
| 16 |
import timeit
|
| 17 |
import easyocr
|
|
|
|
| 18 |
|
| 19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 20 |
|
|
@@ -161,6 +162,11 @@ class EndpointHandler:
|
|
| 161 |
# self.logger.info("Returning batch_emb list")
|
| 162 |
return batch_emb
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
def process_video(self, video_url, video_metadata):
|
| 165 |
try:
|
| 166 |
self.logger.info("Downloading video as bytes.")
|
|
@@ -190,10 +196,12 @@ class EndpointHandler:
|
|
| 190 |
self.logger.info("Extracting text from frames.")
|
| 191 |
text_extraction_start_time = timeit.default_timer()
|
| 192 |
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
| 193 |
-
|
| 194 |
for text_list in frame_texts:
|
| 195 |
-
[
|
| 196 |
-
video_metadata["extracted_text"] =
|
|
|
|
|
|
|
| 197 |
text_extraction_end_time = timeit.default_timer()
|
| 198 |
self.logger.info(
|
| 199 |
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|
|
|
|
| 15 |
|
| 16 |
import timeit
|
| 17 |
import easyocr
|
| 18 |
+
import json
|
| 19 |
|
| 20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 21 |
|
|
|
|
| 162 |
# self.logger.info("Returning batch_emb list")
|
| 163 |
return batch_emb
|
| 164 |
|
| 165 |
+
def set_default(self, obj):
|
| 166 |
+
if isinstance(obj, set):
|
| 167 |
+
return list(obj)
|
| 168 |
+
raise TypeError
|
| 169 |
+
|
| 170 |
def process_video(self, video_url, video_metadata):
|
| 171 |
try:
|
| 172 |
self.logger.info("Downloading video as bytes.")
|
|
|
|
| 196 |
self.logger.info("Extracting text from frames.")
|
| 197 |
text_extraction_start_time = timeit.default_timer()
|
| 198 |
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
| 199 |
+
texts_set = set()
|
| 200 |
for text_list in frame_texts:
|
| 201 |
+
[texts_set.add(text) for text in text_list]
|
| 202 |
+
video_metadata["extracted_text"] = json.dumps(
|
| 203 |
+
texts_set, default=self.set_default
|
| 204 |
+
)
|
| 205 |
text_extraction_end_time = timeit.default_timer()
|
| 206 |
self.logger.info(
|
| 207 |
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|