Spaces:
Paused
Paused
Update inference.py
Browse files- inference.py +9 -2
inference.py
CHANGED
|
@@ -208,7 +208,7 @@ import json
|
|
| 208 |
from model_loader import get_model
|
| 209 |
from processor_utils import load_input
|
| 210 |
from prompt import get_part_classifier_prompt, get_part_prompt
|
| 211 |
-
|
| 212 |
|
| 213 |
def _get_max_tokens(part_name):
|
| 214 |
limits = {
|
|
@@ -383,7 +383,11 @@ def process_document(file_path):
|
|
| 383 |
page_results = []
|
| 384 |
|
| 385 |
for idx, image in enumerate(pages, start=1):
|
|
|
|
|
|
|
| 386 |
part_name = classify_page(image, model, processor, device)
|
|
|
|
|
|
|
| 387 |
|
| 388 |
if part_name == "UNKNOWN":
|
| 389 |
page_results.append({
|
|
@@ -394,8 +398,11 @@ def process_document(file_path):
|
|
| 394 |
"parsed": None
|
| 395 |
})
|
| 396 |
continue
|
| 397 |
-
|
|
|
|
| 398 |
result = extract_part_json(image, part_name, model, processor, device)
|
|
|
|
|
|
|
| 399 |
result["page_number"] = idx
|
| 400 |
page_results.append(result)
|
| 401 |
|
|
|
|
| 208 |
from model_loader import get_model
|
| 209 |
from processor_utils import load_input
|
| 210 |
from prompt import get_part_classifier_prompt, get_part_prompt
|
| 211 |
+
import time
|
| 212 |
|
| 213 |
def _get_max_tokens(part_name):
|
| 214 |
limits = {
|
|
|
|
| 383 |
page_results = []
|
| 384 |
|
| 385 |
for idx, image in enumerate(pages, start=1):
|
| 386 |
+
print("first model has been called for",idx,"image")
|
| 387 |
+
start = time.time()
|
| 388 |
part_name = classify_page(image, model, processor, device)
|
| 389 |
+
end = time.time()
|
| 390 |
+
print("total time taken by the first model",end-start,"sec")
|
| 391 |
|
| 392 |
if part_name == "UNKNOWN":
|
| 393 |
page_results.append({
|
|
|
|
| 398 |
"parsed": None
|
| 399 |
})
|
| 400 |
continue
|
| 401 |
+
print("second model has been called for",idx,"image")
|
| 402 |
+
start = time.time()
|
| 403 |
result = extract_part_json(image, part_name, model, processor, device)
|
| 404 |
+
end = time.time()
|
| 405 |
+
print("total time taken by the second model",end-start,"sec")
|
| 406 |
result["page_number"] = idx
|
| 407 |
page_results.append(result)
|
| 408 |
|