Spaces:

adAstra144
/

OCR

Runtime error

adAstra144 committed on Sep 23, 2025

Commit

57228d5

verified ·

1 Parent(s): febefdd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,15 @@
 from fastapi import FastAPI, UploadFile
 from doctr.models import ocr_predictor
 from doctr.io import DocumentFile
 app = FastAPI()
-# Initialize DocTR OCR model
 model = ocr_predictor(pretrained=True)
 @app.post("/ocr")
@@ -14,11 +19,11 @@ async def extract_text(file: UploadFile):
     with open(image_path, "wb") as f:
         f.write(await file.read())
-    # Read the document
     doc = DocumentFile.from_images(image_path)
     result = model(doc)
-    # Extract text from all pages, blocks, lines, words
     text = " ".join([
         word.value
         for page in result.pages

+import os
 from fastapi import FastAPI, UploadFile
 from doctr.models import ocr_predictor
 from doctr.io import DocumentFile
+# Set DocTR cache directory before the OCR model is created (the doctr imports above have already run)
+os.environ["DOCTR_CACHE_DIR"] = "/app/.cache"
+os.makedirs("/app/.cache", exist_ok=True)
 app = FastAPI()
+# Initialize DocTR OCR model once at startup
 model = ocr_predictor(pretrained=True)
 @app.post("/ocr")
     with open(image_path, "wb") as f:
         f.write(await file.read())
+    # Read document and run OCR
     doc = DocumentFile.from_images(image_path)
     result = model(doc)
+    # Extract all words as a single string
     text = " ".join([
         word.value
         for page in result.pages