Spaces:

adAstra144
/

OCR

Runtime error

OCR / app.py

Update app.py

57228d5 verified 5 months ago

916 Bytes

	import os
	from fastapi import FastAPI, UploadFile
	from doctr.models import ocr_predictor
	from doctr.io import DocumentFile

	# Directory exported through DOCTR_CACHE_DIR; presumably where DocTR keeps
	# its downloaded files (confirm against the DocTR docs). The variable is set
	# and the directory created before the predictor is constructed below.
	_DOCTR_CACHE_DIR = "/app/.cache"
	os.environ["DOCTR_CACHE_DIR"] = _DOCTR_CACHE_DIR
	os.makedirs(_DOCTR_CACHE_DIR, exist_ok=True)

	# Application object that the route decorators below register against.
	app = FastAPI()

	# Build the pretrained OCR predictor a single time when the module loads,
	# so every request reuses the same instance.
	model = ocr_predictor(pretrained=True)

	@app.post("/ocr")
	async def extract_text(file: UploadFile):
	    """Run OCR on an uploaded image and return the recognised words.

	    The upload is decoded directly from memory. The previous implementation
	    wrote every request to the same ``temp.png`` in the working directory,
	    which let concurrent requests overwrite each other's data and left the
	    file behind afterwards.

	    Args:
	        file: The uploaded image.

	    Returns:
	        A dict ``{"text": ...}`` whose value is every recognised word,
	        in document order, joined by single spaces.

	    Raises:
	        HTTPException: 400 if the uploaded file contains no bytes.
	    """
	    # Imported locally so this handler does not depend on edits to the
	    # module's import block; fastapi is already a dependency of this file.
	    from fastapi import HTTPException

	    payload = await file.read()
	    if not payload:
	        raise HTTPException(status_code=400, detail="Uploaded file is empty")

	    # DocumentFile.from_images accepts raw bytes as well as paths, so no
	    # shared temporary file is needed.
	    doc = DocumentFile.from_images(payload)

	    # NOTE(review): model(doc) is a synchronous call inside an async handler,
	    # so the event loop is blocked while OCR runs. Confirm whether that is
	    # acceptable for the expected load.
	    result = model(doc)

	    # Flatten pages -> blocks -> lines -> words into one space-separated string.
	    text = " ".join(
	        word.value
	        for page in result.pages
	        for block in page.blocks
	        for line in block.lines
	        for word in line.words
	    )

	    return {"text": text}