# OCR-API / app.py
# Inayatgaming - "Create app.py" (commit 6be258b, verified)
from fastapi import FastAPI, UploadFile, File, Query
from fastapi.responses import JSONResponse
import easyocr
import numpy as np
import cv2
import time
import requests
from io import BytesIO
from PIL import Image
import os
# ASGI application object; the route decorators further down attach to it.
app = FastAPI(title="EasyOCR API")

# EasyOCR is pointed at directories under /tmp so that its model files and
# user-network files land somewhere writable.
tmp_dir = "/tmp/EasyOCR"
model_dir, user_net_dir = (
    os.path.join(tmp_dir, leaf) for leaf in ("models", "user_network")
)
os.makedirs(model_dir, exist_ok=True)
os.makedirs(user_net_dir, exist_ok=True)

# One shared English, CPU-only reader, built once at import time and reused
# by every request handler.
reader = easyocr.Reader(
    ['en'],
    model_storage_directory=model_dir,
    user_network_directory=user_net_dir,
    gpu=False,
)
def read_image_from_url(url: str, timeout: float = 10.0) -> np.ndarray:
    """Download the image at *url* and return it as an RGB pixel array.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds handed to ``requests.get``. It bounds connecting and
            each wait for data from the server, not the total download time.
            Without it a stalled remote host would block the caller forever.

    Returns:
        The decoded image, converted to RGB, as a NumPy array.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
        OSError: If the payload cannot be decoded as an image by Pillow.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # The context manager releases Pillow's handle on the buffer as soon as
    # the pixels have been copied into the array.
    with Image.open(BytesIO(response.content)) as picture:
        return np.array(picture.convert("RGB"))
def preprocess_image(image: np.ndarray) -> np.ndarray:
    """Binarize an RGB image so that text stands out for OCR.

    The image is converted to grayscale, lightly smoothed with a 3x3 Gaussian
    blur, and then binarized with a Gaussian-weighted adaptive threshold.

    Args:
        image: Pixel array in RGB channel order.

    Returns:
        The thresholded single-channel image (pixel values 0 or 255).
    """
    blur_kernel = (3, 3)   # small kernel: denoise only slightly
    max_value = 255        # value assigned to pixels that pass the threshold
    neighbourhood = 31     # side length of the local window
    offset = 2             # constant subtracted from the local weighted mean

    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, blur_kernel, 0)
    return cv2.adaptiveThreshold(
        smoothed,
        max_value,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        neighbourhood,
        offset,
    )
@app.get("/")
async def ocr_url(url: str = Query(None)):
    """Run OCR on the image found at the ``url`` query parameter.

    Responses:
        200: ``{"status": True, "time_taken": <seconds>, "results": [<text>, ...]}``
        400: ``url`` missing or empty.
        500: Any failure while downloading, decoding, or recognising the
            image; the exception message is returned in ``error``.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    start_time = time.time()

    if not url:
        return JSONResponse(
            status_code=400,
            content={"status": False, "error": "Provide ?url= for OCR."}
        )

    # NOTE(security): `url` is caller-supplied and fetched from the server, so
    # this endpoint can be aimed at internal hosts (SSRF). Restrict allowed
    # destinations before exposing it to untrusted clients.

    def _recognize():
        # Download -> preprocess -> OCR, keeping only the recognised text
        # (the element at index 1 of each readtext entry).
        image = read_image_from_url(url)
        processed = preprocess_image(image)
        return [detection[1] for detection in reader.readtext(processed)]

    try:
        # The download and the OCR pass are both blocking calls. Running them
        # directly inside this coroutine would freeze the event loop, and
        # with it every other in-flight request, so they are handed to the
        # loop's default thread pool instead.
        loop = asyncio.get_running_loop()
        output = await loop.run_in_executor(None, _recognize)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"status": False, "error": str(e)}
        )

    time_taken = round(time.time() - start_time, 3)
    return {
        "status": True,
        "time_taken": time_taken,
        "results": output
    }
@app.post("/")
async def ocr_file(file: UploadFile = File(...)):
    """Run OCR on an uploaded image file.

    Responses:
        200: ``{"status": True, "time_taken": <seconds>, "results": [<text>, ...]}``
        500: Any failure while decoding or recognising the image; the
            exception message is returned in ``error``.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    start_time = time.time()

    def _recognize():
        # Decode -> preprocess -> OCR, keeping only the recognised text
        # (the element at index 1 of each readtext entry).
        image = np.array(Image.open(file.file).convert("RGB"))
        processed = preprocess_image(image)
        return [detection[1] for detection in reader.readtext(processed)]

    try:
        # Image decoding and OCR are blocking calls. Running them directly
        # inside this coroutine would freeze the event loop for every other
        # request, so they are handed to the loop's default thread pool.
        loop = asyncio.get_running_loop()
        output = await loop.run_in_executor(None, _recognize)
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"status": False, "error": str(e)}
        )

    time_taken = round(time.time() - start_time, 3)
    return {
        "status": True,
        "time_taken": time_taken,
        "results": output
    }