# videostudioart's picture
# Update app.py
# 112da67 verified
"""
Kiri OCR - FastAPI OCR API
"""
import io
import os
import cv2
import tempfile
import uvicorn
import numpy as np
from PIL import Image
from fastapi import (
FastAPI,
UploadFile,
File,
Form
)
from fastapi.responses import (
JSONResponse,
HTMLResponse
)
from fastapi.middleware.cors import CORSMiddleware
# =========================================================
# GLOBAL OCR INSTANCES
# =========================================================
# Cache of already-constructed OCR engines, keyed by decode method name.
# Filled lazily by get_ocr() so each model is loaded at most once per process.
ocr_instances = dict()
# =========================================================
# LOAD OCR MODEL
# =========================================================
def load_ocr(decode_method="accurate"):
    """Construct a new Kiri OCR engine for the given decode method.

    Args:
        decode_method: Value forwarded as ``decode_method`` to the
            ``kiri_ocr.OCR`` constructor.

    Returns:
        A freshly constructed ``kiri_ocr.OCR`` instance configured for CPU.
    """
    # Imported here rather than at module import time, so kiri_ocr is only
    # loaded when an engine is actually requested.
    from kiri_ocr import OCR

    print(f"Loading OCR model with decode_method={decode_method}")

    engine_options = {
        "model_path": "mrrtmob/kiri-ocr",
        "det_method": "db",
        "decode_method": decode_method,
        "device": "cpu",
        "verbose": False,
    }
    return OCR(**engine_options)
# =========================================================
# GET OCR INSTANCE
# =========================================================
def get_ocr(decode_method="accurate"):
    """Return the cached OCR engine for ``decode_method``, loading it on first use.

    Args:
        decode_method: Cache key; also passed to ``load_ocr`` on a cache miss.

    Returns:
        The engine stored in ``ocr_instances`` under ``decode_method``.
    """
    # Fast path: an engine for this method has already been built.
    if decode_method in ocr_instances:
        return ocr_instances[decode_method]

    # Cache miss: build the engine once and remember it for later requests.
    engine = load_ocr(decode_method)
    ocr_instances[decode_method] = engine
    return engine
# =========================================================
# FASTAPI APP
# =========================================================
# Application object that every route and middleware below is registered on.
_app_metadata = {
    "title": "Kiri OCR API",
    "description": "Image OCR API using Kiri OCR",
    "version": "1.0",
}
app = FastAPI(**_app_metadata)
# =========================================================
# CORS
# =========================================================
# Allow cross-origin calls from any site, any method and any header.
# Credentials are deliberately disabled: combining a wildcard origin with
# allow_credentials=True is either rejected by browsers or makes the
# middleware echo back whatever origin asked, which would let any site send
# credentialed requests. Nothing in this API uses cookies or auth headers,
# so credentialed CORS is not needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# =========================================================
# HOME PAGE
# =========================================================
@app.get("/")
async def home():
    """Serve the landing page.

    The page is a small self-contained HTML document: a file input, a
    decode-method selector (fast / accurate / beam) and a script that POSTs
    both as form data to ``/img2ocr`` and prints the JSON reply.
    """
    page = """
<!DOCTYPE html>
<html>
<head>
<title>Kiri OCR API</title>
<style>
body{
font-family:Arial;
background:#f5f5f5;
padding:40px;
}
.box{
max-width:700px;
margin:auto;
background:#fff;
padding:30px;
border-radius:12px;
box-shadow:0 5px 20px rgba(0,0,0,.08);
}
h1{
margin-top:0;
}
input,
select,
button{
width:100%;
padding:12px;
margin-top:10px;
border-radius:8px;
border:1px solid #ddd;
}
button{
background:#6366f1;
color:#fff;
border:none;
cursor:pointer;
font-size:15px;
}
button:hover{
opacity:.9;
}
pre{
background:#111;
color:#0f0;
padding:20px;
border-radius:10px;
overflow:auto;
margin-top:20px;
white-space:pre-wrap;
}
</style>
</head>
<body>
<div class="box">
<h1>Kiri OCR API</h1>
<p>
Upload image and extract text
</p>
<form id="ocrForm">
<input
type="file"
id="file"
accept="image/*"
required
>
<select id="decode_method">
<option value="fast">
fast
</option>
<option
value="accurate"
selected
>
accurate
</option>
<option value="beam">
beam
</option>
</select>
<button type="submit">
Extract Text
</button>
</form>
<pre id="result"></pre>
</div>
<script>
const form =
document.getElementById(
"ocrForm"
);
form.addEventListener(
"submit",
async (e)=>{
e.preventDefault();
const file =
document.getElementById(
"file"
).files[0];
const decode_method =
document.getElementById(
"decode_method"
).value;
const formData =
new FormData();
formData.append(
"file",
file
);
formData.append(
"decode_method",
decode_method
);
const result =
document.getElementById(
"result"
);
result.textContent =
"Processing...";
try{
const response =
await fetch(
"/img2ocr",
{
method:"POST",
body:formData
}
);
const data =
await response.json();
result.textContent =
JSON.stringify(
data,
null,
2
);
}catch(err){
result.textContent =
"Error: " + err;
}
}
);
</script>
</body>
</html>
"""
    return HTMLResponse(page)
# =========================================================
# OCR API
# =========================================================
@app.post("/img2ocr")
async def img2ocr(
    file: UploadFile = File(...),
    decode_method: str = Form("accurate"),
):
    """Run OCR on an uploaded image and return the recognised text as JSON.

    The upload is decoded with Pillow, normalised to a 3-channel image,
    written to a temporary PNG and passed by path to the OCR engine's
    streaming API; the streamed tokens are concatenated into one string.

    Args:
        file: The uploaded image (multipart form field ``file``).
        decode_method: Form field selecting which cached OCR engine to use;
            defaults to ``"accurate"``.

    Returns:
        ``{"success": True, "decode_method": ..., "text": ...}`` on success,
        or ``{"success": False, "error": ...}`` if anything fails. No explicit
        status code is set on either response, matching the existing contract.
    """
    # NOTE(review): decode_method comes straight from the client and is used
    # as a cache key in get_ocr(); consider restricting it to the values the
    # form offers (fast / accurate / beam) once the accepted set is confirmed.
    temp_path = None
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents)).convert("RGB")

        ocr_engine = get_ocr(decode_method)

        # OpenCV writes channels in BGR order, so swap before saving.
        image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Only reserve a unique path here; the handle is closed before
        # OpenCV writes to it, and the file is removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            temp_path = f.name

        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(temp_path, image_bgr):
            raise RuntimeError("Failed to write temporary image for OCR")

        # Collect the streamed tokens and join once instead of repeated +=.
        tokens = []
        for chunk in ocr_engine.extract_text_stream_chars(
            temp_path,
            mode="lines",
        ):
            token = chunk.get("token", "")
            if token:
                tokens.append(token)

        return JSONResponse({
            "success": True,
            "decode_method": decode_method,
            "text": "".join(tokens),
        })
    except Exception as e:
        # Request boundary: report any failure to the client in the JSON body.
        return JSONResponse({
            "success": False,
            "error": str(e),
        })
    finally:
        # Always remove the temp file, including when OCR raised part-way.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; never mask the response being returned.
                pass
# =========================================================
# HEALTH CHECK
# =========================================================
@app.get("/health")
async def health():
    """Liveness probe: always reports that the service is running."""
    payload = {"status": "running"}
    return payload
# =========================================================
# MAIN
# =========================================================
if __name__ == "__main__":
    # Run directly as a script: serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)