Spaces:

videostudioart
/

img2text-ocr-api

Sleeping

App Files Files Community

img2text-ocr-api / app.py

videostudioart

Update app.py

112da67 verified 6 days ago

raw

history blame contribute delete

8.48 kB

	"""
	Kiri OCR - FastAPI OCR API
	"""

	import io
	import os
	import cv2
	import tempfile
	import uvicorn
	import numpy as np

	from PIL import Image

	from fastapi import (
	FastAPI,
	UploadFile,
	File,
	Form
	)

	from fastapi.responses import (
	JSONResponse,
	HTMLResponse
	)

	from fastapi.middleware.cors import CORSMiddleware

	# =========================================================
	# GLOBAL OCR INSTANCES
	# =========================================================

	ocr_instances = {}

	# =========================================================
	# LOAD OCR MODEL
	# =========================================================

	def load_ocr(decode_method="accurate"):
	    """Build a new Kiri OCR engine for the given decoding strategy.

	    ``kiri_ocr`` is imported inside the function, so the package is only
	    imported once an engine is actually requested.

	    Args:
	        decode_method: Forwarded unchanged as the ``decode_method`` keyword
	            of ``OCR``.

	    Returns:
	        A newly constructed ``kiri_ocr.OCR`` instance.
	    """
	    from kiri_ocr import OCR

	    print(f"Loading OCR model with decode_method={decode_method}")

	    # Fixed engine settings; only the decoding strategy varies per call.
	    engine_options = dict(
	        model_path="mrrtmob/kiri-ocr",
	        det_method="db",
	        decode_method=decode_method,
	        device="cpu",
	        verbose=False,
	    )
	    return OCR(**engine_options)

	# =========================================================
	# GET OCR INSTANCE
	# =========================================================

	def get_ocr(decode_method="accurate"):
	    """Return the OCR engine cached for ``decode_method``, loading it on first use.

	    Engines are stored in the module-level ``ocr_instances`` dict keyed by
	    decode method, so each distinct method is loaded at most once.

	    Args:
	        decode_method: Cache key, also passed to ``load_ocr`` on a miss.

	    Returns:
	        The engine stored in ``ocr_instances`` for ``decode_method``.
	    """
	    # Cache hit: hand back the engine that was built earlier.
	    if decode_method in ocr_instances:
	        return ocr_instances[decode_method]

	    # Cache miss: build the engine, remember it, then return it.
	    engine = load_ocr(decode_method)
	    ocr_instances[decode_method] = engine
	    return engine

	# =========================================================
	# FASTAPI APP
	# =========================================================

	# ASGI application object; the route decorators below register on it.
	app = FastAPI(
	    title="Kiri OCR API",
	    description="Image OCR API using Kiri OCR",
	    version="1.0",
	)

	# =========================================================
	# CORS
	# =========================================================

	# Any origin may call the API. Credentialed CORS is switched off because
	# FastAPI's CORS documentation states that wildcard origins/methods/headers
	# must not be combined with allow_credentials=True, and none of the
	# endpoints in this file read cookies or authentication headers.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# =========================================================
	# HOME PAGE
	# =========================================================

	@app.get("/")
	async def home():
	    """Serve the single-page upload form used to try the OCR endpoint.

	    The page's script posts the selected image and decode method to
	    ``/img2ocr`` with ``fetch`` and shows the JSON reply, or the fetch
	    error, inside the ``<pre id="result">`` element.

	    Returns:
	        An ``HTMLResponse`` whose body is the static markup below.
	    """
	    # NOTE: the literal is kept verbatim, leading whitespace included,
	    # because every character inside it is part of the response body.
	    page_markup = """

	<!DOCTYPE html>

	<html>

	<head>

	<title>Kiri OCR API</title>

	<style>

	body{
	font-family:Arial;
	background:#f5f5f5;
	padding:40px;
	}

	.box{
	max-width:700px;
	margin:auto;
	background:#fff;
	padding:30px;
	border-radius:12px;
	box-shadow:0 5px 20px rgba(0,0,0,.08);
	}

	h1{
	margin-top:0;
	}

	input,
	select,
	button{
	width:100%;
	padding:12px;
	margin-top:10px;
	border-radius:8px;
	border:1px solid #ddd;
	}

	button{
	background:#6366f1;
	color:#fff;
	border:none;
	cursor:pointer;
	font-size:15px;
	}

	button:hover{
	opacity:.9;
	}

	pre{
	background:#111;
	color:#0f0;
	padding:20px;
	border-radius:10px;
	overflow:auto;
	margin-top:20px;
	white-space:pre-wrap;
	}

	</style>

	</head>

	<body>

	<div class="box">

	<h1>Kiri OCR API</h1>

	<p>
	Upload image and extract text
	</p>

	<form id="ocrForm">

	<input
	type="file"
	id="file"
	accept="image/*"
	required
	>

	<select id="decode_method">

	<option value="fast">
	fast
	</option>

	<option
	value="accurate"
	selected
	>
	accurate
	</option>

	<option value="beam">
	beam
	</option>

	</select>

	<button type="submit">
	Extract Text
	</button>

	</form>

	<pre id="result"></pre>

	</div>

	<script>

	const form =
	document.getElementById(
	"ocrForm"
	);

	form.addEventListener(
	"submit",
	async (e)=>{

	e.preventDefault();

	const file =
	document.getElementById(
	"file"
	).files[0];

	const decode_method =
	document.getElementById(
	"decode_method"
	).value;

	const formData =
	new FormData();

	formData.append(
	"file",
	file
	);

	formData.append(
	"decode_method",
	decode_method
	);

	const result =
	document.getElementById(
	"result"
	);

	result.textContent =
	"Processing...";

	try{

	const response =
	await fetch(
	"/img2ocr",
	{
	method:"POST",
	body:formData
	}
	);

	const data =
	await response.json();

	result.textContent =
	JSON.stringify(
	data,
	null,
	2
	);

	}catch(err){

	result.textContent =
	"Error: " + err;

	}

	}
	);

	</script>

	</body>

	</html>

	"""
	    return HTMLResponse(page_markup)

	# =========================================================
	# OCR API
	# =========================================================

	@app.post("/img2ocr")
	async def img2ocr(
	    file: UploadFile = File(...),
	    decode_method: str = Form("accurate")
	):
	    """Run OCR on an uploaded image and return the recognised text as JSON.

	    The upload is decoded with Pillow, converted to a BGR array and written
	    to a temporary PNG, because the OCR engine is given a file path. The
	    tokens yielded by the engine are concatenated in the order they arrive.

	    Args:
	        file: Uploaded image, read from the multipart field ``file``.
	        decode_method: Decoding strategy passed to ``get_ocr``; read from
	            the form field ``decode_method`` (default ``"accurate"``).

	    Returns:
	        A ``JSONResponse`` with ``success``, ``decode_method`` and ``text``
	        when OCR completes, or with ``success`` set to ``False`` and an
	        ``error`` message when any step raises.
	    """
	    try:
	        contents = await file.read()

	        image = Image.open(io.BytesIO(contents)).convert("RGB")

	        ocr_engine = get_ocr(decode_method)

	        image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

	        # delete=False: only the path is needed; the handle is closed before
	        # OpenCV writes to it, and the file is removed explicitly below.
	        with tempfile.NamedTemporaryFile(
	            suffix=".png",
	            delete=False
	        ) as f:
	            temp_path = f.name

	        try:
	            cv2.imwrite(temp_path, image_bgr)

	            # OCR STREAMING
	            tokens = (
	                chunk.get("token", "")
	                for chunk in ocr_engine.extract_text_stream_chars(
	                    temp_path,
	                    mode="lines"
	                )
	            )

	            # Empty or missing tokens are skipped, as before.
	            extracted_text = "".join(token for token in tokens if token)
	        finally:
	            # CLEANUP: runs on failure too, so the temp file is never leaked.
	            try:
	                os.unlink(temp_path)
	            except FileNotFoundError:
	                pass

	        return JSONResponse({
	            "success": True,
	            "decode_method": decode_method,
	            "text": extracted_text
	        })

	    except Exception as e:
	        # Top-level boundary: report the failure in the JSON body instead of
	        # letting the exception propagate out of the handler.
	        return JSONResponse({
	            "success": False,
	            "error": str(e)
	        })

	# =========================================================
	# HEALTH CHECK
	# =========================================================

	@app.get("/health")
	async def health():
	    """Report that the service is up.

	    Returns:
	        The constant payload ``{"status": "running"}``.
	    """
	    payload = {"status": "running"}
	    return payload

	# =========================================================
	# MAIN
	# =========================================================

	if __name__ == "__main__":
	    # Start uvicorn on every interface at port 7860 when the file is
	    # executed directly rather than imported.
	    server_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **server_options)