# rapid-ocr / app.py
# Naman712's picture
# Update app.py
# fd44c97 verified
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse, HTMLResponse
from rapidocr_onnxruntime import RapidOCR
from pdf2image import convert_from_bytes
from PIL import Image
import numpy as np
import io
import time
import base64
import json
import asyncio
# FastAPI application instance; the route decorators in this module register on it.
app = FastAPI()
# --- BACKEND LOGIC ---
# OCR engine constructed once at import time and reused by every /predict request.
engine = RapidOCR()
def pil_to_base64(img: Image.Image) -> str:
    """Encode a PIL image as a base64 JPEG string (quality 70).

    JPEG cannot store alpha or palette data, so an image whose mode is not
    already ``RGB`` or ``L`` is converted to ``RGB`` first. Without this,
    ``Image.save`` raises ``OSError`` for modes such as ``RGBA`` or ``P``.

    Args:
        img: Image to encode. The caller's object is not modified;
            ``convert`` returns a new image.

    Returns:
        The JPEG bytes, base64-encoded, as a ``str``.
    """
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    buffered = io.BytesIO()
    img.save(buffered, format="JPEG", quality=70)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
@app.get("/", response_class=HTMLResponse)
def home():
    """Serve the single-page frontend stored in ``html_content``."""
    page = html_content
    return page
# Only the first pages of a PDF are rasterized and OCR'd.
_MAX_PDF_PAGES = 5


def _ndjson(event: dict) -> str:
    """Serialize one stream event as a single NDJSON line."""
    return json.dumps(event) + "\n"


def _run_ocr(img_np, use_angle_cls: bool):
    """Run the shared engine on one page and return its result rows (possibly empty)."""
    try:
        # RapidOCR's call keyword for the angle classifier is ``use_cls``.
        result, _ = engine(img_np, use_det=True, use_cls=use_angle_cls, use_rec=True)
    except TypeError:
        # Presumably kept for engine versions that reject these keywords;
        # fall back to the engine's defaults.
        result, _ = engine(img_np)
    return result or []


def _build_page_event(page_number: int, img, use_angle_cls: bool) -> dict:
    """OCR one page image and build the ``page`` event sent to the client."""
    started = time.time()
    rows = _run_ocr(np.array(img), use_angle_cls)
    blocks = [
        {
            "id": idx,
            # Normalise to nested Python lists so json.dumps never sees an ndarray.
            "box": np.asarray(row[0]).tolist(),
            "text": row[1],
            "conf": round(float(row[2]), 4),
        }
        for idx, row in enumerate(rows)
    ]
    return {
        "type": "page",
        "page_number": page_number,
        "img_base64": pil_to_base64(img),
        "full_text": "\n".join(block["text"] for block in blocks),
        "blocks": blocks,
        # Evaluated last, so it covers OCR plus JPEG encoding of the preview.
        "time": f"{time.time() - started:.2f}s",
    }


@app.post("/predict")
async def predict(file: UploadFile = File(...), use_angle_cls: bool = Form(False)):
    """OCR an uploaded image or PDF and stream the results as NDJSON.

    The response is a sequence of JSON objects, one per line, each with a
    ``type`` field: ``status`` (progress message), ``meta`` (``total_pages``),
    ``page`` (preview image, text and boxes for one page), ``error``
    (``message``; the stream ends after it) and ``complete``.

    Args:
        file: Uploaded image or PDF. A PDF is recognised by its content type
            or by the ``%PDF-`` signature at the start of the payload.
        use_angle_cls: Form flag forwarded to the engine's angle classifier.

    Returns:
        A ``StreamingResponse`` with media type ``application/x-ndjson``.

    NOTE(review): decoding and OCR are synchronous calls made from this async
    generator, so they run on the event loop; consider offloading them once
    the engine's thread-safety has been confirmed.
    """
    file_bytes = await file.read()
    is_pdf = file.content_type == "application/pdf" or file_bytes.startswith(b"%PDF-")

    async def process_stream():
        try:
            if is_pdf:
                yield _ndjson({"type": "status", "message": "Converting PDF..."})
                images = convert_from_bytes(file_bytes, last_page=_MAX_PDF_PAGES)
            else:
                yield _ndjson({"type": "status", "message": "Reading Image..."})
                images = [Image.open(io.BytesIO(file_bytes)).convert("RGB")]
        except Exception as e:
            yield _ndjson({"type": "error", "message": str(e)})
            return

        yield _ndjson({"type": "meta", "total_pages": len(images)})

        for page_number, img in enumerate(images, start=1):
            try:
                line = _ndjson(_build_page_event(page_number, img, use_angle_cls))
            except Exception as e:
                # Report the failure to the client instead of silently
                # truncating the stream, mirroring the decode-error path above.
                yield _ndjson({"type": "error", "message": str(e)})
                return
            yield line
            # Hand control back to the event loop between pages.
            await asyncio.sleep(0.01)

        yield _ndjson({"type": "complete"})

    return StreamingResponse(process_stream(), media_type="application/x-ndjson")
# --- FRONTEND ---
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OCR Studio</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<script src="https://unpkg.com/@phosphor-icons/web"></script>
<style>
:root {
/* Minimalist Palette - No Gradients */
--bg-main: #09090b; /* Deepest black/gray */
--bg-panel: #18181b; /* Slightly lighter panel */
--border: #27272a; /* Subtle borders */
--accent: #ffffff; /* Stark white for key actions */
--accent-dim: #a1a1aa; /* Secondary text */
--highlight: #6366f1; /* Primary Action Color (Indigo) */
--success: #10b981;
--font-ui: 'Inter', sans-serif;
--font-mono: 'JetBrains Mono', monospace;
}
* { box-sizing: border-box; outline: none; }
body {
font-family: var(--font-ui);
background-color: var(--bg-main);
color: #e4e4e7;
margin: 0;
height: 100vh;
display: flex;
flex-direction: column;
overflow: hidden;
}
/* --- ANIMATIONS --- */
@keyframes fadeIn { from { opacity: 0; transform: translateY(5px); } to { opacity: 1; transform: translateY(0); } }
@keyframes slideInRight { from { opacity: 0; transform: translateX(20px); } to { opacity: 1; transform: translateX(0); } }
@keyframes scan {
0% { top: 0%; opacity: 0; }
10% { opacity: 1; box-shadow: 0 0 10px var(--highlight); }
90% { opacity: 1; box-shadow: 0 0 10px var(--highlight); }
100% { top: 100%; opacity: 0; }
}
@keyframes pulse { 0% { opacity: 0.5; } 50% { opacity: 1; } 100% { opacity: 0.5; } }
/* --- HEADER --- */
header {
height: 50px;
background: var(--bg-main);
border-bottom: 1px solid var(--border);
display: flex;
justify-content: space-between;
align-items: center;
padding: 0 20px;
z-index: 50;
}
.brand {
font-weight: 600;
font-size: 0.95rem;
letter-spacing: -0.02em;
display: flex; align-items: center; gap: 8px;
color: var(--accent);
}
.status-dot {
width: 6px; height: 6px;
background: var(--success);
border-radius: 50%;
box-shadow: 0 0 8px var(--success);
animation: pulse 2s infinite;
}
main { flex: 1; display: flex; overflow: hidden; }
/* --- SIDEBAR --- */
.sidebar {
width: 260px;
background: var(--bg-main);
border-right: 1px solid var(--border);
display: flex; flex-direction: column;
padding: 15px;
gap: 20px;
z-index: 20;
}
.upload-zone {
border: 1px dashed var(--border);
background: var(--bg-panel);
border-radius: 6px;
padding: 25px 15px;
text-align: center;
cursor: pointer;
transition: all 0.2s ease;
}
.upload-zone:hover { border-color: var(--highlight); background: #1e1e24; }
.upload-zone i { font-size: 24px; color: var(--accent-dim); margin-bottom: 8px; display: block; }
.upload-zone span { font-size: 0.8rem; color: var(--accent-dim); font-weight: 500; }
.control-section { display: flex; flex-direction: column; gap: 10px; }
.control-label {
font-size: 0.7rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #52525b;
font-weight: 600;
margin-bottom: 5px;
}
/* Minimal Inputs */
.toggle-row { display: flex; align-items: center; justify-content: space-between; font-size: 0.85rem; }
input[type="checkbox"] { accent-color: var(--highlight); }
input[type="range"] { width: 100%; height: 4px; background: var(--border); border-radius: 2px; -webkit-appearance: none; }
input[type="range"]::-webkit-slider-thumb { -webkit-appearance: none; width: 12px; height: 12px; background: var(--accent); border-radius: 50%; cursor: pointer; }
/* Buttons Grid */
.action-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; }
.btn {
background: var(--bg-panel);
border: 1px solid var(--border);
color: var(--accent-dim);
padding: 8px;
border-radius: 4px;
font-size: 0.8rem;
cursor: pointer;
transition: 0.2s;
display: flex; align-items: center; justify-content: center; gap: 6px;
}
.btn:hover { border-color: var(--highlight); color: var(--highlight); }
.btn:active { transform: translateY(1px); }
.btn.primary { background: var(--highlight); color: white; border: none; }
.btn.primary:hover { background: #4f46e5; }
/* --- WORKSPACE --- */
.workspace {
flex: 1;
display: flex;
background: #000; /* Pitch black for contrast */
position: relative;
}
/* LEFT: Canvas */
.preview-area {
flex: 1;
position: relative;
overflow: auto;
display: grid;
place-items: center;
padding: 40px;
}
.canvas-wrapper {
position: relative;
box-shadow: 0 0 0 1px #333; /* Very subtle outline */
transition: width 0.2s cubic-bezier(0.25, 0.46, 0.45, 0.94), transform 0.3s ease;
width: 80%;
transform-origin: center center;
}
canvas { position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none; }
img { display: block; width: 100%; height: auto; }
.scan-line {
position: absolute; width: 100%; height: 1px;
background: var(--highlight);
box-shadow: 0 0 10px var(--highlight);
z-index: 5; display: none; pointer-events: none;
}
.scan-line.active { display: block; animation: scan 1.5s linear infinite; }
/* Floating Controls */
.floating-bar {
position: absolute; bottom: 25px; left: 50%; transform: translateX(-50%);
background: rgba(24, 24, 27, 0.9);
border: 1px solid var(--border);
padding: 6px 12px;
border-radius: 8px;
display: flex; gap: 12px; align-items: center;
backdrop-filter: blur(8px);
z-index: 30;
}
.icon-btn { background: none; border: none; color: var(--accent-dim); cursor: pointer; padding: 4px; border-radius: 4px; display: flex; }
.icon-btn:hover { color: var(--accent); background: rgba(255,255,255,0.1); }
/* RIGHT: Results */
.results-panel {
width: 380px;
background: var(--bg-panel);
border-left: 1px solid var(--border);
display: flex; flex-direction: column;
z-index: 20;
}
.panel-header {
padding: 10px 15px;
border-bottom: 1px solid var(--border);
display: flex; justify-content: space-between; align-items: center;
font-size: 0.8rem; font-weight: 600; color: var(--accent-dim);
}
.tab-group { display: flex; gap: 10px; }
.tab-btn { background: none; border: none; cursor: pointer; color: #52525b; font-size: 0.8rem; font-weight: 600; transition: 0.2s; }
.tab-btn:hover { color: var(--accent-dim); }
.tab-btn.active { color: var(--highlight); }
.results-content { flex: 1; overflow-y: auto; padding: 10px; }
.result-item {
display: flex; justify-content: space-between;
padding: 10px;
border-bottom: 1px solid var(--border);
font-size: 0.85rem;
color: var(--accent-dim);
cursor: pointer;
transition: 0.15s;
animation: slideInRight 0.3s backwards;
}
.result-item:hover { background: #27272a; color: var(--accent); padding-left: 14px; }
.result-item span { font-family: var(--font-mono); }
.conf-tag { font-size: 0.7rem; color: #52525b; }
/* LOADER */
.loader-overlay {
position: fixed; top: 0; left: 0; width: 100%; height: 100%;
background: rgba(9, 9, 11, 0.9); z-index: 100;
display: flex; justify-content: center; align-items: center; flex-direction: column;
opacity: 0; pointer-events: none; transition: 0.3s;
}
.loader-overlay.active { opacity: 1; pointer-events: all; }
.loader-bar {
width: 200px; height: 2px; background: #333;
overflow: hidden; border-radius: 2px; margin-top: 10px;
}
.loader-progress {
width: 100%; height: 100%; background: var(--highlight);
transform: translateX(-100%);
animation: loading 1.5s infinite ease-in-out;
}
@keyframes loading { 0% { transform: translateX(-100%); } 50% { transform: translateX(0); } 100% { transform: translateX(100%); } }
.raw-text { width: 100%; height: 100%; background: transparent; border: none; color: var(--accent-dim); resize: none; font-family: var(--font-mono); font-size: 0.8rem; line-height: 1.6; padding: 5px; }
</style>
</head>
<body>
<header>
<div class="brand">
<i class="ph-bold ph-read-cv-logo"></i> OCR STUDIO <span style="font-weight:400; opacity:0.5; font-size:0.8em; margin-left:5px;">PRO</span>
</div>
<div style="display:flex; align-items:center; gap:8px; font-size:0.75rem; color:#71717a;">
<div class="status-dot"></div> READY
</div>
</header>
<main>
<!-- SIDEBAR -->
<aside class="sidebar">
<div class="upload-zone" id="drop-zone">
<i class="ph ph-upload-simple"></i>
<span>Open File</span>
<input type="file" id="file-input" hidden accept="image/*,application/pdf">
</div>
<div class="control-section">
<div class="control-label">Settings</div>
<div class="toggle-row">
<span>Auto-Rotate</span>
<input type="checkbox" id="angle-cls">
</div>
<div class="toggle-row" style="margin-top:10px;">
<span>Contrast Mode</span>
<input type="checkbox" id="contrast-mode" onchange="toggleContrast()">
</div>
</div>
<div class="control-section">
<div class="control-label">Confidence Threshold <span id="conf-val" style="float:right">0%</span></div>
<input type="range" id="conf-slider" min="0" max="100" value="0">
</div>
<div class="control-section" style="margin-top:auto">
<div class="control-label">Tools</div>
<div class="action-grid">
<button class="btn" onclick="readAloud()"><i class="ph-bold ph-speaker-high"></i> Read</button>
<button class="btn" onclick="copyText()"><i class="ph-bold ph-copy"></i> Copy</button>
<button class="btn" onclick="downloadJSON()"><i class="ph-bold ph-file-json"></i> JSON</button>
<button class="btn" onclick="resetUI()"><i class="ph-bold ph-trash"></i> Reset</button>
</div>
</div>
</aside>
<!-- WORKSPACE -->
<div class="workspace">
<div class="preview-area">
<div class="canvas-wrapper" id="canvas-wrapper">
<img id="source-image" src="">
<canvas id="overlay"></canvas>
<div class="scan-line" id="scan-line"></div>
</div>
<!-- Floating Toolbar -->
<div class="floating-bar" id="pagination-bar" style="display:none">
<button class="icon-btn" onclick="changePage(-1)"><i class="ph-bold ph-caret-left"></i></button>
<span id="page-indicator" style="font-size:0.8rem; font-variant-numeric: tabular-nums; color:white;">1 / 1</span>
<button class="icon-btn" onclick="changePage(1)"><i class="ph-bold ph-caret-right"></i></button>
<div style="width:1px; height:14px; background:#333; margin:0 5px;"></div>
<button class="icon-btn" onclick="rotateView()"><i class="ph-bold ph-arrow-clockwise"></i></button>
<button class="icon-btn" onclick="zoom(-10)"><i class="ph-bold ph-minus"></i></button>
<button class="icon-btn" onclick="resetZoom()" style="font-size:0.7rem; width:30px;">100%</button>
<button class="icon-btn" onclick="zoom(10)"><i class="ph-bold ph-plus"></i></button>
</div>
</div>
<div class="results-panel">
<div class="panel-header">
<span>DATA</span>
<div class="tab-group">
<button class="tab-btn active" id="tab-blocks" onclick="setView('blocks')">LIST</button>
<button class="tab-btn" id="tab-raw" onclick="setView('raw')">RAW</button>
</div>
</div>
<div class="results-content" id="results-content"></div>
</div>
</div>
</main>
<!-- LOADER -->
<div class="loader-overlay" id="loader">
<div style="text-align:center">
<div style="font-size:0.9rem; letter-spacing:0.1em; color:var(--accent); font-weight:600; margin-bottom:10px" id="loader-msg">PROCESSING</div>
<div class="loader-bar"><div class="loader-progress"></div></div>
</div>
</div>
<script>
// --- STATE ---
// Page events received from /predict for the current document, in arrival order.
let allPages = [];
// Index into allPages of the page currently shown.
let currentPageIdx = 0;
// Width of the canvas wrapper as a percentage (applied in updateTransform).
let currentZoom = 80;
// Rotation of the canvas wrapper in degrees (applied in updateTransform).
let currentRotation = 0;
// Active results tab: 'blocks' (list plus overlay boxes) or 'raw' (plain text).
let currentView = 'blocks';
// --- DOM ---
// Element handles looked up once by id and reused by the functions below.
const dropZone = document.getElementById('drop-zone');
const fileInput = document.getElementById('file-input');
const loader = document.getElementById('loader');
const loaderMsg = document.getElementById('loader-msg');
const imgEl = document.getElementById('source-image');
const canvas = document.getElementById('overlay');
const wrapper = document.getElementById('canvas-wrapper');
const resultsContent = document.getElementById('results-content');
const paginationBar = document.getElementById('pagination-bar');
const pageIndicator = document.getElementById('page-indicator');
// 2D drawing context of the overlay canvas, used to draw the block outlines.
const ctx = canvas.getContext('2d');
const scanLine = document.getElementById('scan-line');
const confSlider = document.getElementById('conf-slider');
const confVal = document.getElementById('conf-val');
// --- EVENTS ---
// Clicking the upload zone opens the hidden file picker.
dropZone.addEventListener('click', () => fileInput.click());
fileInput.addEventListener('change', (e) => {
  const file = e.target.files[0];
  // Clear the input so choosing the same file again still fires 'change'.
  e.target.value = '';
  startUpload(file);
});
confSlider.addEventListener('input', (e) => {
  confVal.textContent = e.target.value + '%';
  renderCurrentPage();
  // The result list applies the same threshold, so rebuild it as well;
  // skipped in raw view (threshold does not affect it) and when nothing is loaded.
  if (currentView === 'blocks' && allPages.length > 0) renderList();
});
// --- UPLOAD & STREAMING ---
// Upload `file` to /predict and pass every NDJSON line of the streamed
// response to handleStreamData. Failures are reported with alert(); the
// loader overlay is always hidden when the request ends.
async function startUpload(file) {
  if (!file) return;
  // Reset state left over from a previous document.
  allPages = [];
  currentPageIdx = 0;
  resultsContent.innerHTML = '';
  imgEl.src = '';
  paginationBar.style.display = 'none';
  currentRotation = 0;
  updateTransform();
  loader.classList.add('active');
  loaderMsg.textContent = "INITIALIZING STREAM";
  const formData = new FormData();
  formData.append('file', file);
  formData.append('use_angle_cls', document.getElementById('angle-cls').checked);

  // Decode one NDJSON line and dispatch it. A malformed line or a failing
  // handler is logged and skipped so the rest of the stream is still read.
  const consumeLine = (line) => {
    if (!line.trim()) return;
    let data;
    try {
      data = JSON.parse(line);
    } catch (e) {
      console.error("JSON parse error", e);
      return;
    }
    try {
      handleStreamData(data);
    } catch (e) {
      console.error("Stream handler error", e);
    }
  };

  try {
    const response = await fetch('/predict', { method: 'POST', body: formData });
    // A non-2xx reply is not an NDJSON stream; surface it instead of parsing it.
    if (!response.ok) throw new Error(`Server responded with status ${response.status}`);
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\\n');
      // The last piece may be an incomplete line; keep it for the next chunk.
      buffer = lines.pop();
      lines.forEach(consumeLine);
    }
    // Flush the decoder and handle a final line that had no trailing newline.
    buffer += decoder.decode();
    consumeLine(buffer);
  } catch (err) {
    alert("Error: " + err.message);
  } finally {
    loader.classList.remove('active');
  }
}
// Apply one decoded stream event from /predict to the UI:
//   'status' -> update the loader caption
//   'error'  -> hide the loader and report the server's message
//   'page'   -> store the page; the first page is shown immediately
// Any other event type is ignored.
function handleStreamData(data) {
  if (data.type === 'status') {
    loaderMsg.textContent = String(data.message).toUpperCase();
  } else if (data.type === 'error') {
    // Without this branch a server-side failure ends the upload with no feedback.
    loader.classList.remove('active');
    alert("Error: " + data.message);
  } else if (data.type === 'page') {
    allPages.push(data);
    if (allPages.length === 1) {
      loader.classList.remove('active');
      paginationBar.style.display = 'flex';
      renderPage(0);
      // Play the scan animation once over the first page.
      scanLine.classList.add('active');
      setTimeout(() => scanLine.classList.remove('active'), 1500);
    }
    updatePaginationUI();
  }
}
// --- RENDERING ---
// Make page `idx` the current page: load its JPEG into the preview image,
// size the overlay canvas to the image once it has loaded, then redraw.
// Out-of-range indices are ignored.
function renderPage(idx) {
  const outOfRange = idx < 0 || idx >= allPages.length;
  if (outOfRange) return;
  currentPageIdx = idx;
  const page = allPages[idx];
  imgEl.src = 'data:image/jpeg;base64,' + page.img_base64;
  imgEl.onload = () => {
    // Canvas pixels match the image pixels, so block coordinates map 1:1.
    canvas.width = imgEl.naturalWidth;
    canvas.height = imgEl.naturalHeight;
    renderCurrentPage();
  };
  updatePaginationUI();
}
// Redraw the overlay canvas for the current page. Blocks whose confidence is
// below the slider threshold are skipped; the block whose id equals
// `highlightId` is drawn emphasised. Outside the 'blocks' view the canvas is
// only cleared.
function renderCurrentPage(highlightId = -1) {
  if (!allPages.length) return;
  const page = allPages[currentPageIdx];
  const threshold = confSlider.value / 100;
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  if (currentView !== 'blocks') return;
  for (const block of page.blocks) {
    if (block.conf < threshold) continue;
    const corners = block.box;
    // Trace the four corner points as a closed outline.
    ctx.beginPath();
    ctx.moveTo(corners[0][0], corners[0][1]);
    [1, 2, 3].forEach((k) => ctx.lineTo(corners[k][0], corners[k][1]));
    ctx.closePath();
    const highlighted = block.id === highlightId;
    ctx.strokeStyle = highlighted ? '#6366f1' : 'rgba(99, 102, 241, 0.5)';
    ctx.lineWidth = highlighted ? 4 : 1;
    ctx.fillStyle = highlighted ? 'rgba(99, 102, 241, 0.3)' : 'rgba(99, 102, 241, 0.02)';
    ctx.fill();
    ctx.stroke();
  }
}
// Thin alias for renderCurrentPage; used by renderList and its hover handlers.
function drawCanvas(highlightId = -1) {
renderCurrentPage(highlightId);
}
// Rebuild the results panel for the current page: a read-only textarea in
// 'raw' view, otherwise one row per block at or above the confidence
// threshold. Hovering a row highlights its box on the overlay.
// OCR text comes from the uploaded document, so it is inserted with
// textContent / value and never parsed as HTML.
function renderList() {
  resultsContent.innerHTML = '';
  const page = allPages[currentPageIdx];
  // Nothing loaded yet (e.g. a tab was clicked before any upload).
  if (!page) return;
  if (currentView === 'raw') {
    const area = document.createElement('textarea');
    area.className = 'raw-text';
    area.readOnly = true;
    area.value = page.full_text;
    resultsContent.appendChild(area);
    return;
  }
  const minConf = confSlider.value / 100;
  page.blocks.forEach((block, index) => {
    if (block.conf < minConf) return;
    const div = document.createElement('div');
    div.className = 'result-item';
    // Stagger the slide-in animation by row position.
    div.style.animationDelay = `${index * 0.02}s`;
    const textSpan = document.createElement('span');
    textSpan.textContent = block.text;
    const confSpan = document.createElement('span');
    confSpan.className = 'conf-tag';
    confSpan.textContent = `${(block.conf * 100).toFixed(0)}%`;
    div.appendChild(textSpan);
    div.appendChild(confSpan);
    div.addEventListener('mouseenter', () => drawCanvas(block.id));
    div.addEventListener('mouseleave', () => drawCanvas(-1));
    resultsContent.appendChild(div);
  });
  drawCanvas(-1);
}
// --- CONTROLS ---
// Move `delta` pages from the current one; does nothing if that would leave
// the range of loaded pages.
function changePage(delta) {
  const target = currentPageIdx + delta;
  const inRange = target >= 0 && target < allPages.length;
  if (!inRange) return;
  renderPage(target);
}
// Refresh the "current / total" page indicator and rebuild the results panel.
function updatePaginationUI() {
  const current = currentPageIdx + 1;
  const total = allPages.length;
  pageIndicator.textContent = current + ' / ' + total;
  renderList();
}
// Switch the results panel between 'blocks' and 'raw', mark the matching tab
// as active, and clear the overlay when entering raw view.
function setView(view) {
  currentView = view;
  const tabIds = { blocks: 'tab-blocks', raw: 'tab-raw' };
  for (const [name, id] of Object.entries(tabIds)) {
    document.getElementById(id).classList.toggle('active', view === name);
  }
  renderList();
  if (view !== 'raw') return;
  ctx.clearRect(0, 0, canvas.width, canvas.height);
}
// Change the zoom by `delta` percentage points, clamped to 20..300.
function zoom(delta) {
  const requested = currentZoom + delta;
  currentZoom = Math.min(300, Math.max(20, requested));
  updateTransform();
}
// Restore the initial view: zoom 80 and no rotation.
function resetZoom() {
  [currentZoom, currentRotation] = [80, 0];
  updateTransform();
}
// Rotate the preview a further 90 degrees, wrapping around at 360.
function rotateView() {
  const next = currentRotation + 90;
  currentRotation = next % 360;
  updateTransform();
}
// Apply the current zoom (as wrapper width in percent) and rotation to the
// canvas wrapper element.
function updateTransform() {
  const wrapperStyle = wrapper.style;
  wrapperStyle.width = currentZoom + '%';
  wrapperStyle.transform = 'rotate(' + currentRotation + 'deg)';
}
// Download every received page event as pretty-printed JSON (ocr_result.json).
// The payload includes the base64 page images, so it is handed to the browser
// as a Blob object URL rather than packed into a data: URL, which can exceed
// URL length limits. Does nothing when no page is loaded, like the other tools.
function downloadJSON() {
  if (allPages.length === 0) return;
  const payload = JSON.stringify(allPages, null, 2);
  const blob = new Blob([payload], { type: 'application/json' });
  const url = URL.createObjectURL(blob);
  const node = document.createElement('a');
  node.setAttribute("href", url);
  node.setAttribute("download", "ocr_result.json");
  document.body.appendChild(node);
  node.click();
  node.remove();
  // Release the object URL after the click has been dispatched.
  setTimeout(() => URL.revokeObjectURL(url), 0);
}
// Speak the current page with the browser's speech synthesis: the raw text in
// 'raw' view, otherwise the block texts joined by spaces. Any speech already
// in progress is cancelled first.
function readAloud() {
  if (allPages.length === 0) return;
  // Not every browser exposes the Web Speech API; fail with a message, not a TypeError.
  if (!('speechSynthesis' in window) || typeof SpeechSynthesisUtterance === 'undefined') {
    return alert("Speech synthesis is not supported in this browser");
  }
  const page = allPages[currentPageIdx];
  const text = currentView === 'raw' ?
    page.full_text :
    page.blocks.map(b => b.text).join(' ');
  if (!text) return alert("No text to read");
  window.speechSynthesis.cancel();
  const utterance = new SpeechSynthesisUtterance(text);
  window.speechSynthesis.speak(utterance);
}
// Copy the current page's raw text to the clipboard and, on success, show a
// temporary "Copied" label on the button that triggered the call.
async function copyText() {
  if (allPages.length === 0) return;
  // Invoked from an inline onclick without arguments, so the button is read
  // from window.event; capture it before awaiting, while the event is current.
  const btn = window.event ? window.event.currentTarget : null;
  const text = allPages[currentPageIdx].full_text;
  try {
    // navigator.clipboard is undefined outside secure contexts.
    if (!navigator.clipboard) throw new Error("Clipboard API is not available");
    await navigator.clipboard.writeText(text);
  } catch (err) {
    alert("Copy failed: " + err.message);
    return;
  }
  // Skip the label swap while a previous one is pending; otherwise the
  // temporary label would be saved as the "original" and never restored.
  if (!btn || btn.dataset.copied) return;
  btn.dataset.copied = '1';
  const originalHTML = btn.innerHTML;
  btn.innerHTML = `<i class="ph-bold ph-check"></i> Copied`;
  setTimeout(() => {
    btn.innerHTML = originalHTML;
    delete btn.dataset.copied;
  }, 2000);
}
// Turn the high-contrast palette on or off by overriding (or clearing) CSS
// custom properties on the root element, following the 'contrast-mode' checkbox.
function toggleContrast() {
  const overrides = [
    ['--bg-main', '#000000'],
    ['--bg-panel', '#000000'],
    ['--border', '#ffffff'],
    ['--accent', '#ffff00'],
    ['--accent-dim', '#ffffff'],
  ];
  const rootStyle = document.documentElement.style;
  const enabled = document.getElementById('contrast-mode').checked;
  overrides.forEach(([name, value]) => {
    if (enabled) {
      rootStyle.setProperty(name, value);
    } else {
      // Removing the inline override lets the stylesheet's :root value apply again.
      rootStyle.removeProperty(name);
    }
  });
}
// Reset the app by reloading the page.
function resetUI() {
location.reload();
}
</script>
</body>
</html>
"""