github-actions[bot] committed on
Commit
826cc86
·
1 Parent(s): b1e5920

Auto-deploy from GitHub: 48eb1bb0a56448763c25a67b22cabdf45eae8a7e

Browse files
Files changed (13) hide show
  1. .gitattributes +0 -35
  2. Dockerfile +29 -0
  3. README.md +28 -4
  4. app/__init__.py +1 -0
  5. app/api/routes.py +110 -0
  6. app/core/config.py +18 -0
  7. app/db/crud.py +157 -0
  8. app/db/database.py +21 -0
  9. app/main.py +31 -0
  10. app/services/worker.py +127 -0
  11. index.html +818 -0
  12. pyproject.toml +20 -0
  13. run.py +5 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy project files (pyproject.toml is part of the build context, so a
# separate COPY for it right before this one only added a redundant layer)
COPY . .

# Install dependencies and project
RUN pip install --no-cache-dir .

# Create necessary directories
RUN mkdir -p uploads temp_dir

# Expose port
EXPOSE 7860

# Run the FastAPI app (worker starts automatically on first upload)
CMD ["python", "run.py"]
README.md CHANGED
@@ -1,10 +1,34 @@
1
  ---
2
- title: OCR
3
- emoji: 🏢
4
- colorFrom: red
5
  colorTo: yellow
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: OCR Text Extractor
3
+ emoji: 📝
4
+ colorFrom: green
5
  colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # OCR Text Extractor
12
+
13
+ A Python-based OCR service with a neobrutalist web interface.
14
+
15
+ ## Features
16
+ - 📷 Image upload via REST API
17
+ - 🤖 Automatic OCR using PaddleOCR/EasyOCR
18
+ - 💾 SQLite database for queue management
19
+ - 🎨 Neobrutalist UI with smooth animations
20
+ - 🔄 Real-time status updates
21
+
22
+ ## Usage
23
+ Access the web interface at the Space URL above.
24
+
25
+ ## API Endpoints
26
+ - POST `/api/tasks/upload` - Upload image file
27
+ - GET `/api/tasks` - Get all tasks
28
+ - GET `/api/tasks/<id>` - Get specific task
29
+
30
+ ## Supported Formats
31
+ PNG, JPG, JPEG, BMP, TIFF, WEBP, GIF, PDF
32
+
33
+ ---
34
+ *Auto-deployed from GitHub*
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # OCR App Package
app/api/routes.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import FileResponse, JSONResponse
3
+ import os
4
+ import uuid
5
+ import aiofiles
6
+ from app.core.config import settings
7
+ from custom_logger import logger_config as logger
8
+ from app.db import crud
9
+ from app.services.worker import start_worker, is_worker_running
10
+
11
+ router = APIRouter()
12
+
13
def allowed_file(filename):
    """Return True when *filename* ends in an extension from settings.ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and only the text after the last dot
    is considered; names without a dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in settings.ALLOWED_EXTENSIONS
15
+
16
@router.get("/")
async def index():
    """Serve the web UI page (index.html, resolved relative to the working directory)."""
    page = FileResponse('index.html')
    return page
19
+
20
@router.post("/api/tasks/upload")
async def upload_task(image: UploadFile = File(...), hide_from_ui: str = Form("")):
    """Store an uploaded image, queue it for OCR and make sure the worker runs.

    Args:
        image: The uploaded file; its extension must pass ``allowed_file``.
        hide_from_ui: Form flag; ``"true"`` or ``"1"`` (case-insensitive)
            stores the task with ``hide_from_ui = 1``.

    Returns:
        A 201 JSON response with the new task id, filename and status.

    Raises:
        HTTPException: 400 for a missing name or disallowed extension,
            500 when the file cannot be written to the upload folder.
    """
    if not image.filename:
        raise HTTPException(status_code=400, detail="No file selected")

    # The filename is client-controlled. Keep only its last path component so
    # a name like "../../x.png" cannot make the write escape UPLOAD_FOLDER.
    filename = os.path.basename(image.filename.replace('\\', '/'))

    if not allowed_file(filename):
        raise HTTPException(status_code=400, detail="Invalid file type")

    task_id = str(uuid.uuid4())
    filepath = os.path.join(settings.UPLOAD_FOLDER, f"{task_id}_{filename}")

    try:
        async with aiofiles.open(filepath, 'wb') as out_file:
            # Stream in 1 MiB chunks instead of holding the whole upload in memory.
            while chunk := await image.read(1024 * 1024):
                await out_file.write(chunk)
        logger.info(f"File uploaded successfully: {filename} -> {filepath}")
    except Exception as e:
        logger.error(f"Error saving uploaded file {filename}: {e}")
        raise HTTPException(status_code=500, detail="Could not save file")

    hide_from_ui_val = 1 if hide_from_ui.lower() in ['true', '1'] else 0

    await crud.insert_task(task_id, filename, filepath, 'not_started', hide_from_ui_val)

    # The worker is started lazily; this is a no-op when it is already running.
    await start_worker()

    return JSONResponse(status_code=201, content={
        'id': task_id,
        'filename': filename,
        'status': 'not_started',
        'message': 'File uploaded successfully'
    })
53
+
54
@router.get("/api/tasks")
async def get_tasks():
    """List every UI-visible task with queue position and a start-time estimate.

    The OCR result is deliberately replaced by a placeholder in this list
    view. ``queue_position`` and ``estimated_start_seconds`` are only set for
    tasks whose status is ``not_started``.
    """
    rows, queue_ids, processing_count, avg_time = await crud.get_all_tasks()

    # Build the 1-based queue position of each waiting task once, rather than
    # calling queue_ids.index() for every row (quadratic on a long queue).
    position_by_id = {queued_id: pos for pos, queued_id in enumerate(queue_ids, start=1)}

    tasks = []
    for row in rows:
        queue_position = None
        estimated_start_seconds = None

        if row['status'] == 'not_started':
            queue_position = position_by_id.get(row['id'])
            if queue_position is not None:
                # Tasks ahead = earlier queued tasks + tasks currently processing.
                tasks_ahead = queue_position - 1 + processing_count
                estimated_start_seconds = round(tasks_ahead * avg_time)

        tasks.append({
            'id': row['id'],
            'filename': row['filename'],
            'status': row['status'],
            'result': "HIDDEN_IN_LIST_VIEW",
            'created_at': row['created_at'],
            'processed_at': row['processed_at'],
            'progress': row['progress'] or 0,
            'progress_text': row['progress_text'],
            'queue_position': queue_position,
            'estimated_start_seconds': estimated_start_seconds
        })

    return tasks
82
+
83
@router.get("/api/tasks/{task_id}")
async def get_task(task_id: str):
    """Return one task, including its stored result and queue estimate.

    Raises:
        HTTPException: 404 when no task with ``task_id`` exists.
    """
    found = await crud.get_task_by_id(task_id)
    if not found:
        raise HTTPException(status_code=404, detail="Task not found")

    record, position, eta_seconds = found

    # Columns copied straight from the database row, in response order.
    payload = {
        column: record[column]
        for column in ('id', 'filename', 'status', 'result', 'created_at', 'processed_at')
    }
    payload['progress'] = record['progress'] or 0
    payload['progress_text'] = record['progress_text']
    payload['queue_position'] = position
    payload['estimated_start_seconds'] = eta_seconds
    return payload
103
+
104
@router.get("/health")
async def health():
    """Report service liveness and whether the worker flag is set."""
    return dict(
        status='healthy',
        service='ocr-runner',
        worker_running=is_worker_running(),
    )
app/core/config.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
class Config:
    """Static application settings; only PORT can be overridden via the environment."""

    # HTTP port, read from $PORT with 7860 as the fallback.
    PORT = int(os.getenv('PORT', 7860))

    # Storage locations, relative to the working directory.
    UPLOAD_FOLDER = 'uploads'
    TEMP_DIR = 'temp_dir'
    DATABASE_FILE = 'ocr_results.db'

    # Lower-case file extensions accepted by the upload endpoint.
    ALLOWED_EXTENSIONS = {
        'png', 'jpg', 'jpeg', 'bmp', 'tiff', 'tif', 'webp', 'gif', 'pdf',
    }

    # External OCR command settings used by the worker.
    CWD = "./"
    PYTHON_PATH = "ocr-process"
    OCR_MODEL_NAME = "paddleocr"

    # Seconds the worker sleeps between polls when the queue is empty.
    POLL_INTERVAL = 3
14
+
15
# Shared settings instance imported by the rest of the app.
settings = Config()

# Make sure the storage directories exist as soon as the config is imported.
for _required_dir in (settings.UPLOAD_FOLDER, settings.TEMP_DIR):
    os.makedirs(_required_dir, exist_ok=True)
app/db/crud.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import os
3
+ from datetime import datetime, timedelta
4
+ from app.core.config import settings
5
+ from custom_logger import logger_config as logger
6
+
7
async def insert_task(task_id: str, filename: str, filepath: str, status: str, hide_from_ui: int):
    """Insert a new row into image_files, stamped with the current local time."""
    created_at = datetime.now().isoformat()
    values = (task_id, filename, filepath, status, created_at, hide_from_ui)
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(
            '''INSERT INTO image_files
               (id, filename, filepath, status, created_at, hide_from_ui)
               VALUES (?, ?, ?, ?, ?, ?)''',
            values,
        )
        await db.commit()
    logger.debug(f"Inserted task {filename} (ID: {task_id}) into database.")
15
+
16
async def update_status(task_id: str, status: str, result: str = None, error: str = None):
    """Set a task's status, recording result or error details for final states.

    ``completed`` stores ``result``, the processing time and 100% progress.
    ``failed`` stores ``"Error: <error>"`` as the result plus the processing
    time. Any other status only updates the status column.
    """
    if status == 'completed':
        sql = '''UPDATE image_files
                 SET status = ?, result = ?, processed_at = ?, progress = 100, progress_text = 'Completed'
                 WHERE id = ?'''
        params = (status, result, datetime.now().isoformat(), task_id)
        log, message = logger.info, f"Task ID {task_id} marked as completed."
    elif status == 'failed':
        sql = '''UPDATE image_files
                 SET status = ?, result = ?, processed_at = ?, progress_text = 'Failed'
                 WHERE id = ?'''
        params = (status, f"Error: {error}", datetime.now().isoformat(), task_id)
        log, message = logger.error, f"Task ID {task_id} marked as failed. Error: {error}"
    else:
        sql = 'UPDATE image_files SET status = ? WHERE id = ?'
        params = (status, task_id)
        log, message = logger.debug, f"Task ID {task_id} status updated to {status}."

    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(sql, params)
        log(message)
        await db.commit()
34
+
35
async def update_progress(task_id: str, progress: int, progress_text: str = None):
    """Store a task's progress percentage and its accompanying status text."""
    statement = 'UPDATE image_files SET progress = ?, progress_text = ? WHERE id = ?'
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(statement, (progress, progress_text, task_id))
        await db.commit()
    logger.debug(f"Task ID {task_id} progress updated to {progress}% ({progress_text}).")
41
+
42
async def get_next_not_started():
    """Return the oldest row with status 'not_started', or None if the queue is empty."""
    query = '''SELECT * FROM image_files
               WHERE status = 'not_started'
               ORDER BY created_at ASC
               LIMIT 1'''
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute(query) as cursor:
            oldest = await cursor.fetchone()
            return oldest
50
+
51
async def cleanup_old_entries():
    """Delete rows created more than 10 days ago, together with their files.

    Best effort: a file that cannot be removed is logged as a warning, and
    any other error is logged instead of being raised.
    """
    try:
        async with aiosqlite.connect(settings.DATABASE_FILE) as db:
            db.row_factory = aiosqlite.Row
            cutoff_date = (datetime.now() - timedelta(days=10)).isoformat()

            async with db.execute('''SELECT id, filepath FROM image_files
                                     WHERE created_at < ?''', (cutoff_date,)) as cursor:
                stale_rows = await cursor.fetchall()

            if not stale_rows:
                return

            # Remove the files first, counting only successful deletions.
            removed_files = 0
            for stale in stale_rows:
                path = stale['filepath']
                if not path or not os.path.exists(path):
                    continue
                try:
                    os.remove(path)
                except Exception as e:
                    logger.warning(f"Failed to delete old file {path}: {e}")
                else:
                    removed_files += 1

            async with db.execute('''DELETE FROM image_files WHERE created_at < ?''', (cutoff_date,)) as cursor:
                removed_rows = cursor.rowcount
            await db.commit()

            if removed_rows > 0 or removed_files > 0:
                logger.info(f"Cleanup: Deleted {removed_rows} old entries and {removed_files} files (older than 10 days)")
    except Exception as e:
        logger.error(f"Cleanup error: {e}")
82
+
83
async def get_average_processing_time():
    """Estimate seconds per task from the 20 most recently completed tasks.

    Each duration is measured from ``created_at`` to ``processed_at``, so it
    includes any time the task spent waiting in the queue. Rows with
    unparsable timestamps or a non-positive duration are ignored.

    Returns:
        The mean duration in seconds, or 30.0 when no usable sample exists.
    """
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute('''SELECT created_at, processed_at FROM image_files
                                 WHERE status = 'completed' AND processed_at IS NOT NULL
                                 ORDER BY processed_at DESC LIMIT 20''') as cursor:
            completed_rows = await cursor.fetchall()

    if not completed_rows:
        return 30.0

    durations = []
    for r in completed_rows:
        try:
            created = datetime.fromisoformat(r['created_at'])
            processed = datetime.fromisoformat(r['processed_at'])
            duration = (processed - created).total_seconds()
        except (TypeError, ValueError):
            # Malformed or non-string timestamps, or naive/aware mixing: skip
            # the row. Unrelated errors are no longer silently swallowed.
            continue
        if duration > 0:
            durations.append(duration)

    return sum(durations) / len(durations) if durations else 30.0
108
+
109
async def get_all_tasks():
    """Collect the data needed for the task list view.

    Returns:
        A tuple ``(rows, queue_ids, processing_count, avg_time)``: the
        UI-visible rows (newest first), the ids of all 'not_started' tasks
        (oldest first), the number of tasks in 'processing', and the average
        processing time in seconds.
    """
    waiting_sql = '''SELECT id FROM image_files
                     WHERE status = 'not_started'
                     ORDER BY created_at ASC'''
    busy_sql = '''SELECT COUNT(*) as count FROM image_files WHERE status = 'processing' '''
    visible_sql = 'SELECT * FROM image_files WHERE hide_from_ui = 0 OR hide_from_ui IS NULL ORDER BY created_at DESC'

    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row

        avg_time = await get_average_processing_time()

        async with db.execute(waiting_sql) as cursor:
            waiting = await cursor.fetchall()
            queue_ids = [entry['id'] for entry in waiting]

        async with db.execute(busy_sql) as cursor:
            busy_row = await cursor.fetchone()
            processing_count = busy_row['count']

        async with db.execute(visible_sql) as cursor:
            rows = await cursor.fetchall()

        return rows, queue_ids, processing_count, avg_time
128
+
129
async def get_task_by_id(task_id: str):
    """Fetch one task plus its queue position and estimated wait.

    Returns:
        None when the id is unknown, otherwise a tuple
        ``(row, queue_position, estimated_start_seconds)``. The last two are
        None unless the task's status is 'not_started'.
    """
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute('SELECT * FROM image_files WHERE id = ?', (task_id,)) as cursor:
            task_row = await cursor.fetchone()

        if not task_row:
            return None

        # Only queued tasks have a position and an estimate.
        if task_row['status'] != 'not_started':
            return task_row, None, None

        avg_time = await get_average_processing_time()

        async with db.execute('''SELECT COUNT(*) as position FROM image_files
                                 WHERE status = 'not_started' AND created_at < ?''',
                              (task_row['created_at'],)) as cursor:
            earlier = await cursor.fetchone()
            queue_position = earlier['position'] + 1

        async with db.execute('''SELECT COUNT(*) as count FROM image_files WHERE status = 'processing' ''') as cursor:
            busy = await cursor.fetchone()
            in_progress = busy['count']

        # Tasks ahead = earlier queued tasks + tasks currently processing.
        ahead = queue_position - 1 + in_progress
        return task_row, queue_position, round(ahead * avg_time)
app/db/database.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ from app.core.config import settings
3
+ from custom_logger import logger_config as logger
4
+
5
async def init_db():
    """Create the image_files table if it does not exist yet."""
    schema = '''CREATE TABLE IF NOT EXISTS image_files
                (id TEXT PRIMARY KEY,
                 filename TEXT NOT NULL,
                 filepath TEXT NOT NULL,
                 status TEXT NOT NULL,
                 result TEXT,
                 created_at TEXT NOT NULL,
                 processed_at TEXT,
                 progress INTEGER DEFAULT 0,
                 progress_text TEXT,
                 hide_from_ui INTEGER DEFAULT 0)'''

    logger.info(f"Initializing database at {settings.DATABASE_FILE}")
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(schema)
        await db.commit()
    logger.info("Database initialized successfully.")
app/main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from app.api.routes import router
5
+ from app.db.database import init_db
6
+ from custom_logger import logger_config as logger
7
+
8
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log the startup banner and initialise the database; log again on shutdown."""
    rule = "="*60
    startup_lines = (
        rule,
        "OCR Runner API Server Starting Up",
        rule,
        "Worker will start automatically on first upload",
        "Image files will be deleted after successful processing",
        rule,
    )
    for message in startup_lines:
        logger.info(message)

    await init_db()
    yield
    logger.info("OCR Runner API Server Shutting Down")
20
+
21
# Application instance; startup and shutdown work is handled by `lifespan`.
app = FastAPI(title="OCR Runner API", version="2.0.0", lifespan=lifespan)

# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy — confirm this is intended before exposing the API
# to browsers that send credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register the routes defined in app.api.routes.
app.include_router(router)
app/services/worker.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import json
4
+ import shlex
5
+ import re
6
+ from app.core.config import settings
7
+ from custom_logger import logger_config as logger
8
+ from app.db import crud
9
+
10
# Handle of the asyncio task created by start_worker() (None until then).
worker_task = None
# Flag set by start_worker(); worker_loop() keeps polling while it is True.
worker_running = False


def is_worker_running():
    """Return the current value of the module-level worker_running flag."""
    return worker_running
15
+
16
async def start_worker():
    """Start the background worker task unless one is already alive.

    The worker is created lazily (the upload route calls this on every
    upload). If the flag says "running" but the stored task has already
    finished, for example because it was cancelled, a fresh task is started
    so the queue does not stall.
    """
    global worker_task, worker_running

    logger.info(f"start_worker called: worker_running={worker_running}")

    # A finished task with the flag still set means the loop died without
    # resetting worker_running; treat that as "not running".
    task_finished = worker_task is not None and worker_task.done()

    if worker_running and not task_finished:
        logger.info("Worker already running")
        return

    worker_running = True
    worker_task = asyncio.create_task(worker_loop())
    logger.info("Worker task started")
27
+
28
# (substring searched in the lower-cased OCR output line, progress %, status text).
# Checked in order; the first match wins.
_OCR_MILESTONES = (
    ('initializing paddleocr', 15, "Initializing engine..."),
    ('loading model', 25, "Loading OCR models..."),
    ('model loaded successfully', 40, "Models ready."),
    ('processing:', 50, "Analyzing image..."),
    ('ocr completed successfully', 90, "OCR completed."),
    ('json ocr saved', 95, "Saving data..."),
)

_PERCENT_RE = re.compile(r'(\d+)%')


async def _report_line_progress(task_id, line_str):
    """Turn one line of OCR output into progress updates, if it carries any."""
    percent_match = _PERCENT_RE.search(line_str)
    if percent_match:
        try:
            percent = int(percent_match.group(1))
            # Cap at 90 so the later milestones remain visible as progress.
            await crud.update_progress(task_id, min(percent, 90), "Processing...")
        except Exception as e:
            # Percentage updates are best effort; log instead of hiding the error.
            logger.debug(f"Could not record progress for task {task_id}: {e}")

    lowered = line_str.lower()
    for marker, progress, text in _OCR_MILESTONES:
        if marker in lowered:
            await crud.update_progress(task_id, progress, text)
            break


async def _run_ocr(task_id, filepath):
    """Run the external OCR command for one file and return its parsed JSON output."""
    await crud.update_progress(task_id, 10, "Starting OCR...")

    command = f"cd {settings.CWD} && {settings.PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {settings.OCR_MODEL_NAME}"

    logger.debug(f"Executing command: {command}")

    process = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        cwd=settings.CWD,
        env={
            **os.environ,
            'PYTHONUNBUFFERED': '1',
            'CUDA_LAUNCH_BLOCKING': '1',
            'USE_CPU_IF_POSSIBLE': 'true'
        }
    )

    try:
        # stderr is merged into stdout, so this loop sees all tool output.
        while True:
            line = await process.stdout.readline()
            if not line:
                break

            line_str = line.decode('utf-8', errors='replace').strip()
            if line_str:
                logger.info(f"[OCR] {line_str}")
                await _report_line_progress(task_id, line_str)

        await process.wait()
    finally:
        # If we leave early (error or cancellation), do not leak the child process.
        if process.returncode is None:
            process.kill()
            await process.wait()

    if process.returncode != 0:
        raise Exception(f"OCR process failed with return code {process.returncode}")

    await crud.update_progress(task_id, 98, "Reading results...")

    # NOTE(review): every task reads the same output_ocr.json; presumably the
    # OCR tool overwrites it on each run — confirm, otherwise stale results
    # could be attributed to a later task.
    output_path = os.path.join(settings.CWD, settings.TEMP_DIR, 'output_ocr.json')
    # Read as UTF-8 explicitly so decoding does not depend on the locale
    # (assumes the tool writes standard UTF-8 JSON — TODO confirm).
    with open(output_path, 'r', encoding='utf-8') as file:
        return json.loads(file.read().strip())


async def _process_task(row):
    """Process one queued row: run OCR, store the result, delete the image."""
    task_id = row['id']
    filepath = row['filepath']
    filename = row['filename']

    logger.info(f"\n{'='*60}\nProcessing: {filename}\nID: {task_id}\n{'='*60}")

    await crud.update_status(task_id, 'processing')

    try:
        result = await _run_ocr(task_id, filepath)

        # The preview is only for the log; a non-dict JSON payload must not
        # fail the whole task.
        text = result.get('text', '') if isinstance(result, dict) else ''
        result_data = text or str(result)

        logger.success(f"Successfully processed: {filename}")
        logger.info(f"Text preview: {result_data[:100]}...")

        await crud.update_status(task_id, 'completed', result=json.dumps(result))

        # The image is only removed after successful processing.
        if os.path.exists(filepath):
            os.remove(filepath)
            logger.debug(f"Deleted image file: {filepath}")

    except Exception as e:
        logger.error(f"Failed to process {filename}: {str(e)}")
        await crud.update_status(task_id, 'failed', error=str(e))


async def worker_loop():
    """Poll the database for queued tasks and process them one at a time.

    Runs while the module-level ``worker_running`` flag is True. Each
    iteration first cleans up old entries, then processes the oldest
    'not_started' task or sleeps for ``settings.POLL_INTERVAL`` seconds when
    the queue is empty. Unexpected errors are logged and followed by the same
    sleep so the loop keeps going.
    """
    global worker_running
    logger.info("OCR Worker started. Monitoring for new image files...")

    try:
        while worker_running:
            logger.debug("Worker loop iteration, checking for files...")
            await crud.cleanup_old_entries()

            try:
                row = await crud.get_next_not_started()
                if row:
                    await _process_task(row)
                else:
                    await asyncio.sleep(settings.POLL_INTERVAL)
            except Exception as e:
                logger.error(f"Worker error: {str(e)}")
                await asyncio.sleep(settings.POLL_INTERVAL)
    finally:
        # However the loop ends (flag cleared, cancellation), clear the flag
        # so is_worker_running() is accurate and start_worker() can restart it.
        worker_running = False
index.html ADDED
@@ -0,0 +1,818 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="utf-8" />
6
+ <meta content="width=device-width, initial-scale=1.0" name="viewport" />
7
+ <title>OCR - Optical Character Recognition</title>
8
+
9
+ <!-- External Assets -->
10
+ <script src="https://cdn.tailwindcss.com?plugins=forms,container-queries"></script>
11
+ <link href="https://fonts.googleapis.com/css2?family=Fredoka:wght@300..700&family=Caveat:wght@400..700&display=swap"
12
+ rel="stylesheet" />
13
+ <link
14
+ href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap"
15
+ rel="stylesheet" />
16
+
17
+ <!-- Tailwind Configuration -->
18
+ <script id="tailwind-config">
19
+ tailwind.config = {
20
+ darkMode: "class",
21
+ theme: {
22
+ extend: {
23
+ colors: {
24
+ surface: "#e6f0fd",
25
+ "crayon-blue": "#2563eb",
26
+ "crayon-red": "#dc2626",
27
+ "crayon-green": "#16a34a",
28
+ "crayon-yellow": "#ca8a04",
29
+ "crayon-orange": "#ea580c",
30
+ "crayon-purple": "#7c3aed",
31
+ "crayon-dark": "#1A1A1A"
32
+ },
33
+ fontFamily: {
34
+ "fredoka": ["Fredoka", "sans-serif"],
35
+ "caveat": ["Caveat", "cursive"]
36
+ },
37
+ fontSize: {
38
+ "headline-lg": ["42px", { lineHeight: "1.1", fontWeight: "700" }],
39
+ "headline-md": ["28px", { lineHeight: "1.2", fontWeight: "600" }],
40
+ "body-lg": ["24px", { lineHeight: "1.5", fontWeight: "500" }],
41
+ "label-sm": ["18px", { lineHeight: "1.2", letterSpacing: "0.01em", fontWeight: "500" }],
42
+ "body-md": ["20px", { lineHeight: "1.5", fontWeight: "400" }]
43
+ }
44
+ },
45
+ },
46
+ }
47
+ </script>
48
+
49
+ <svg height="0" style="position: absolute;" width="0">
50
+ <filter height="120%" id="crayon-texture" width="120%" x="-10%" y="-10%">
51
+ <feTurbulence baseFrequency="0.4" numOctaves="3" result="noise" type="fractalNoise"></feTurbulence>
52
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="2.5" xChannelSelector="R" yChannelSelector="G">
53
+ </feDisplacementMap>
54
+ </filter>
55
+ <filter height="120%" id="crayon-heavy" width="120%" x="-10%" y="-10%">
56
+ <feTurbulence baseFrequency="0.5" numOctaves="4" result="noise" type="fractalNoise"></feTurbulence>
57
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="4" xChannelSelector="R" yChannelSelector="G">
58
+ </feDisplacementMap>
59
+ </filter>
60
+ </svg>
61
+
62
+ <style>
63
+ /* Sketchbook Styles */
64
+ body {
65
+ background-color: #e6f0fd;
66
+ background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='noiseFilter'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.8' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23noiseFilter)' opacity='0.08'/%3E%3C/svg%3E");
67
+ color: #1A1A1A;
68
+ font-family: 'Fredoka', sans-serif;
69
+ }
70
+
71
+ .bg-surface {
72
+ background-color: rgb(255 255 255 / 0%) !important;
73
+ backdrop-filter: blur(2px);
74
+ }
75
+
76
+ .crayon-filter {
77
+ filter: url('#crayon-texture');
78
+ }
79
+
80
+ .crayon-heavy {
81
+ filter: url('#crayon-heavy');
82
+ }
83
+
84
+ .crayon-border-green {
85
+ border: 4px solid #16a34a;
86
+ border-radius: 12px 8px 15px 10px / 8px 14px 10px 12px;
87
+ filter: url('#crayon-texture');
88
+ }
89
+
90
+ .task-card {
91
+ border: 3px solid rgba(124, 58, 237, 0.4);
92
+ border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px;
93
+ filter: url('#crayon-texture');
94
+ }
95
+
96
+ .crayon-border-blue {
97
+ border: 4px dashed #2563eb;
98
+ border-radius: 15px 10px 12px 18px / 12px 18px 15px 10px;
99
+ filter: url('#crayon-texture');
100
+ }
101
+
102
+ .crayon-border-purple {
103
+ border: 4px solid #7c3aed;
104
+ border-radius: 10px 16px 12px 14px / 16px 12px 14px 10px;
105
+ filter: url('#crayon-texture');
106
+ }
107
+
108
+ .crayon-button {
109
+ border: 4px solid #2563eb;
110
+ border-radius: 12px 8px 14px 10px / 8px 14px 10px 12px;
111
+ transition: all 0.2s ease;
112
+ filter: url('#crayon-texture');
113
+ cursor: pointer;
114
+ }
115
+
116
+ .crayon-button:hover {
117
+ transform: scale(1.05) rotate(1deg);
118
+ box-shadow: 6px 6px 0px 0px rgba(0, 0, 0, 0.1);
119
+ }
120
+
121
+ .crayon-button:hover .material-symbols-outlined.spin-on-hover {
122
+ animation: spin 2s linear infinite;
123
+ }
124
+
125
+ @keyframes spin {
126
+ from {
127
+ transform: rotate(0deg);
128
+ }
129
+
130
+ to {
131
+ transform: rotate(360deg);
132
+ }
133
+ }
134
+
135
+ @keyframes drift {
136
+ 0% {
137
+ transform: translateX(0);
138
+ }
139
+
140
+ 50% {
141
+ transform: translateX(20px);
142
+ }
143
+
144
+ 100% {
145
+ transform: translateX(0);
146
+ }
147
+ }
148
+
149
+ .drift-slow {
150
+ animation: drift 8s ease-in-out infinite;
151
+ }
152
+
153
+ .drift-medium {
154
+ animation: drift 5s ease-in-out infinite;
155
+ }
156
+
157
+ .organic-shape {
158
+ border-radius: 255px 15px 225px 15px/15px 225px 15px 255px;
159
+ filter: url('#crayon-texture');
160
+ }
161
+
162
+ .scribble-fill-green {
163
+ background: repeating-linear-gradient(60deg, #16a34a, #16a34a 2px, #15803d 3px, #16a34a 4px);
164
+ }
165
+
166
+ .progress-fill {
167
+ background: repeating-linear-gradient(80deg, #2563eb, #2563eb 2px, #1d4ed8 3px, #2563eb 5px);
168
+ }
169
+
170
+ .material-symbols-outlined {
171
+ font-variation-settings: 'FILL' 1, 'wght' 700, 'GRAD' 0, 'opsz' 48;
172
+ filter: url('#crayon-texture');
173
+ }
174
+
175
+ /* --- Modals --- */
176
+ .modal {
177
+ position: fixed;
178
+ inset: 0;
179
+ background: rgba(15, 23, 42, 0.6);
180
+ display: none;
181
+ align-items: center;
182
+ justify-content: center;
183
+ z-index: 100;
184
+ padding: 2rem;
185
+ }
186
+
187
+ .modal.active {
188
+ display: flex;
189
+ }
190
+
191
+ .modal-content {
192
+ border: 4px solid #2563eb;
193
+ width: 100%;
194
+ max-width: 900px;
195
+ border-radius: 24px;
196
+ display: flex;
197
+ flex-direction: column;
198
+ max-height: 85vh;
199
+ box-shadow: 12px 12px 0px 0px rgba(0, 0, 0, 0.1);
200
+ position: relative;
201
+ }
202
+
203
+ .modal-sketch-bg {
204
+ position: absolute;
205
+ inset: -8px;
206
+ border: 6px solid #2563eb;
207
+ backdrop-filter: blur(10px);
208
+ border-radius: 255px 15px 225px 15px/15px 225px 15px 255px;
209
+ z-index: -1;
210
+ filter: url('#crayon-texture');
211
+ pointer-events: none;
212
+ }
213
+
214
+ .modal-header {
215
+ padding: 1.5rem 2rem;
216
+ border-bottom: 3px dashed #adc6ff;
217
+ display: flex;
218
+ justify-content: space-between;
219
+ align-items: center;
220
+ position: relative;
221
+ z-index: 10;
222
+ }
223
+
224
+ .modal-body {
225
+ padding: 2rem;
226
+ overflow-y: auto;
227
+ position: relative;
228
+ z-index: 10;
229
+ }
230
+
231
+ #resultText,
232
+ pre {
233
+ border: 3px dashed #adc6ff !important;
234
+ padding: 2rem !important;
235
+ border-radius: 20px !important;
236
+ font-family: 'Fredoka', sans-serif !important;
237
+ font-size: 1.5rem !important;
238
+ font-weight: 600 !important;
239
+ color: #fff !important;
240
+ white-space: pre-wrap !important;
241
+ word-break: break-all !important;
242
+ line-height: 1.6 !important;
243
+ filter: url('#crayon-texture');
244
+ }
245
+
246
+ .close-modal {
247
+ background: transparent;
248
+ border: none;
249
+ color: #dc2626;
250
+ font-size: 3rem;
251
+ cursor: pointer;
252
+ font-weight: 700;
253
+ }
254
+
255
+ .text-headline-lg {
256
+ filter: url('#crayon-texture');
257
+ }
258
+
259
+ .copy-btn {
260
+ background: #16a34a;
261
+ color: white;
262
+ padding: 0.5rem 1.5rem;
263
+ border-radius: 12px;
264
+ font-weight: 700;
265
+ box-shadow: 4px 4px 0px 0px #15803d;
266
+ transition: all 0.2s;
267
+ filter: url('#crayon-texture');
268
+ }
269
+
270
+ .copy-btn:hover {
271
+ transform: translate(-2px, -2px);
272
+ box-shadow: 6px 6px 0px 0px #15803d;
273
+ }
274
+
275
+ /* --- Specific UI Elements --- */
276
+ .status-modal-content {
277
+ max-width: 450px;
278
+ text-align: center;
279
+ padding: 3rem 2rem;
280
+ }
281
+
282
+ .status-modal-bg {
283
+ position: absolute;
284
+ inset: -12px;
285
+ border: 8px solid #2563eb;
286
+ background: #e6f0fd;
287
+ border-radius: 20px 40px 15px 35px / 35px 15px 40px 20px;
288
+ z-index: -1;
289
+ filter: url('#crayon-texture');
290
+ pointer-events: none;
291
+ }
292
+
293
+ .status-icon-container {
294
+ width: 100px;
295
+ height: 100px;
296
+ margin: 0 auto 1.5rem;
297
+ display: flex;
298
+ align-items: center;
299
+ justify-content: center;
300
+ border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px;
301
+ border: 4px solid currentColor;
302
+ filter: url('#crayon-texture');
303
+ position: relative;
304
+ z-index: 20;
305
+ }
306
+
307
+ .status-icon-bg {
308
+ position: absolute;
309
+ inset: 0;
310
+ background: currentColor;
311
+ opacity: 0.15;
312
+ z-index: -1;
313
+ border-radius: inherit;
314
+ }
315
+
316
+ .modal-decoration {
317
+ position: absolute;
318
+ pointer-events: none;
319
+ opacity: 0.3;
320
+ z-index: 5;
321
+ filter: url('#crayon-texture');
322
+ }
323
+
324
+ .table-container::-webkit-scrollbar {
325
+ width: 10px;
326
+ }
327
+
328
+ .table-container::-webkit-scrollbar-track {
329
+ background: #f1f5f9;
330
+ border-radius: 10px;
331
+ }
332
+
333
+ .table-container::-webkit-scrollbar-thumb {
334
+ background: #cbd5e1;
335
+ border-radius: 10px;
336
+ border: 2px solid #f1f5f9;
337
+ }
338
+
339
+ .dragging {
340
+ border-color: #16a34a !important;
341
+ background-color: #f0fdf4 !important;
342
+ }
343
+ </style>
344
+ </head>
345
+
346
+ <body class="h-screen flex flex-col overflow-hidden relative">
347
+ <!-- Main Background Decorations -->
348
+ <div class="fixed inset-0 pointer-events-none overflow-hidden -z-10 opacity-40">
349
+ <!-- Stars -->
350
+ <span
351
+ class="material-symbols-outlined absolute text-5xl text-crayon-yellow top-20 left-[10%] rotate-12 crayon-heavy animate-pulse">star</span>
352
+ <span
353
+ class="material-symbols-outlined absolute text-3xl text-crayon-orange top-[40%] left-[5%] -rotate-12 crayon-filter">star</span>
354
+ <span
355
+ class="material-symbols-outlined absolute text-4xl text-crayon-yellow bottom-[20%] left-[15%] rotate-45 animate-pulse">star</span>
356
+ <span
357
+ class="material-symbols-outlined absolute text-6xl text-crayon-orange top-[15%] right-[15%] rotate-[-15deg] crayon-heavy">star</span>
358
+ <span
359
+ class="material-symbols-outlined absolute text-3xl text-crayon-yellow bottom-[30%] right-[10%] rotate-12 animate-pulse">star</span>
360
+
361
+ <!-- Clouds -->
362
+ <span
363
+ class="material-symbols-outlined absolute text-[120px] text-crayon-blue top-[10%] left-[25%] opacity-20 drift-slow">cloud</span>
364
+ <span
365
+ class="material-symbols-outlined absolute text-[80px] text-crayon-purple bottom-[15%] left-[40%] opacity-10 drift-medium">cloud</span>
366
+ <span
367
+ class="material-symbols-outlined absolute text-[100px] text-crayon-blue top-[60%] right-[25%] opacity-15 drift-slow">cloud</span>
368
+ <span
369
+ class="material-symbols-outlined absolute text-[150px] text-crayon-purple top-[30%] right-[5%] opacity-10 drift-medium">cloud</span>
370
+
371
+ <!-- Hearts -->
372
+ <span
373
+ class="material-symbols-outlined absolute text-4xl text-crayon-red top-[25%] left-[18%] rotate-[-15deg] crayon-filter animate-pulse">favorite</span>
374
+ <span
375
+ class="material-symbols-outlined absolute text-2xl text-crayon-red bottom-[10%] right-[20%] rotate-12 crayon-filter">favorite</span>
376
+ <span
377
+ class="material-symbols-outlined absolute text-5xl text-crayon-red top-[70%] left-[8%] rotate-[10deg] animate-pulse">favorite</span>
378
+ </div>
379
+
380
+ <!-- SVG Filters -->
381
+ <svg height="0" width="0" style="position: absolute;">
382
+ <filter id="crayon-texture" x="-10%" y="-10%" width="120%" height="120%">
383
+ <feTurbulence type="fractalNoise" baseFrequency="0.4" numOctaves="3" result="noise" />
384
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="2.5" xChannelSelector="R" yChannelSelector="G" />
385
+ </filter>
386
+ <filter id="crayon-heavy" x="-10%" y="-10%" width="120%" height="120%">
387
+ <feTurbulence type="fractalNoise" baseFrequency="0.5" numOctaves="4" result="noise" />
388
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="4" xChannelSelector="R" yChannelSelector="G" />
389
+ </filter>
390
+ </svg>
391
+
392
+ <!-- Header -->
393
+ <header
394
+ class="bg-surface flex justify-between items-center w-[calc(100%-48px)] mx-6 mt-6 px-8 py-5 crayon-border-green z-10 shrink-0 organic-shape shadow-sm">
395
+ <div class="flex items-center gap-5">
396
+ <div
397
+ class="bg-crayon-green text-white w-14 h-14 rounded-2xl flex items-center justify-center border-[3px] border-crayon-green rotate-[-4deg] crayon-filter scribble-fill-green shadow-md">
398
+ <span class="material-symbols-outlined text-4xl">document_scanner</span>
399
+ </div>
400
+ <div class="flex flex-col -rotate-1">
401
+ <h1 class="text-headline-lg text-[#4c1d95] leading-none mb-1">OCR</h1>
402
+ <span class="text-label-sm text-[#4b5563] font-bold">Optical Character Recognition</span>
403
+ </div>
404
+ </div>
405
+
406
+ <div class="flex items-center gap-10 relative">
407
+ <div class="absolute -left-48 top-2 rotate-12 crayon-heavy">
408
+ <span class="material-symbols-outlined text-4xl text-crayon-yellow">star</span>
409
+ </div>
410
+ <div class="absolute -left-24 top-0 -rotate-6 crayon-heavy opacity-80">
411
+ <span class="material-symbols-outlined text-5xl text-crayon-blue">cloud</span>
412
+ </div>
413
+ <button id="apiDocBtn"
414
+ class="flex items-center gap-2 text-headline-md text-crayon-purple px-8 py-3 bg-surface crayon-button rotate-1 shadow-md">
415
+ <span class="material-symbols-outlined text-3xl">menu_book</span>
416
+ API DOC
417
+ </button>
418
+ <div
419
+ class="flex items-center gap-4 bg-surface px-6 py-3 rounded-full border-[4px] border-crayon-green organic-shape shadow-md">
420
+ <div id="healthDot" class="w-4 h-4 rounded-full bg-crayon-green shadow-[0_0_12px_rgba(22,163,74,0.5)]">
421
+ </div>
422
+ <span id="healthText" class="text-headline-md text-crayon-green text-2xl">Service Online</span>
423
+ <div class="text-crayon-orange flex items-center justify-center rotate-[15deg]">
424
+ <span class="material-symbols-outlined text-4xl">light_mode</span>
425
+ </div>
426
+ </div>
427
+ </div>
428
+ </header>
429
+
430
+ <!-- Main Content -->
431
+ <main class="flex-1 flex overflow-hidden p-8 gap-8 mx-auto w-full relative">
432
+ <!-- Input Section -->
433
+ <section class="w-[450px] flex flex-col gap-8">
434
+ <div id="uploadZone"
435
+ class="flex flex-col gap-6 p-8 bg-surface crayon-border-blue flex-1 relative organic-shape cursor-pointer group shadow-sm hover:shadow-md transition-shadow">
436
+ <div class="flex items-center gap-4 mb-2 rotate-1">
437
+ <div
438
+ class="bg-crayon-blue text-white rounded-full w-14 h-14 flex items-center justify-center border-2 border-crayon-blue crayon-filter shadow-md">
439
+ <span class="material-symbols-outlined text-4xl">upload</span>
440
+ </div>
441
+ <div>
442
+ <h2 class="text-headline-md text-crayon-blue leading-none mb-1 flex items-center gap-2">
443
+ INPUT
444
+ <span class="material-symbols-outlined text-crayon-red text-2xl rotate-12">favorite</span>
445
+ </h2>
446
+ <p class="text-label-sm text-[#6b7280]">Upload your file</p>
447
+ </div>
448
+ </div>
449
+
450
+ <div
451
+ class="flex-1 flex flex-col items-center justify-center relative border-[4px] border-dashed border-[#adc6ff] rounded-[32px] bg-surface p-6 group-hover:bg-blue-50 transition-colors">
452
+ <div class="relative w-48 h-48 mb-8 flex items-center justify-center">
453
+ <div
454
+ class="absolute w-36 h-44 bg-blue-100 border-[3px] border-crayon-blue rounded-xl rotate-[12deg] right-4 bottom-4 organic-shape opacity-60">
455
+ </div>
456
+ <div
457
+ class="absolute w-36 h-44 bg-surface border-[3px] border-crayon-blue rounded-xl z-10 flex flex-col items-center p-4 organic-shape rotate-[-4deg] shadow-sm">
458
+ <div
459
+ class="w-8 h-8 rounded-full bg-crayon-yellow self-start mb-4 border-[2px] border-[#1A1A1A]">
460
+ </div>
461
+ <div class="w-full h-2 bg-[#adc6ff] rounded-full mb-3"></div>
462
+ <div class="w-2/3 h-2 bg-[#adc6ff] rounded-full self-start"></div>
463
+ </div>
464
+ <div
465
+ class="absolute -bottom-4 -right-4 bg-crayon-blue text-white rounded-full w-16 h-16 flex items-center justify-center border-[4px] border-white z-20 shadow-xl group-hover:scale-110 transition-transform rotate-12 crayon-filter">
466
+ <span class="material-symbols-outlined text-4xl">arrow_upward</span>
467
+ </div>
468
+ </div>
469
+ <p class="text-headline-md text-[#1A1A1A] mb-1 font-bold">Drag & drop here</p>
470
+ <p class="text-label-sm text-crayon-purple mb-8 font-bold">or click to browse</p>
471
+ <div class="flex gap-2 mt-auto w-full justify-center flex-wrap">
472
+ <span
473
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-blue text-crayon-blue text-lg font-bold rounded-xl organic-shape -rotate-2">PNG</span>
474
+ <span
475
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-green text-crayon-green text-lg font-bold rounded-xl organic-shape rotate-1">JPG</span>
476
+ <span
477
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-red text-crayon-red text-lg font-bold rounded-xl organic-shape -rotate-1">PDF</span>
478
+ </div>
479
+ </div>
480
+ <input type="file" id="fileInput" hidden accept="image/*,application/pdf">
481
+ </div>
482
+ </section>
483
+
484
+ <!-- Activity Section -->
485
+ <section
486
+ class="flex-1 flex flex-col bg-surface crayon-border-purple p-8 relative organic-shape overflow-hidden shadow-sm">
487
+ <div class="flex items-center justify-between mb-8">
488
+ <div class="flex items-center gap-4 rotate-1">
489
+ <div
490
+ class="bg-crayon-purple text-white rounded-full w-14 h-14 flex items-center justify-center border-[3px] border-crayon-purple shadow-md">
491
+ <span class="material-symbols-outlined text-4xl">schedule</span>
492
+ </div>
493
+ <div>
494
+ <h2 class="text-headline-lg text-[#4c1d95] leading-none mb-1">ACTIVITY</h2>
495
+ <p class="text-label-sm text-[#6b7280] font-bold">Recent tasks</p>
496
+ </div>
497
+ </div>
498
+ <div class="relative w-40 h-20 flex items-center">
499
+ <svg class="absolute left-[-30px] top-4 w-32 h-16 crayon-filter text-crayon-yellow" fill="none"
500
+ stroke="currentColor" stroke-dasharray="6 6" stroke-linecap="round" stroke-width="3"
501
+ viewBox="0 0 100 50">
502
+ <path d="M10,40 Q40,50 60,30 T90,10" />
503
+ </svg>
504
+ <span
505
+ class="material-symbols-outlined text-5xl text-crayon-orange absolute right-0 top-0 rotate-12">send</span>
506
+ </div>
507
+ <button onclick="loadTasks()"
508
+ class="flex items-center gap-2 text-headline-md text-crayon-blue px-8 py-3 bg-surface crayon-button border-[4px] -rotate-1 shadow-md">
509
+ <span class="material-symbols-outlined text-3xl spin-on-hover">sync</span>
510
+ Refresh
511
+ </button>
512
+ </div>
513
+
514
+ <!-- List Header -->
515
+ <div class="flex w-full px-6 pb-4 border-b-[5px] text-[#4c1d95] font-bold text-xl uppercase tracking-wider"
516
+ style="border-color: rgba(124, 58, 237, 0.4); border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px; filter: url(#crayon-texture);">
517
+ <div class="w-1/2">CONTENT</div>
518
+ <div class="w-1/6 text-center">STATUS</div>
519
+ <div class="w-1/4 text-center">PROGRESS</div>
520
+ <div class="w-[10%] text-center">ACTION</div>
521
+ </div>
522
+
523
+ <!-- Task List Body -->
524
+ <div id="queueBody" class="flex flex-col gap-5 mt-6 overflow-y-auto flex-1 table-container pr-4">
525
+ <div class="text-center py-32 text-headline-md text-[#94a3b8] font-bold opacity-60">No tasks found
526
+ yet...</div>
527
+ </div>
528
+ </section>
529
+ </main>
530
+
531
+ <!-- Modals -->
532
+ <!-- Result & API Modal -->
533
+ <div id="resultModal" class="modal">
534
+ <div class="modal-content">
535
+ <div class="modal-sketch-bg"></div>
536
+
537
+ <!-- Decorations -->
538
+ <span
539
+ class="material-symbols-outlined modal-decoration text-6xl text-crayon-yellow top-4 left-4 -rotate-12">star</span>
540
+ <span
541
+ class="material-symbols-outlined modal-decoration text-8xl text-crayon-blue top-12 right-20 opacity-20">cloud</span>
542
+ <span
543
+ class="material-symbols-outlined modal-decoration text-4xl text-crayon-orange bottom-10 left-10 rotate-45">star</span>
544
+ <span
545
+ class="material-symbols-outlined modal-decoration text-7xl text-crayon-purple bottom-4 right-4 -rotate-6 opacity-40">cloud</span>
546
+
547
+ <div class="modal-header">
548
+ <div class="flex items-center gap-6">
549
+ <span id="modalTitle" class="text-headline-lg text-[#1e1b4b]">Extraction Result</span>
550
+ <button id="copyBtn" onclick="copyResult()" class="copy-btn">📋 Copy Text</button>
551
+ </div>
552
+ <button class="close-modal" onclick="closeModal()">&times;</button>
553
+ </div>
554
+ <div class="modal-body">
555
+ <pre id="resultText"></pre>
556
+ </div>
557
+ </div>
558
+ </div>
559
+
560
+ <!-- Status Modal -->
561
+ <div id="statusModal" class="modal">
562
+ <div class="modal-content status-modal-content">
563
+ <div id="statusBg" class="status-modal-bg"></div>
564
+
565
+ <span
566
+ class="material-symbols-outlined modal-decoration text-4xl text-crayon-yellow top-6 right-6 rotate-12">star</span>
567
+ <span
568
+ class="material-symbols-outlined modal-decoration text-6xl text-crayon-blue top-10 left-4 opacity-30">cloud</span>
569
+ <span
570
+ class="material-symbols-outlined modal-decoration text-3xl text-crayon-orange bottom-8 right-10 -rotate-12">star</span>
571
+ <span
572
+ class="material-symbols-outlined modal-decoration text-5xl text-crayon-purple bottom-10 left-8 rotate-6 opacity-30">cloud</span>
573
+
574
+ <div id="statusIconContainer" class="status-icon-container text-crayon-blue">
575
+ <div class="status-icon-bg"></div>
576
+ <span id="statusIcon" class="material-symbols-outlined text-6xl animate-bounce">upload</span>
577
+ </div>
578
+ <h2 id="statusMessage" class="text-headline-lg text-crayon-blue mb-2">Uploading...</h2>
579
+ <p id="statusSubMessage" class="text-body-lg text-[#4b5563]">Processing your request, please wait.</p>
580
+ </div>
581
+ </div>
582
+
583
+ <!-- Application Logic -->
584
+ <script>
585
+ // --- Configuration ---
586
+ const API_BASE = '/api';
587
+
588
+ // --- DOM Elements ---
589
+ const UI = {
590
+ uploadZone: document.getElementById('uploadZone'),
591
+ fileInput: document.getElementById('fileInput'),
592
+ queueBody: document.getElementById('queueBody'),
593
+ resultModal: document.getElementById('resultModal'),
594
+ statusModal: document.getElementById('statusModal'),
595
+ resultText: document.getElementById('resultText'),
596
+ modalTitle: document.getElementById('modalTitle'),
597
+ statusMessage: document.getElementById('statusMessage'),
598
+ statusSubMessage: document.getElementById('statusSubMessage'),
599
+ statusIcon: document.getElementById('statusIcon'),
600
+ statusIconContainer: document.getElementById('statusIconContainer'),
601
+ statusBg: document.getElementById('statusBg'),
602
+ healthDot: document.getElementById('healthDot'),
603
+ healthText: document.getElementById('healthText'),
604
+ apiDocBtn: document.getElementById('apiDocBtn')
605
+ };
606
+
607
+ // --- UI Helpers ---
608
+ function updateStatusModal(type, msg, subMsg) {
609
+ UI.statusMessage.innerText = msg;
610
+ UI.statusSubMessage.innerText = subMsg || "Processing your request, please wait.";
611
+ UI.statusIconContainer.className = "status-icon-container";
612
+ UI.statusIcon.className = "material-symbols-outlined text-6xl";
613
+ UI.statusBg.style.borderColor = "";
614
+
615
+ if (type === 'uploading') {
616
+ UI.statusIconContainer.classList.add('text-crayon-blue');
617
+ UI.statusIcon.innerText = "upload";
618
+ UI.statusIcon.classList.add('animate-bounce');
619
+ UI.statusBg.style.borderColor = "#2563eb";
620
+ } else if (type === 'success') {
621
+ UI.statusIconContainer.classList.add('text-crayon-green');
622
+ UI.statusIcon.innerText = "check_circle";
623
+ UI.statusBg.style.borderColor = "#16a34a";
624
+ } else if (type === 'error') {
625
+ UI.statusIconContainer.classList.add('text-crayon-red');
626
+ UI.statusIcon.innerText = "error";
627
+ UI.statusBg.style.borderColor = "#dc2626";
628
+ }
629
+ }
630
+
631
+ function closeModal() {
632
+ UI.resultModal.classList.remove('active');
633
+ }
634
+
635
+ function copyResult() {
636
+ const text = UI.resultText.innerText;
637
+ const btn = document.getElementById('copyBtn');
638
+ navigator.clipboard.writeText(text).then(() => {
639
+ const orig = btn.innerText;
640
+ btn.innerText = '✓ Copied!';
641
+ setTimeout(() => { btn.innerText = orig; }, 2000);
642
+ });
643
+ }
644
+
645
+ // --- API Functions ---
646
+ async function loadTasks() {
647
+ try {
648
+ const res = await fetch(`${API_BASE}/tasks`);
649
+ const data = await res.json();
650
+ renderQueue(data);
651
+ } catch (err) {
652
+ console.error("Load tasks error:", err);
653
+ }
654
+ }
655
+
656
+ async function handleFile(file) {
657
+ if (!file) return;
658
+
659
+ UI.statusModal.classList.add('active');
660
+ updateStatusModal('uploading', "Uploading...", "Sending file to server...");
661
+
662
+ const formData = new FormData();
663
+ formData.append('image', file);
664
+
665
+ try {
666
+ const res = await fetch(`${API_BASE}/tasks/upload`, { method: 'POST', body: formData });
667
+ if (res.ok) {
668
+ updateStatusModal('success', "Success! ✨", "File uploaded and task created.");
669
+ setTimeout(() => {
670
+ UI.statusModal.classList.remove('active');
671
+ loadTasks();
672
+ }, 1200);
673
+ } else {
674
+ updateStatusModal('error', "Upload Failed ❌", "Something went wrong on our end.");
675
+ setTimeout(() => UI.statusModal.classList.remove('active'), 2000);
676
+ }
677
+ } catch (err) {
678
+ console.error("Upload error:", err);
679
+ updateStatusModal('error', "Connection Error ⚠️", "Could not reach the server.");
680
+ setTimeout(() => UI.statusModal.classList.remove('active'), 2000);
681
+ }
682
+ }
683
+
684
+ async function showResult(id) {
685
+ try {
686
+ const res = await fetch(`${API_BASE}/tasks/${id}`);
687
+ const data = await res.json();
688
+ const text = data.result;
689
+
690
+ UI.modalTitle.innerText = "Extracted Content";
691
+
692
+ let formatted = text;
693
+ try {
694
+ const parsed = JSON.parse(text);
695
+ formatted = JSON.stringify(parsed, null, 2);
696
+ } catch (e) { /* Not JSON, use raw text */ }
697
+
698
+ UI.resultText.innerText = formatted;
699
+ UI.resultModal.classList.add('active');
700
+ } catch (err) {
701
+ console.error("Show result error:", err);
702
+ }
703
+ }
704
+
705
+ async function checkHealth() {
706
+ try {
707
+ const res = await fetch('/health');
708
+ const data = await res.json();
709
+ const healthy = data.status === 'healthy';
710
+
711
+ UI.healthDot.className = `w-4 h-4 rounded-full ${healthy ? 'bg-crayon-green' : 'bg-crayon-red'} shadow-md`;
712
+ UI.healthText.innerText = healthy ? 'Service Online' : 'Service Down';
713
+ UI.healthText.className = `text-headline-md font-bold ${healthy ? 'text-crayon-green' : 'text-crayon-red'}`;
714
+ } catch (e) {
715
+ UI.healthDot.className = 'w-4 h-4 rounded-full bg-crayon-red shadow-md';
716
+ UI.healthText.innerText = 'Connection Error';
717
+ UI.healthText.className = 'text-headline-md font-bold text-crayon-red';
718
+ }
719
+ }
720
+
721
+ // --- Renderers ---
722
+ function renderQueue(tasks) {
723
+ if (tasks.length === 0) {
724
+ UI.queueBody.innerHTML = '<div class="text-center py-32 text-headline-md text-[#94a3b8] font-bold opacity-60">No tasks found yet...</div>';
725
+ return;
726
+ }
727
+
728
+ UI.queueBody.innerHTML = tasks.map((t, i) => {
729
+ const rotate = i % 2 === 0 ? 'rotate-[0.3deg]' : '-rotate-[0.3deg]';
730
+ const status = t.status.toLowerCase();
731
+ const colors = {
732
+ completed: { text: 'crayon-green', bg: 'bg-[#f0fdf4]' },
733
+ failed: { text: 'crayon-red', bg: 'bg-[#fef2f2]' },
734
+ processing: { text: 'crayon-blue', bg: 'bg-[#eff6ff]' },
735
+ pending: { text: 'crayon-purple', bg: 'bg-[#f5f3ff]' }
736
+ };
737
+ const theme = colors[status] || colors.pending;
738
+
739
+ return `
740
+ <div class="task-card flex items-center p-6 bg-surface hover:border-crayon-purple transition-colors shadow-sm ${rotate}">
741
+ <div class="flex items-center gap-5 w-1/2">
742
+ <div class="w-16 h-20 border-[3px] border-crayon-blue rounded-xl p-2 bg-surface flex shadow-sm organic-shape rotate-[-3deg]">
743
+ <div class="w-1/2 h-full border-r-[2px] border-[#adc6ff] flex flex-col gap-1.5 p-0.5">
744
+ <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
745
+ <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
746
+ <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
747
+ </div>
748
+ <div class="w-1/2 h-full flex flex-col gap-1.5 p-0.5 relative">
749
+ <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
750
+ <div class="mt-auto flex gap-1.5 bottom-1 absolute">
751
+ <div class="w-2.5 h-2.5 bg-crayon-yellow rounded-sm"></div>
752
+ <div class="w-2.5 h-2.5 bg-crayon-green rounded-sm"></div>
753
+ </div>
754
+ </div>
755
+ </div>
756
+ <div class="flex flex-col">
757
+ <span class="text-headline-md text-[#1A1A1A] leading-tight font-bold mb-1">${t.filename}</span>
758
+ <div class="text-label-sm text-[#94a3b8] font-bold">${t.id.substring(0, 12)}</div>
759
+ </div>
760
+ </div>
761
+ <div class="w-1/6 flex justify-center">
762
+ <div class="px-4 py-2 ${theme.bg} border-[3px] border-${theme.text} text-${theme.text} font-bold rounded-2xl uppercase tracking-tight crayon-filter">
763
+ ${status.replace('_', ' ')}
764
+ </div>
765
+ </div>
766
+ <div class="w-1/4 flex items-center justify-center gap-4">
767
+ <div class="flex-1 h-6 border-[3px] border-[#adc6ff] rounded-full overflow-hidden bg-surface p-[2px] crayon-filter">
768
+ <div class="h-full rounded-full progress-fill shadow-sm" style="width:${t.progress}%"></div>
769
+ </div>
770
+ <span class="text-headline-md text-2xl text-[#1A1A1A] font-bold w-12 text-right">${status === 'completed' ? '' : t.progress + '%'}</span>
771
+ </div>
772
+ <div class="w-[10%] flex justify-center">
773
+ ${status === 'completed' ? `
774
+ <button onclick="showResult('${t.id}')" class="flex items-center gap-2 px-5 py-2.5 bg-white border-[3px] border-crayon-blue text-crayon-blue font-bold rounded-2xl hover:bg-crayon-blue hover:text-white transition-all shadow-sm crayon-filter">
775
+ <span class="material-symbols-outlined text-2xl">visibility</span>
776
+ VIEW
777
+ </button>
778
+ ` : '—'}
779
+ </div>
780
+ </div>
781
+ `;
782
+ }).join('');
783
+ }
784
+
785
+ // --- Event Listeners ---
786
+ UI.uploadZone.onclick = () => UI.fileInput.click();
787
+ UI.uploadZone.ondragover = (e) => { e.preventDefault(); UI.uploadZone.classList.add('dragging'); };
788
+ UI.uploadZone.ondragleave = () => UI.uploadZone.classList.remove('dragging');
789
+ UI.uploadZone.ondrop = (e) => {
790
+ e.preventDefault();
791
+ UI.uploadZone.classList.remove('dragging');
792
+ handleFile(e.dataTransfer.files[0]);
793
+ };
794
+ UI.fileInput.onchange = (e) => handleFile(e.target.files[0]);
795
+ UI.apiDocBtn.onclick = () => {
796
+ const doc = {
797
+ base_url: window.location.origin,
798
+ endpoints: [
799
+ { method: "POST", path: "/api/tasks/upload", desc: "Upload image for OCR" },
800
+ { method: "GET", path: "/api/tasks", desc: "List all tasks" },
801
+ { method: "GET", path: "/api/tasks/{task_id}", desc: "Get OCR result" },
802
+ { method: "GET", path: "/health", desc: "Service health" }
803
+ ],
804
+ example_usage: `curl -X POST -F 'image=@file.jpg' ${window.location.origin}/api/tasks/upload`
805
+ };
806
+ UI.resultText.innerText = JSON.stringify(doc, null, 2);
807
+ UI.modalTitle.innerText = "API Documentation";
808
+ UI.resultModal.classList.add('active');
809
+ };
810
+
811
+ // --- Lifecycle ---
812
+ loadTasks(); checkHealth(); // initial paint: without the first health check the hard-coded "Service Online" would show unverified until the first 10 s tick
813
+ setInterval(loadTasks, 5000);
814
+ setInterval(checkHealth, 10000);
815
+ </script>
816
+ </body>
817
+
818
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ocr-runner-backend"
7
+ version = "2.0.0"
8
+ description = "FastAPI backend for OCR Runner"
9
+ dependencies = [ # NOTE(review): none of these carry a version constraint, and the two git URLs have no tag/commit ref — confirm whether builds should be pinned
10
+ "fastapi",
11
+ "uvicorn",
12
+ "aiosqlite",
13
+ "aiofiles",
14
+ "python-multipart",
15
+ "custom_logger @ git+https://github.com/jebin2/custom_logger.git",
16
+ "ocr-runner[paddleocr] @ git+https://github.com/jebin2/OCR.git"
17
+ ]
18
+
19
+ [project.scripts]
20
+ ocr-backend = "app.main:app" # NOTE(review): console-script targets are called with no arguments, but "app.main:app" is the app string run.py hands to uvicorn — confirm this should point at a launcher function instead
run.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import uvicorn
2
+ from app.core.config import settings
3
+
4
+ if __name__ == "__main__":
5
+ uvicorn.run("app.main:app", host="0.0.0.0", port=settings.PORT, reload=False)