Sk4467 commited on
Commit
1e83c8a
·
verified ·
1 Parent(s): 2afd03c

Upload 108 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +50 -0
  2. Dockerfile +52 -0
  3. README.md +164 -6
  4. backend/app/__init__.py +2 -0
  5. backend/app/__pycache__/__init__.cpython-313.pyc +0 -0
  6. backend/app/api/__init__.py +2 -0
  7. backend/app/api/__pycache__/__init__.cpython-313.pyc +0 -0
  8. backend/app/api/routers/__init__.py +2 -0
  9. backend/app/api/routers/__pycache__/__init__.cpython-313.pyc +0 -0
  10. backend/app/api/routers/__pycache__/ocr.cpython-313.pyc +0 -0
  11. backend/app/api/routers/__pycache__/synthetic.cpython-313.pyc +0 -0
  12. backend/app/api/routers/ocr.py +166 -0
  13. backend/app/api/routers/synthetic.py +151 -0
  14. backend/app/data/.gitkeep +0 -0
  15. backend/app/data/annotations/annotations.csv +1 -0
  16. backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1.png +3 -0
  17. backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1_perspective.png +3 -0
  18. backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1.png +3 -0
  19. backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_blur.png +3 -0
  20. backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_brightness.png +3 -0
  21. backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_combined.png +3 -0
  22. backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_rotate.png +3 -0
  23. backend/app/main.py +68 -0
  24. backend/app/services/__pycache__/annotations.cpython-313.pyc +0 -0
  25. backend/app/services/__pycache__/ocr_processor.cpython-313.pyc +0 -0
  26. backend/app/services/annotations.py +117 -0
  27. backend/app/services/ocr_processor.py +102 -0
  28. backend/app/services/synthetic/__init__.py +22 -0
  29. backend/app/services/synthetic/__pycache__/__init__.cpython-313.pyc +0 -0
  30. backend/app/services/synthetic/__pycache__/backgrounds.cpython-313.pyc +0 -0
  31. backend/app/services/synthetic/__pycache__/config.cpython-313.pyc +0 -0
  32. backend/app/services/synthetic/__pycache__/core.cpython-313.pyc +0 -0
  33. backend/app/services/synthetic/__pycache__/effects.cpython-313.pyc +0 -0
  34. backend/app/services/synthetic/__pycache__/huggingface_processor.cpython-313.pyc +0 -0
  35. backend/app/services/synthetic/__pycache__/text_renderer.cpython-313.pyc +0 -0
  36. backend/app/services/synthetic/__pycache__/transformations.cpython-313.pyc +0 -0
  37. backend/app/services/synthetic/backgrounds.py +129 -0
  38. backend/app/services/synthetic/config.py +61 -0
  39. backend/app/services/synthetic/core.py +230 -0
  40. backend/app/services/synthetic/effects.py +218 -0
  41. backend/app/services/synthetic/huggingface_processor.py +228 -0
  42. backend/app/services/synthetic/text_renderer.py +112 -0
  43. backend/app/services/synthetic/transformations.py +249 -0
  44. backend/data/annotations/annotations.csv +22 -0
  45. backend/data/annotations/annotations.json +10 -0
  46. backend/data/uploaded_images/s1.png +0 -0
  47. backend/data/uploaded_images/s1_1.png +0 -0
  48. backend/data/uploaded_images/s2.png +0 -0
  49. backend/requirements.txt +17 -0
  50. content/static/NotoSansOriya-Black (2).ttf +3 -0
.gitattributes CHANGED
@@ -33,3 +33,53 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1_perspective.png filter=lfs diff=lfs merge=lfs -text
37
+ backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1.png filter=lfs diff=lfs merge=lfs -text
38
+ backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_blur.png filter=lfs diff=lfs merge=lfs -text
39
+ backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_brightness.png filter=lfs diff=lfs merge=lfs -text
40
+ backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_combined.png filter=lfs diff=lfs merge=lfs -text
41
+ backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_rotate.png filter=lfs diff=lfs merge=lfs -text
42
+ backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1.png filter=lfs diff=lfs merge=lfs -text
43
+ content/static/NotoSansOriya_Condensed-Black.ttf filter=lfs diff=lfs merge=lfs -text
44
+ content/static/NotoSansOriya_Condensed-Bold.ttf filter=lfs diff=lfs merge=lfs -text
45
+ content/static/NotoSansOriya_Condensed-ExtraBold.ttf filter=lfs diff=lfs merge=lfs -text
46
+ content/static/NotoSansOriya_Condensed-ExtraLight.ttf filter=lfs diff=lfs merge=lfs -text
47
+ content/static/NotoSansOriya_Condensed-Light.ttf filter=lfs diff=lfs merge=lfs -text
48
+ content/static/NotoSansOriya_Condensed-Medium.ttf filter=lfs diff=lfs merge=lfs -text
49
+ content/static/NotoSansOriya_Condensed-Regular.ttf filter=lfs diff=lfs merge=lfs -text
50
+ content/static/NotoSansOriya_Condensed-SemiBold.ttf filter=lfs diff=lfs merge=lfs -text
51
+ content/static/NotoSansOriya_Condensed-Thin.ttf filter=lfs diff=lfs merge=lfs -text
52
+ content/static/NotoSansOriya_ExtraCondensed-Black.ttf filter=lfs diff=lfs merge=lfs -text
53
+ content/static/NotoSansOriya_ExtraCondensed-Bold.ttf filter=lfs diff=lfs merge=lfs -text
54
+ content/static/NotoSansOriya_ExtraCondensed-ExtraBold.ttf filter=lfs diff=lfs merge=lfs -text
55
+ content/static/NotoSansOriya_ExtraCondensed-ExtraLight.ttf filter=lfs diff=lfs merge=lfs -text
56
+ content/static/NotoSansOriya_ExtraCondensed-Light.ttf filter=lfs diff=lfs merge=lfs -text
57
+ content/static/NotoSansOriya_ExtraCondensed-Medium.ttf filter=lfs diff=lfs merge=lfs -text
58
+ content/static/NotoSansOriya_ExtraCondensed-Regular.ttf filter=lfs diff=lfs merge=lfs -text
59
+ content/static/NotoSansOriya_ExtraCondensed-SemiBold.ttf filter=lfs diff=lfs merge=lfs -text
60
+ content/static/NotoSansOriya_ExtraCondensed-Thin.ttf filter=lfs diff=lfs merge=lfs -text
61
+ content/static/NotoSansOriya_SemiCondensed-Black.ttf filter=lfs diff=lfs merge=lfs -text
62
+ content/static/NotoSansOriya_SemiCondensed-Bold.ttf filter=lfs diff=lfs merge=lfs -text
63
+ content/static/NotoSansOriya_SemiCondensed-ExtraBold.ttf filter=lfs diff=lfs merge=lfs -text
64
+ content/static/NotoSansOriya_SemiCondensed-ExtraLight.ttf filter=lfs diff=lfs merge=lfs -text
65
+ content/static/NotoSansOriya_SemiCondensed-Light.ttf filter=lfs diff=lfs merge=lfs -text
66
+ content/static/NotoSansOriya_SemiCondensed-Medium.ttf filter=lfs diff=lfs merge=lfs -text
67
+ content/static/NotoSansOriya_SemiCondensed-Regular.ttf filter=lfs diff=lfs merge=lfs -text
68
+ content/static/NotoSansOriya_SemiCondensed-SemiBold.ttf filter=lfs diff=lfs merge=lfs -text
69
+ content/static/NotoSansOriya_SemiCondensed-Thin.ttf filter=lfs diff=lfs merge=lfs -text
70
+ content/static/NotoSansOriya-Black[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
71
+ content/static/NotoSansOriya-Black.ttf filter=lfs diff=lfs merge=lfs -text
72
+ content/static/NotoSansOriya-Bold[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
73
+ content/static/NotoSansOriya-Bold.ttf filter=lfs diff=lfs merge=lfs -text
74
+ content/static/NotoSansOriya-ExtraBold[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
75
+ content/static/NotoSansOriya-ExtraBold.ttf filter=lfs diff=lfs merge=lfs -text
76
+ content/static/NotoSansOriya-ExtraLight[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
77
+ content/static/NotoSansOriya-ExtraLight.ttf filter=lfs diff=lfs merge=lfs -text
78
+ content/static/NotoSansOriya-Light[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
79
+ content/static/NotoSansOriya-Light.ttf filter=lfs diff=lfs merge=lfs -text
80
+ content/static/NotoSansOriya-Medium[[:space:]](2).ttf filter=lfs diff=lfs merge=lfs -text
81
+ content/static/NotoSansOriya-Medium.ttf filter=lfs diff=lfs merge=lfs -text
82
+ content/static/NotoSansOriya-Regular.ttf filter=lfs diff=lfs merge=lfs -text
83
+ content/static/NotoSansOriya-SemiBold.ttf filter=lfs diff=lfs merge=lfs -text
84
+ content/static/NotoSansOriya-Thin.ttf filter=lfs diff=lfs merge=lfs -text
85
+ frontend/public/logo.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-stage build for Hugging Face Spaces (Docker) with single URL serving
2
+
3
+ # --- Frontend build stage ---
4
+ FROM node:20-alpine AS frontend-build
5
+ WORKDIR /app
6
+ COPY frontend/package*.json ./
7
+ RUN npm ci
8
+ COPY frontend ./
9
+ RUN npm run build
10
+
11
+ # --- Backend stage ---
12
+ FROM python:3.11-slim AS runtime
13
+
14
+ # System deps for OpenCV and general libs
15
+ RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ libgl1 \
17
+ libglib2.0-0 \
18
+ libsm6 \
19
+ libxext6 \
20
+ libxrender1 \
21
+ build-essential \
22
+ python3-dev \
23
+ git \
24
+ curl \
25
+ && rm -rf /var/lib/apt/lists/*
26
+
27
+ WORKDIR /app
28
+
29
+ # Install backend deps
30
+ COPY backend/requirements.txt /app/backend/requirements.txt
31
+ RUN pip install --upgrade pip && \
32
+ pip install --no-cache-dir -r /app/backend/requirements.txt
33
+
34
+ # Copy application code
35
+ COPY backend /app/backend
36
+ COPY content /app/content
37
+
38
+ # Copy built frontend
39
+ COPY --from=frontend-build /app/dist /app/frontend_dist
40
+
41
+ # Environment
42
+ ENV PORT=7860 \
43
+ DATA_DIR=/data \
44
+ FRONTEND_DIST=/app/frontend_dist
45
+
46
+ # Ensure data dir exists with permissive permissions (in case volume isn't mounted yet)
47
+ RUN mkdir -p /data && chmod -R 777 /data
48
+
49
+ EXPOSE 7860
50
+
51
+ # Run FastAPI (serve API + static frontend)
52
+ CMD ["python", "-m", "uvicorn", "app.main:app", "--app-dir", "backend", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,169 @@
1
  ---
2
- title: OCR Annotation
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: docker
 
7
  pinned: false
8
- short_description: OCR-annotation
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Odia OCR Annotation + Synthetic Generator
3
+ emoji: 🧩
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
  sdk: docker
7
+ sdk_version: "1.0.0"
8
  pinned: false
 
9
  ---
10
 
11
+ # Odia OCR Annotation + Synthetic Text Generator
12
+
13
+ A unified repository that provides:
14
+ - An OCR annotation tool (React frontend + FastAPI backend) to upload images, run OCR via Gemini, edit validated text, and export CSVs.
15
+ - A synthetic text generator (exposed via backend API) to render Odia/Sanskrit-like text with realistic paper/effects, including HuggingFace dataset processing.
16
+
17
+ ## Repository Structure
18
+
19
+ - `backend/`
20
+ - `app/main.py`: FastAPI app with two routers: `/api/ocr` and `/api/synthetic`
21
+ - `app/api/routers/ocr.py`: OCR endpoints (upload, OCR, annotations import/export)
22
+ - `app/api/routers/synthetic.py`: Synthetic generation endpoints
23
+ - `app/services/`: Shared services
24
+ - `ocr_processor.py`: Gemini OCR
25
+ - `annotations.py`: CSV/JSON I/O
26
+ - `synthetic/`: generator modules (config, core, effects, backgrounds, text_renderer, transformations, huggingface_processor)
27
+ - `data/`: runtime storage
28
+ - `uploaded_images/`: uploaded images (served at `/images`)
29
+ - `annotations/`: `annotations.csv` and JSON
30
+ - `synth_outputs/`: generated images and CSVs (served at `/static/synthetic`)
31
+ - `requirements.txt`: backend dependencies
32
+ - `frontend/`
33
+ - Vite + React + Tailwind app
34
+ - Routes: `/ocr` (annotation UI) and `/synthetic` (generator UI)
35
+ - `content/static/`: NotoSans Oriya fonts used by generator
36
+
37
+ ## Run Locally
38
+
39
+ 1) Backend
40
+ - `pip install -r backend/requirements.txt`
41
+ - From `backend/`: `uvicorn app.main:app --reload`
42
+ - Static mounts:
43
+ - `/images` → `backend/data/uploaded_images`
44
+ - `/static/synthetic` → `backend/data/synth_outputs`
45
+
46
+ 2) Frontend
47
+ - `cd frontend && npm install && npm run dev`
48
+ - Open `http://localhost:5173`
49
+ - Use navigation to switch between OCR and Synthetic pages
50
+
51
+ ## OCR API (FastAPI)
52
+
53
+ - `POST /api/ocr/upload`:
54
+ - Multipart files field: `files`
55
+ - Stores images in `backend/data/uploaded_images`
56
+ - `POST /api/ocr/process`:
57
+ - JSON: `{ "api_key": "<GEMINI_KEY>", "image_filenames": ["img1.png", ...] }`
58
+ - Returns: `{ "img1.png": "extracted text", ... }`
59
+ - `GET /api/ocr/annotations`:
60
+ - Returns current annotations, valid/missing images
61
+ - `POST /api/ocr/save`:
62
+ - JSON: `{ "<filename>": { "extracted_text": "...", "validated_text": "..." } }`
63
+ - Saves to CSV and JSON in `backend/data/annotations`
64
+ - `POST /api/ocr/import`:
65
+ - Multipart: `file` (CSV), `image_folder` (e.g., `uploaded_images`)
66
+ - Validates and returns annotations + image presence
67
+ - `POST /api/ocr/export`:
68
+ - JSON: `{ annotations: {...}, validated_texts: {...} }`
69
+ - Returns a downloadable CSV
70
+
71
+ Note: Legacy endpoints (`/upload/`, `/process-ocr/`, etc.) are temporarily supported for the older UI. Prefer `/api/ocr/...` going forward.
72
+
73
+ ## Synthetic API (FastAPI)
74
+
75
+ - `POST /api/synthetic/generate`
76
+ - Modes: `single`, `comprehensive`, `ultra-realistic`, `huggingface`
77
+ - Request body examples:
78
+ - Non-HF:
79
+ `{ "mode": "single", "text": "some Odia text", "output_subdir": "demo_run_01" }`
80
+ - HF CSV:
81
+ `{ "mode": "huggingface", "dataset_url": "https://.../data.csv", "text_column": "text", "max_samples": 100, "output_subdir": "hf_demo" }`
82
+ - Response:
83
+ - Non-HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>" }`
84
+ - HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>", "csv": "/static/synthetic/<job_id>/dataset.csv", "images_dir": "/static/synthetic/<job_id>/images" }`
85
+ - Outputs are stored under `backend/data/synth_outputs/<job_id>/` and publicly served at `/static/synthetic/<job_id>/...`.
86
+
87
+ ## Fonts
88
+
89
+ - Generator uses fonts from `content/static/`.
90
+ - Default: `NotoSansOriya_Condensed-Regular.ttf` (configurable). Ensure the directory exists.
91
+
92
+ ## Effects & Styles
93
+
94
+ - Paper styles: lined paper, old paper, birch, parchment
95
+ - Effects: rotation, brightness/contrast/noise/blur, fold/crease, ink bleed, perspective, shadows, morphological ops, scanner artifacts, lens distortion, washboard/cylinder warps
96
+
97
+ ## Notes
98
+
99
+ - The backend expects the Gemini API key to be provided per-request to `/api/ocr/process`. Do not hardcode keys server-side.
100
+ - For HuggingFace datasets, the backend uses `datasets` when possible, or downloads raw CSV URLs.
101
+ - You can browse generated outputs via the links returned by `/api/synthetic/generate`.
102
+
103
+ ## Deploy to Hugging Face Spaces (Docker)
104
+
105
+ This repo includes a multi-stage Dockerfile to deploy both backend and the built frontend as a single Space.
106
+
107
+ Steps:
108
+ - Create a new Space → Type: Docker
109
+ - Push this repository to the Space
110
+ - In Space Settings:
111
+ - Enable Persistent Storage
112
+ - (Optional) Add Secrets/Env Vars as needed, e.g., `DATA_DIR=/data` (default already) and `FRONTEND_DIST=/app/frontend_dist`
113
+ - The container exposes port `7860` by default.
114
+
115
+ What the image does:
116
+ - Builds the frontend (`frontend/`) and copies the `dist/` to `/app/frontend_dist`
117
+ - Installs backend dependencies and runs `uvicorn app.main:app` from `backend/`
118
+ - Serves:
119
+ - API at `/api/...`
120
+ - Uploaded images at `/images`
121
+ - Synthetic outputs at `/static/synthetic`
122
+ - Frontend SPA at `/` (served from `/app/frontend_dist`)
123
+
124
+
125
+ 1. **Paper Textures**: Realistic paper fiber patterns using Perlin noise
126
+ 2. **Aging Effects**: Edge darkening and aging patterns
127
+ 3. **Physical Damage**: Fold lines, creases, and ink bleeding
128
+ 4. **Scanner Artifacts**: Dust, compression artifacts, scanning lines
129
+ 5. **Geometric Distortions**: Perspective changes, cylindrical warping
130
+ 6. **Lighting Effects**: Shadows and lens distortions
131
+
132
+ ## Font Requirements
133
+
134
+ The generator requires appropriate fonts for text rendering. Default configuration expects:
135
+ - Font directory: `/content/static/`
136
+ - Font file: `NotoSansOriya_ExtraCondensed-Regular.ttf`
137
+
138
+ You can specify custom fonts using `--font-dir` and `--font` parameters.
139
+
140
+ ## Performance Tips
141
+
142
+ - Use `--max-samples` to limit processing for large datasets
143
+ - Disable advanced effects with `--no-advanced-effects` for faster generation
144
+ - Use multiprocessing with `--use-multiprocessing` for batch jobs
145
+ - Adjust image dimensions to balance quality and speed
146
+
147
+ ## Error Handling
148
+
149
+ The package includes comprehensive error handling:
150
+ - Graceful fallbacks for missing dependencies
151
+ - Detailed logging for debugging
152
+ - Validation of input parameters
153
+ - Safe handling of malformed datasets
154
+
155
+ ## Contributing
156
+
157
+ The modular structure makes it easy to extend:
158
+ - Add new effects in `effects.py`
159
+ - Implement new background styles in `backgrounds.py`
160
+ - Create custom transformations in `transformations.py`
161
+ - Extend dataset processing in `huggingface_processor.py`
162
+
163
+ ## License
164
+
165
+ [Add your license information here]
166
+
167
+ ---
168
+
169
+ **Note**: This is a complete rewrite of the original monolithic code into a modular, extensible package with added HuggingFace dataset processing capabilities.
backend/app/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Makes 'app' a package
2
+
backend/app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (160 Bytes). View file
 
backend/app/api/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # api package
2
+
backend/app/api/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (164 Bytes). View file
 
backend/app/api/routers/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # routers package
2
+
backend/app/api/routers/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (172 Bytes). View file
 
backend/app/api/routers/__pycache__/ocr.cpython-313.pyc ADDED
Binary file (9.04 kB). View file
 
backend/app/api/routers/__pycache__/synthetic.cpython-313.pyc ADDED
Binary file (7.57 kB). View file
 
backend/app/api/routers/ocr.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from typing import List, Dict
4
+ import os
5
+ import shutil
6
+ import re
7
+ import unicodedata
8
+
9
+ from ...services.annotations import (
10
+ load_annotations_from_csv,
11
+ save_annotations_to_csv,
12
+ save_annotations,
13
+ )
14
+ from ...services.ocr_processor import batch_run_ocr
15
+
16
+
17
+ router = APIRouter(prefix="/api/ocr", tags=["ocr"])
18
+
19
+
20
+ SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}
21
+
22
+
23
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
24
+ DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
25
+ DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
26
+ UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
27
+ ANNOTATIONS_DIR = os.path.join(DATA_DIR, "annotations")
28
+ ANNOTATION_CSV_PATH = os.path.join(ANNOTATIONS_DIR, "annotations.csv")
29
+
30
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
31
+ os.makedirs(ANNOTATIONS_DIR, exist_ok=True)
32
+
33
+ # Ensure CSV exists
34
+ if not os.path.exists(ANNOTATION_CSV_PATH):
35
+ with open(ANNOTATION_CSV_PATH, 'w', encoding='utf-8-sig') as f:
36
+ f.write('image_filename,extracted_text,validated_text\n')
37
+
38
+
39
+ @router.post("/upload")
40
+ async def upload_images(files: List[UploadFile] = File(...)):
41
+ image_names: List[str] = []
42
+ for file in files:
43
+ ext = file.filename.split('.')[-1].lower()
44
+ if ext not in SUPPORTED_IMAGE_TYPES:
45
+ continue
46
+
47
+ original_name = os.path.basename(file.filename)
48
+ safe_name = sanitize_filename(original_name)
49
+ path = os.path.join(UPLOAD_DIR, safe_name)
50
+
51
+ # Avoid collisions
52
+ counter = 1
53
+ base, ext_with_dot = os.path.splitext(safe_name)
54
+ while os.path.exists(path):
55
+ candidate = f"{base}_{counter}{ext_with_dot}"
56
+ path = os.path.join(UPLOAD_DIR, candidate)
57
+ safe_name = candidate
58
+ counter += 1
59
+ print("Saving upload to:", path)
60
+ with open(path, "wb") as f:
61
+ f.write(await file.read())
62
+ image_names.append(safe_name)
63
+ return {"status": "success", "images": image_names}
64
+
65
+
66
+ @router.post("/process")
67
+ def process_ocr(request: Dict[str, object]):
68
+ api_key = str(request.get("api_key", ""))
69
+ image_filenames = list(request.get("image_filenames", []))
70
+ if not api_key:
71
+ raise HTTPException(status_code=400, detail="api_key is required")
72
+ if not image_filenames:
73
+ raise HTTPException(status_code=400, detail="image_filenames is required")
74
+ results = batch_run_ocr(image_filenames, UPLOAD_DIR, api_key)
75
+ return results
76
+
77
+
78
+ @router.get("/annotations")
79
+ def get_annotations():
80
+ try:
81
+ annotations, valid_images, missing_images = load_annotations_from_csv(ANNOTATION_CSV_PATH, UPLOAD_DIR)
82
+ return {
83
+ "annotations": annotations,
84
+ "valid_images": valid_images,
85
+ "missing_images": missing_images
86
+ }
87
+ except Exception as e:
88
+ raise HTTPException(status_code=500, detail=str(e))
89
+
90
+
91
+ @router.post("/save")
92
+ def save_annotated(data: dict):
93
+ try:
94
+ save_annotations(ANNOTATION_CSV_PATH, data)
95
+ return {"status": "saved"}
96
+ except Exception as e:
97
+ raise HTTPException(status_code=500, detail=str(e))
98
+
99
+
100
+ @router.post("/import")
101
+ async def import_csv(file: UploadFile = File(...), image_folder: str = Form("uploaded_images")):
102
+ temp_dir = os.path.join(DATA_DIR, "temp")
103
+ os.makedirs(temp_dir, exist_ok=True)
104
+ temp_path = os.path.join(temp_dir, os.path.basename(file.filename))
105
+
106
+ with open(temp_path, "wb") as buffer:
107
+ shutil.copyfileobj(file.file, buffer)
108
+
109
+ try:
110
+ # If relative folder, resolve within DATA_DIR
111
+ folder = image_folder
112
+ if not os.path.isabs(folder):
113
+ folder = os.path.join(DATA_DIR, folder)
114
+ annotations, valid_images, missing_images = load_annotations_from_csv(temp_path, folder)
115
+ return {
116
+ "annotations": annotations,
117
+ "valid_images": valid_images,
118
+ "missing_images": missing_images
119
+ }
120
+ except Exception as e:
121
+ raise HTTPException(status_code=500, detail=str(e))
122
+
123
+
124
+ @router.post("/export")
125
+ async def export_csv(request: Dict[str, dict]):
126
+ try:
127
+ annotations = request.get("annotations", {})
128
+ validated_texts = request.get("validated_texts", {})
129
+
130
+ combined_data: Dict[str, Dict[str, str]] = {}
131
+ for image_name in annotations.keys():
132
+ combined_data[image_name] = {
133
+ "extracted_text": annotations[image_name],
134
+ "validated_text": validated_texts.get(image_name, "")
135
+ }
136
+
137
+ save_annotations_to_csv(ANNOTATION_CSV_PATH, combined_data)
138
+ return FileResponse(
139
+ ANNOTATION_CSV_PATH,
140
+ media_type='text/csv',
141
+ filename='annotations.csv'
142
+ )
143
+ except Exception as e:
144
+ raise HTTPException(status_code=500, detail=str(e))
145
+
146
+ def sanitize_filename(name: str) -> str:
147
+ """Normalize and sanitize a filename for safe cross-platform serving.
148
+
149
+ - Unicode normalize (NFKC)
150
+ - Replace whitespace with underscore
151
+ - Remove characters not in [A-Za-z0-9._-]
152
+ - Collapse multiple underscores
153
+ """
154
+ # Normalize
155
+ name = unicodedata.normalize("NFKC", name)
156
+ base, ext = os.path.splitext(name)
157
+ base = re.sub(r"\s+", "_", base)
158
+ base = re.sub(r"[^A-Za-z0-9._-]", "_", base)
159
+ base = re.sub(r"_+", "_", base).strip("._")
160
+ if not base:
161
+ base = "file"
162
+ ext = "." + ext.lstrip(".").lower()
163
+ ext = re.sub(r"[^A-Za-z0-9]", "", ext)
164
+ ext = f".{ext}" if ext else ""
165
+ return base + ext
166
+ # Legacy routes removed after frontend migration to /api/ocr
backend/app/api/routers/synthetic.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from typing import Dict, Optional
3
+ import os
4
+ import uuid
5
+
6
+ from ...services.synthetic.config import ENHANCED_DEFAULT_PARAMS
7
+ from ...services.synthetic.core import (
8
+ generate_enhanced_sanskrit_samples,
9
+ generate_comprehensive_dataset,
10
+ generate_ultra_realistic_samples,
11
+ )
12
+ from ...services.synthetic.huggingface_processor import HuggingFaceDatasetProcessor
13
+
14
+
15
+ router = APIRouter(prefix="/api/synthetic", tags=["synthetic"])
16
+
17
+
18
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19
+ DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
20
+ DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
21
+ SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")
22
+ FONTS_DIR = os.path.abspath(os.path.join(BASE_DIR, os.pardir, os.pardir, "content", "static"))
23
+
24
+ os.makedirs(SYN_OUT_DIR, exist_ok=True)
25
+
26
+
27
+ def normalized_params(incoming: Optional[Dict]) -> Dict:
28
+ params = ENHANCED_DEFAULT_PARAMS.copy()
29
+ if incoming:
30
+ # convert hyphen keys to underscore if any
31
+ normalized = {k.replace('-', '_'): v for k, v in incoming.items()}
32
+ params.update(normalized)
33
+ # Force font_dir to repo fonts absolute path for backend execution
34
+ params['font_dir'] = FONTS_DIR
35
+ return params
36
+
37
+
38
+ def _list_image_files(base_dir: str, job_id: str) -> Dict:
39
+ """Return relative URLs for images under the job output directory."""
40
+ rels = []
41
+ job_dir = os.path.join(base_dir, job_id)
42
+ if not os.path.isdir(job_dir):
43
+ return {"files": [], "count": 0}
44
+ for root, _, files in os.walk(job_dir):
45
+ for fn in files:
46
+ if fn.lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
47
+ abs_path = os.path.join(root, fn)
48
+ rel_path = os.path.relpath(abs_path, base_dir)
49
+ rels.append(f"/static/synthetic/{rel_path.replace(os.sep, '/')}")
50
+ rels.sort()
51
+ return {"files": rels, "count": len(rels)}
52
+
53
+
54
+ @router.post("/generate")
55
+ def generate(request: Dict[str, object]):
56
+ """
57
+ Universal generation endpoint.
58
+ Body:
59
+ {
60
+ mode: 'single' | 'comprehensive' | 'ultra-realistic' | 'huggingface',
61
+ text?: string (for non-HF modes),
62
+ output_subdir?: string,
63
+ params?: object,
64
+ text_column?: string (HF),
65
+ max_samples?: int (HF),
66
+ dataset_url?: string (HF) OR csv_file?: string (server-side path)
67
+ }
68
+ Returns paths relative to /static/synthetic when applicable.
69
+ """
70
+ mode = str(request.get("mode", "single"))
71
+ text = str(request.get("text", "")).strip() or "କବି ସମ୍ରାଟ ଉପେନ୍ଦ୍ର ଭଞ୍ଜ ..."
72
+ output_subdir = str(request.get("output_subdir", ""))
73
+ params = normalized_params(request.get("params"))
74
+
75
+ # Resolve output dir under synth_outputs
76
+ job_id = output_subdir or str(uuid.uuid4())
77
+ out_dir = os.path.join(SYN_OUT_DIR, job_id)
78
+ os.makedirs(out_dir, exist_ok=True)
79
+
80
+ try:
81
+ if mode == 'single':
82
+ generate_enhanced_sanskrit_samples(
83
+ text=text,
84
+ font_path=os.path.join(params['font_dir'], params['font']),
85
+ output_dir=out_dir,
86
+ params=params,
87
+ )
88
+ listing = _list_image_files(SYN_OUT_DIR, job_id)
89
+ return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}", **listing}
90
+
91
+ elif mode == 'comprehensive':
92
+ generate_comprehensive_dataset(
93
+ text=text,
94
+ output_dir=out_dir,
95
+ params=params,
96
+ )
97
+ listing = _list_image_files(SYN_OUT_DIR, job_id)
98
+ return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}", **listing}
99
+
100
+ elif mode == 'ultra-realistic':
101
+ generate_ultra_realistic_samples(
102
+ text=text,
103
+ output_dir=out_dir,
104
+ style_focus=request.get("style_focus"),
105
+ params=params,
106
+ )
107
+ listing = _list_image_files(SYN_OUT_DIR, job_id)
108
+ return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}", **listing}
109
+
110
+ elif mode == 'huggingface':
111
+ text_column = str(request.get("text_column", "text"))
112
+ max_samples = request.get("max_samples")
113
+ dataset_url = request.get("dataset_url")
114
+ csv_file = request.get("csv_file")
115
+
116
+ processor = HuggingFaceDatasetProcessor(output_dir=out_dir, params=params)
117
+
118
+ if csv_file:
119
+ ok = processor.process_local_csv(csv_path=csv_file, text_column=text_column, max_samples=max_samples)
120
+ elif dataset_url:
121
+ ok = processor.process_huggingface_dataset(dataset_identifier=dataset_url, text_column=text_column, max_samples=max_samples)
122
+ else:
123
+ raise HTTPException(status_code=400, detail="Provide dataset_url or csv_file for huggingface mode")
124
+
125
+ if not ok:
126
+ raise HTTPException(status_code=500, detail="HuggingFace processing failed")
127
+
128
+ listing = _list_image_files(SYN_OUT_DIR, job_id)
129
+ return {
130
+ "status": "ok",
131
+ "output_dir": f"/static/synthetic/{job_id}",
132
+ "csv": f"/static/synthetic/{job_id}/dataset.csv",
133
+ "images_dir": f"/static/synthetic/{job_id}/images",
134
+ **listing,
135
+ }
136
+
137
+ else:
138
+ raise HTTPException(status_code=400, detail=f"Unknown mode: {mode}")
139
+
140
+ except HTTPException:
141
+ raise
142
+ except Exception as e:
143
+ raise HTTPException(status_code=500, detail=str(e))
144
+
145
+
146
+ @router.get("/list")
147
+ def list_outputs(job_id: str = Query(..., description="Job ID (output_subdir) to list")):
148
+ listing = _list_image_files(SYN_OUT_DIR, job_id)
149
+ if listing["count"] == 0:
150
+ raise HTTPException(status_code=404, detail="Job not found or contains no images")
151
+ return {"status": "ok", "job_id": job_id, **listing}
backend/app/data/.gitkeep ADDED
File without changes
backend/app/data/annotations/annotations.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ image_filename,extracted_text,validated_text
backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1.png ADDED

Git LFS Details

  • SHA256: a035d2bf9cdc5c00b59f381116d7a6cfb49d2d6866997e45e24a00d6fc8e8464
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
backend/app/data/synth_outputs/6dba8cf5-8e28-40e6-bf6c-5f1a5f62155c/enhanced_sanskrit_parchment_1_perspective.png ADDED

Git LFS Details

  • SHA256: 7eb8257bdbba599e3640c5fd55952ec9dc60aa73ade391a3aea82a9e8cf95449
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB
backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1.png ADDED

Git LFS Details

  • SHA256: c6a3c50581280a6e9c7af777a8ec46352d1422a6891a149a4e4e57aa5ad5e568
  • Pointer size: 131 Bytes
  • Size of remote file: 200 kB
backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_blur.png ADDED

Git LFS Details

  • SHA256: 9329d31ebaa534bd7dcdeb986ab20c507b2442a73110a6c99bfb623a9348392e
  • Pointer size: 131 Bytes
  • Size of remote file: 171 kB
backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_brightness.png ADDED

Git LFS Details

  • SHA256: b758b275c50bb50c63122c4c6929c2cefed2a9011903dfab35568775cfc71d51
  • Pointer size: 131 Bytes
  • Size of remote file: 192 kB
backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_combined.png ADDED

Git LFS Details

  • SHA256: 7fe03fcb23bf1c7bd7f2f200d44c30a3d65cac833806c384b357bd19d7b1bd6d
  • Pointer size: 131 Bytes
  • Size of remote file: 125 kB
backend/app/data/synth_outputs/81aefb9a-aaaa-4d5d-b838-48ce5b586547/enhanced_sanskrit_birch_1_rotate.png ADDED

Git LFS Details

  • SHA256: 8e1667b418dabd37488e45b08cc6d3794382c044f1b81cbf2a0e3128b21f52ef
  • Pointer size: 131 Bytes
  • Size of remote file: 189 kB
backend/app/main.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Main FastAPI application (serves API and static frontend).

Note: We delay importing routers until after DATA_DIR is finalized to avoid
permission issues when creating directories on certain platforms (e.g., Spaces).
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os


app = FastAPI(title="Unified Backend: OCR + Synthetic")

# CORS (dev-friendly; tighten for prod)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Storage paths (can be overridden by env, e.g., DATA_DIR=/data in Docker).
# BASE_DIR is the parent of the "app" package (i.e. the "backend" directory).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
try:
    os.makedirs(DATA_DIR, exist_ok=True)
except Exception:
    # Fallback to non-persistent tmp if permission denied
    DATA_DIR = "/tmp/data"
    os.environ["DATA_DIR"] = DATA_DIR
    os.makedirs(DATA_DIR, exist_ok=True)

UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SYN_OUT_DIR, exist_ok=True)

# Ensure child modules see the final DATA_DIR (routers read this env var
# at import time, hence the deliberate late imports just below).
os.environ["DATA_DIR"] = DATA_DIR

# Routers (import after DATA_DIR is finalized so they read correct env)
from .api.routers.ocr import router as ocr_router
from .api.routers.synthetic import router as synthetic_router
app.include_router(ocr_router)
app.include_router(synthetic_router)

# Static mounts for data
app.mount("/images", StaticFiles(directory=UPLOAD_DIR), name="images")
app.mount("/static/synthetic", StaticFiles(directory=SYN_OUT_DIR), name="synthetic")

# Serve compiled frontend (if provided via env FRONTEND_DIST).
# NOTE(review): mounting at "/" shadows the JSON root() route below when
# FRONTEND_DIST is set — confirm that is the intended behavior.
FRONTEND_DIST = os.getenv("FRONTEND_DIST")
if FRONTEND_DIST and os.path.isdir(FRONTEND_DIST):
    app.mount("/", StaticFiles(directory=FRONTEND_DIST, html=True), name="frontend")


@app.get("/")
def root():
    # Simple health/info endpoint used when no frontend is mounted at "/".
    return {"message": "Unified backend is running", "routes": ["/api/ocr", "/api/synthetic"]}


# Legacy compatibility (optional):
# If you want to keep old OCR paths working without frontend changes,
# you can import and map handlers or create thin wrappers here.
# For now, keep frontend updates in a later step.
backend/app/services/__pycache__/annotations.cpython-313.pyc ADDED
Binary file (6.53 kB). View file
 
backend/app/services/__pycache__/ocr_processor.cpython-313.pyc ADDED
Binary file (4.98 kB). View file
 
backend/app/services/annotations.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from typing import Tuple, Dict, List
6
+
7
+
8
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes NumPy values to plain Python types.

    Generalizes the original float32/float64 + int32/int64 handling to every
    NumPy floating/integer scalar (float16, uint8, ...), plus NumPy bools and
    ndarrays, so annotation payloads containing NumPy values never raise
    TypeError during json.dump.
    """

    def default(self, obj):
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            # Recursively converts elements; nested arrays become nested lists.
            return obj.tolist()
        return super().default(obj)
15
+
16
+
17
def load_annotations(path: str) -> Dict:
    """Load the annotations JSON at *path*; return {} when the file is absent."""
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    return {}
22
+
23
+
24
def save_annotations(path: str, data: Dict):
    """Merge *data* into the stored annotations and persist JSON + CSV.

    Existing annotations are read from the JSON sidecar (path with .csv
    swapped for .json) when present, otherwise from the CSV at *path*.
    Incoming entries are merged on top, then the full merged set is written
    back to both formats. A non-dict value for a filename is treated as its
    validated_text.
    """
    # NOTE(review): if *path* has no ".csv" suffix, json_path == path and the
    # CSV write below overwrites the JSON — confirm callers always pass .csv.
    json_path = path.replace('.csv', '.json')

    # Start from whatever is already on disk (JSON preferred over CSV).
    merged: Dict[str, Dict[str, str]] = {}
    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as handle:
                merged = json.load(handle)
        except Exception:
            merged = {}
    elif os.path.exists(path):
        try:
            merged = read_annotations_from_csv(path)
        except Exception:
            merged = {}

    # Overlay the incoming annotations.
    for filename, ann in data.items():
        entry = merged.setdefault(filename, {"extracted_text": "", "validated_text": ""})
        if isinstance(ann, dict):
            if "extracted_text" in ann:
                entry["extracted_text"] = str(ann.get("extracted_text", ""))
            if "validated_text" in ann:
                entry["validated_text"] = str(ann.get("validated_text", ""))
        else:
            entry["validated_text"] = str(ann)

    # Persist the full merged set in both formats.
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(merged, handle, ensure_ascii=False, indent=2, cls=CustomJSONEncoder)
    save_annotations_to_csv(path, merged)
61
+
62
+
63
def load_annotations_from_csv(csv_file: str, image_folder: str) -> Tuple[Dict, List[str], List[str]]:
    """Load annotations from a CSV and split rows by image availability.

    Fix vs. the original: empty CSV cells are pandas NaN, which str() turned
    into the literal text "nan"; they now become "" and a missing
    validated_text falls back to extracted_text as intended. Rows with a
    blank/missing image_filename are skipped.

    Returns:
        (annotations, valid_images, missing_images) where annotations maps
        filename -> {'extracted_text', 'validated_text'} for files that exist
        under *image_folder*.

    Raises:
        ValueError: when the 'image_filename' column is absent.
    """
    if not os.path.exists(csv_file):
        return {}, [], []

    df = pd.read_csv(csv_file, encoding='utf-8-sig')

    if 'image_filename' not in df.columns:
        raise ValueError("CSV must contain 'image_filename' column.")

    annotations: Dict[str, Dict[str, str]] = {}
    valid_images: List[str] = []
    missing_images: List[str] = []

    for _, row in df.iterrows():
        raw_name = row['image_filename']
        if pd.isna(raw_name):
            continue  # row has no usable filename
        filename = str(raw_name)

        extracted = row.get('extracted_text', '')
        extracted = '' if pd.isna(extracted) else str(extracted)
        validated = row.get('validated_text')
        # Empty/missing validated_text falls back to the extracted text.
        if validated is None or pd.isna(validated):
            validated = extracted
        else:
            validated = str(validated)

        image_path = os.path.join(image_folder, filename)
        if os.path.exists(image_path):
            annotations[filename] = {
                'extracted_text': extracted,
                'validated_text': validated,
            }
            valid_images.append(filename)
        else:
            missing_images.append(filename)

    return annotations, valid_images, missing_images
89
+
90
+
91
def read_annotations_from_csv(csv_file: str) -> Dict[str, Dict[str, str]]:
    """Read an existing annotations CSV into a filename -> annotation dict.

    Fix vs. the original: empty cells (pandas NaN) are stored as "" instead
    of the literal string "nan", and a row with a missing filename is skipped
    rather than keyed under "nan".
    """
    df = pd.read_csv(csv_file, encoding='utf-8-sig')
    existing: Dict[str, Dict[str, str]] = {}

    def _clean(value) -> str:
        # Normalize pandas NaN / None to the empty string.
        return '' if value is None or pd.isna(value) else str(value)

    for _, row in df.iterrows():
        filename = _clean(row.get('image_filename')).strip()
        if not filename:
            continue
        existing[filename] = {
            'extracted_text': _clean(row.get('extracted_text', '')),
            'validated_text': _clean(row.get('validated_text', '')),
        }
    return existing
104
+
105
+
106
def save_annotations_to_csv(csv_file: str, annotations: Dict[str, Dict[str, str]]):
    """Write annotations to *csv_file* as UTF-8 with BOM (Excel-friendly).

    Fixes two edge cases in the original:
    - a bare filename (empty dirname) no longer crashes ``os.makedirs('')``;
    - an empty annotations dict still writes the header row, so a later
      read sees the expected columns.
    """
    columns = ['image_filename', 'extracted_text', 'validated_text']
    rows = [
        {
            'image_filename': filename,
            'extracted_text': str(ann.get('extracted_text', '')),
            'validated_text': str(ann.get('validated_text', '')),
        }
        for filename, ann in annotations.items()
    ]
    # Explicit columns keep the header even when rows is empty.
    df = pd.DataFrame(rows, columns=columns)
    parent = os.path.dirname(csv_file)
    if parent:  # dirname is '' for a bare filename in the CWD
        os.makedirs(parent, exist_ok=True)
    df.to_csv(csv_file, index=False, encoding='utf-8-sig')
backend/app/services/ocr_processor.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified backend OCR processor using Google Gemini
2
+
3
+ import os
4
+ import base64
5
+ import logging
6
+ import time
7
+ from typing import List, Dict, Optional
8
+
9
+ import google.generativeai as genai
10
+
11
# Configure logging at import time so OCR progress is visible even when the
# host app has not set up logging itself.
# NOTE(review): basicConfig mutates the process-wide root logger; confirm the
# FastAPI host app is happy with that side effect.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)


# File extensions (lowercase, no leading dot) accepted by get_mime_type().
SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}
20
+
21
+
22
def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Return the base64-encoded bytes of *image_path*, or None on failure."""
    if not os.path.exists(image_path):
        logger.error(f"Image not found: {image_path}")
        return None
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
    except Exception as e:
        logger.error(f"Failed to read or encode image {image_path}: {e}")
        return None
    return base64.b64encode(raw_bytes).decode("utf-8")
32
+
33
+
34
def get_mime_type(image_path: str) -> Optional[str]:
    """Map a file's extension to its image MIME type; None if unsupported."""
    extension = image_path.rsplit(".", 1)[-1].lower()
    if extension not in SUPPORTED_IMAGE_TYPES:
        logger.warning(f"Unsupported image format: {extension}")
        return None
    # JPEG is the one extension whose MIME name differs from the suffix.
    return "image/jpeg" if extension == "jpg" else f"image/{extension}"
40
+
41
+
42
def run_gemini_ocr(image_path: str, api_key: str, max_retries: int = 3) -> str:
    """Extract Odia text from one image via the Gemini 1.5 Flash API.

    Returns the extracted text on success, or a bracketed marker string on
    failure ("[Image could not be processed]" for unreadable/unsupported
    files, "[OCR failed after N attempts: ...]" after exhausting retries).
    NOTE(review): with max_retries <= 0 the loop never runs and the function
    implicitly returns None despite the str annotation — confirm callers
    always pass a positive value.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")

    base64_image = encode_image_to_base64(image_path)
    mime_type = get_mime_type(image_path)

    # Either helper returning None means the file is missing/unsupported.
    if base64_image is None or mime_type is None:
        return "[Image could not be processed]"

    prompt = (
        "Extract all visible Odia (ଓଡ଼ିଆ) text from the image accurately.\n"
        "Only output the Odia text content. Do not explain or translate anything.\n"
        "If no Odia text is found, return '[No Odia text found]'."
    )

    # Simple retry loop: 1s pause between attempts, error marker on the last.
    for attempt in range(max_retries):
        try:
            response = model.generate_content(
                [
                    prompt,
                    {
                        "mime_type": mime_type,
                        "data": base64_image
                    }
                ],
                # Low temperature for deterministic transcription output.
                generation_config={
                    "temperature": 0.2,
                    "max_output_tokens": 2048,
                    "top_p": 0.8,
                    "top_k": 40
                }
            )

            text = response.text.strip() if response.text else "[No text extracted]"
            logger.info(f"OCR complete for {os.path.basename(image_path)}")
            return text

        except Exception as e:
            logger.error(f"OCR attempt {attempt + 1} failed for {image_path}: {e}")
            if attempt == max_retries - 1:
                return f"[OCR failed after {max_retries} attempts: {str(e)}]"
            time.sleep(1)
85
+
86
+
87
def batch_run_ocr(image_filenames: List[str], image_folder: str, api_key: str) -> Dict[str, str]:
    """Run Gemini OCR over every file in *image_filenames*.

    Returns a mapping filename -> extracted text, using the marker string
    "[Image file not found]" for files missing from *image_folder*.
    """
    logger.info(f"Starting batch OCR on {len(image_filenames)} images.")
    results: Dict[str, str] = {}

    for filename in image_filenames:
        full_path = os.path.join(image_folder, filename)
        if os.path.exists(full_path):
            results[filename] = run_gemini_ocr(full_path, api_key)
        else:
            logger.error(f"Image not found: {full_path}")
            results[filename] = "[Image file not found]"

    logger.info("Batch OCR complete.")
    return results
102
+
backend/app/services/synthetic/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic Text Generator Package (moved under unified backend services)
3
+ """
4
+
5
+ __version__ = "1.0.0"
6
+
7
+ from .config import ENHANCED_DEFAULT_PARAMS
8
+ from .core import (
9
+ generate_enhanced_sanskrit_samples,
10
+ generate_comprehensive_dataset,
11
+ generate_ultra_realistic_samples,
12
+ )
13
+ from .huggingface_processor import HuggingFaceDatasetProcessor
14
+
15
+ __all__ = [
16
+ "ENHANCED_DEFAULT_PARAMS",
17
+ "generate_enhanced_sanskrit_samples",
18
+ "generate_comprehensive_dataset",
19
+ "generate_ultra_realistic_samples",
20
+ "HuggingFaceDatasetProcessor",
21
+ ]
22
+
backend/app/services/synthetic/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (642 Bytes). View file
 
backend/app/services/synthetic/__pycache__/backgrounds.cpython-313.pyc ADDED
Binary file (11.1 kB). View file
 
backend/app/services/synthetic/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.93 kB). View file
 
backend/app/services/synthetic/__pycache__/core.cpython-313.pyc ADDED
Binary file (7.33 kB). View file
 
backend/app/services/synthetic/__pycache__/effects.cpython-313.pyc ADDED
Binary file (17.5 kB). View file
 
backend/app/services/synthetic/__pycache__/huggingface_processor.cpython-313.pyc ADDED
Binary file (12.6 kB). View file
 
backend/app/services/synthetic/__pycache__/text_renderer.cpython-313.pyc ADDED
Binary file (5.57 kB). View file
 
backend/app/services/synthetic/__pycache__/transformations.cpython-313.pyc ADDED
Binary file (25 kB). View file
 
backend/app/services/synthetic/backgrounds.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Background generation module for creating realistic paper textures and backgrounds
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import numpy as np
8
+ from PIL import Image
9
+ from typing import Dict
10
+ import logging
11
+ from .effects import AdvancedImageEffects
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def create_enhanced_background(width: int, height: int, style: str, params: Dict) -> Image.Image:
    """Create a realistic paper background of the requested size and style.

    If params['image_dir'] names an existing directory with images, a random
    one is used (resized) as the background. Otherwise a texture is
    synthesized for "lined_paper", "old_paper", "birch", or (any other value)
    "parchment".

    Fix vs. the original: the random noise array and the background are both
    uint8, so `background - noise` wrapped around modulo 256 before np.clip
    could act; the subtraction is now performed in int16 so the clip actually
    bounds the result.
    """
    # 1) User-supplied background images take priority over synthesis.
    if params.get("image_dir") and os.path.exists(params["image_dir"]):
        image_files = [f for f in os.listdir(params["image_dir"]) if f.lower().endswith((".png", ".jpg", ".jpeg"))]
        if image_files:
            img_path = os.path.join(params["image_dir"], random.choice(image_files))
            try:
                bg_img = Image.open(img_path).convert('RGB')
                bg_img = bg_img.resize((width, height), Image.LANCZOS)
                return bg_img
            except Exception as e:
                # Fall through to synthesized backgrounds on load failure.
                logger.error(f"Error loading background image {img_path}: {e}")

    # 2) Optional Perlin-based paper fiber texture, subtracted from the base color.
    if params.get('fiber_density', 0) > 0:
        fiber_texture = AdvancedImageEffects.simulate_paper_fiber_texture(width, height, params['fiber_density'])
    else:
        fiber_texture = np.zeros((height, width, 3), dtype=np.uint8)

    if style == "lined_paper":
        background = np.ones((height, width, 3), dtype=np.uint8) * [210, 180, 140]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        # Horizontal ruled lines at a random spacing.
        line_spacing = random.randint(15, 25)
        for y in range(0, height, line_spacing):
            line_width = random.randint(1, 2)
            darkness = random.randint(6, 20) * params["texture"]
            if y + line_width < height:
                background[y:y+line_width, :, :] = np.clip(background[y:y+line_width, :, :] - darkness, 0, 255)
        noise = np.random.randint(0, int(15 * params["noise"]), (height, width, 3), dtype=np.uint8)
        # int16 avoids uint8 wraparound before clipping.
        background = np.clip(background.astype(np.int16) - noise, 0, 255).astype(np.uint8)
        # Radial-falloff stains.
        # NOTE(review): stain_intensity multiplies both `darkness` and the
        # subtraction below — presumably intentional squaring; confirm.
        stain_count = int(random.randint(2, 4) * params["stains"])
        for _ in range(stain_count):
            x = random.randint(0, width-100)
            y = random.randint(0, height-100)
            size = random.randint(20, 60)
            darkness = random.randint(8, 25) * params["stain_intensity"]
            stain_mask = np.zeros((size, size), dtype=np.float32)
            center = size // 2
            for i in range(size):
                for j in range(size):
                    dist = np.sqrt((i - center)**2 + (j - center)**2)
                    if dist < center:
                        stain_mask[i, j] = (1 - dist / center) * np.random.uniform(0.4, 1.0)
            end_y = min(y + size, height)
            end_x = min(x + size, width)
            actual_size_y = end_y - y
            actual_size_x = end_x - x
            if actual_size_y > 0 and actual_size_x > 0:
                stain_region = stain_mask[:actual_size_y, :actual_size_x]
                for c in range(3):
                    background[y:end_y, x:end_x, c] = np.clip(
                        background[y:end_y, x:end_x, c] - darkness * stain_region * params["stain_intensity"], 0, 255
                    )

    elif style == "old_paper":
        background = np.ones((height, width, 3), dtype=np.uint8) * [236, 222, 181]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(12 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background.astype(np.int16) - noise, 0, 255).astype(np.uint8)
        # Darken the blue channel toward all four edges to fake age browning.
        edge_width = width // 10
        for i in range(edge_width):
            factor = (edge_width - i) / edge_width * 15 * params["aging"]
            aging_noise = np.random.uniform(0.5, 1.5, (height, width))
            if i < height:
                background[i, :, 2] = np.clip(background[i, :, 2] - factor * aging_noise[i, :], 0, 255)
            if height - i - 1 >= 0:
                background[height-i-1, :, 2] = np.clip(background[height-i-1, :, 2] - factor * aging_noise[height-i-1, :], 0, 255)
            if i < width:
                background[:, i, 2] = np.clip(background[:, i, 2] - factor * aging_noise[:, i], 0, 255)
            if width - i - 1 >= 0:
                background[:, width-i-1, 2] = np.clip(background[:, width-i-1, 2] - factor * aging_noise[:, width-i-1], 0, 255)

    elif style == "birch":
        background = np.ones((height, width, 3), dtype=np.uint8) * [235, 225, 215]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(10 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background.astype(np.int16) - noise, 0, 255).astype(np.uint8)
        # Irregular circular patches of brightness variation (bark-like grain).
        variation_count = int(150 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(10, 25)
            variation = random.randint(-6, 6) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        shape_factor = np.random.uniform(0.7, 1.3)
                        if dist <= size * shape_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)

    else:  # parchment (default for any unrecognized style)
        background = np.ones((height, width, 3), dtype=np.uint8) * [230, 215, 185]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        # Fine sinusoidal grain variation.
        variation_count = int(400 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(5, 12)
            variation = random.randint(-7, 7) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        grain_factor = 1 + 0.3 * np.sin(j * 0.5) * np.cos(i * 0.3)
                        if dist <= size * grain_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)
        noise = np.random.randint(0, int(8 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background.astype(np.int16) - noise, 0, 255).astype(np.uint8)

    return Image.fromarray(background)
129
+
backend/app/services/synthetic/config.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Configuration parameters for the Synthetic Text Generator
"""

# Defaults consumed by the generators in core.py / backgrounds.py /
# transformations.py. Callers override individual keys; the generators merge
# their dict over this one. Values in 0..1 act as intensity multipliers.
ENHANCED_DEFAULT_PARAMS = {
    # Canvas size and number of base renders per request.
    'width': 400,
    'height': 320,
    'base_images': 1,

    # Font lookup: the generators join font_dir + font into a path.
    'font_dir': './content/static',
    'font': 'NotoSansOriya_Condensed-Regular.ttf',

    # Background degradation intensities.
    'noise': 0.7,
    'aging': 0.6,
    'texture': 0.7,
    'stains': 0.6,
    'stain_intensity': 0.5,

    # Text layout jitter.
    'word_position': 0.6,
    'ink_color': 0.5,
    'line_spacing': 0.4,
    'baseline': 0.3,
    'word_angle': 0.0,

    # Classic post-processing transforms (rotation/brightness/contrast/noise/blur).
    'apply_transforms': True,
    'all_transforms': False,
    'rotation_max': 5.0,
    'brightness_var': 0.2,
    'contrast_var': 0.2,
    'noise_min': 0.01,
    'noise_max': 0.05,
    'blur_min': 0.5,
    'blur_max': 1.0,

    # Advanced realism effects (folds, ink bleed, perspective, shadows, ...).
    'fold_intensity': 0.3,
    'bleed_intensity': 0.3,
    'bleed_radius': 3,
    'corner_displacement': 20,
    'morph_operation': 'mixed',
    'morph_kernel_size': 3,
    'aging_intensity': 0.5,
    'fiber_density': 0.5,
    'enable_advanced_effects': True,
    'advanced_effect_probability': 0.7,
    'shadow_angle': 45,
    'shadow_intensity': 0.4,
    'lens_distortion_strength': 0.2,
    'scanner_artifacts': True,
    'compression_quality': 85,
    'fold_probability': 0.4,
    'crease_probability': 0.3,
    'perspective_probability': 0.5,
    'shadow_probability': 0.6,

    # Execution / misc options.
    'use_multiprocessing': False,
    'num_processes': 4,
    'enable_caching': True,
    'debug_mode': False,
    'image_dir': ''  # optional directory of real background photos
    }
+
backend/app/services/synthetic/core.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core module containing main generation functions
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import logging
8
+ from typing import Dict, List, Optional
9
+ from PIL import Image
10
+ from .config import ENHANCED_DEFAULT_PARAMS
11
+ from .text_renderer import render_enhanced_sanskrit
12
+ from .transformations import (
13
+ apply_enhanced_postprocessing,
14
+ create_comprehensive_effect_combinations,
15
+ apply_systematic_postprocessing,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def generate_enhanced_sanskrit_samples(
    text: str,
    font_path: str = None,
    output_dir: str = None,
    params: Dict = None,
) -> Optional[List[Image.Image]]:
    """Render *text* onto randomly sampled paper styles.

    Draws params['base_images'] style choices (with replacement) from the
    four paper styles, renders one image per choice, and — when output_dir is
    set and apply_transforms is on — also writes transformed variants.

    Returns:
        The list of PIL images when output_dir is None (in-memory mode);
        None when output_dir is given (results are written to disk instead).
    """
    # Merge caller overrides over the defaults (copy so defaults stay untouched).
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        # Mirror the normal return convention: list in memory mode, None in file mode.
        return [] if output_dir is None else None

    styles = ["lined_paper", "old_paper", "birch", "parchment"]

    # Per-style ink RGB chosen to contrast with each background palette.
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Random sample of styles, then collapse into per-style counts.
    sampled_styles = random.choices(styles, k=params['base_images'])
    style_counts = {style: sampled_styles.count(style) for style in styles}
    logger.info(f"Randomly selected styles: {style_counts}")

    base_images = []

    for style, count in style_counts.items():
        for i in range(count):
            font_size = random.randint(12, 18)
            # Filenames are numbered per style (restart at 1 for each style).
            output_path = (
                os.path.join(output_dir, f"enhanced_sanskrit_{style}_{i+1}.png")
                if output_dir
                else None
            )

            img = render_enhanced_sanskrit(
                text=text,
                font_path=font_path,
                output_path=output_path,
                width=width,
                height=height,
                font_size=font_size,
                style=style,
                ink_color=ink_colors[style],
                params=params,
            )

            if img:
                base_images.append(img)
                if params['apply_transforms'] and output_dir:
                    base_filename = f"enhanced_sanskrit_{style}_{i+1}"
                    transformed_images = apply_enhanced_postprocessing(
                        img, output_dir, base_filename, params
                    )
                    # [0] is the untransformed base, already appended above.
                    base_images.extend(transformed_images[1:])

    return base_images if output_dir is None else None
89
+
90
+
91
def generate_comprehensive_dataset(
    text: str, font_path: str = None, output_dir: str = None, params: Dict = None
) -> List[Image.Image]:
    """Render *text* in every paper style and apply every effect combination.

    For each of the four styles, renders one base image and then runs each
    combination from create_comprehensive_effect_combinations() over it,
    collecting all variants. Returns the full list of generated PIL images
    (empty list when the font file is missing).
    """
    # Merge caller overrides over the defaults.
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        return []

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    effect_combinations = create_comprehensive_effect_combinations()

    styles = ["lined_paper", "old_paper", "birch", "parchment"]
    # Per-style ink RGB chosen to contrast with each background palette.
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    all_generated_images: List[Image.Image] = []

    logger.info(
        f"Generating comprehensive dataset with {len(effect_combinations)} effect combinations"
    )

    for style in styles:
        font_size = random.randint(14, 18)
        output_path = os.path.join(output_dir, f"base_{style}.png") if output_dir else None

        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=output_path,
            width=width,
            height=height,
            font_size=font_size,
            style=style,
            ink_color=ink_colors[style],
            params=params,
        )

        if base_image:
            all_generated_images.append(base_image)
            # Fan the base image out through every effect combination.
            for combo_idx, effect_combo in enumerate(effect_combinations):
                base_filename = f"comprehensive_{style}_{combo_idx:03d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, params, effect_combo
                )
                # [0] is the unmodified base, already appended above.
                all_generated_images.extend(enhanced_images[1:])

    logger.info(f"Total images generated: {len(all_generated_images)}")
    return all_generated_images
153
+
154
+
155
def generate_ultra_realistic_samples(
    text: str, output_dir: str = None, style_focus: str = None, params: Dict = None
) -> List[Image.Image]:
    """Render *text* with aggressive realism effects (folds, bleed, shadows).

    style_focus restricts generation to a single paper style; otherwise all
    four styles are used. Returns every generated PIL image (empty list when
    the font file is missing).

    Fix vs. the original: a caller-supplied *params* is now merged over
    ENHANCED_DEFAULT_PARAMS, matching the sibling generators — previously a
    partial dict raised KeyError on missing keys (e.g. 'font_dir'). A font
    existence guard was also added for consistency with the siblings.
    """
    # Merge caller overrides over the defaults, like the sibling generators.
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    # Crank up the degradation parameters for the "ultra realistic" look.
    ultra_realistic_params = {
        **params,
        'fold_intensity': 0.4,
        'bleed_intensity': 0.35,
        'shadow_intensity': 0.5,
        'lens_distortion_strength': 0.15,
        'aging_intensity': 0.7,
        'fiber_density': 0.6,
        'texture': 0.8,
        'noise': 0.6,
        'stains': 0.7,
        'stain_intensity': 0.6,
    }

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Hand-picked effect stacks; each list is applied as one combination.
    ultra_combinations = [
        ["fold_crease", "ink_bleed", "shadow_cast", "scanner_artifacts"],
        ["perspective", "morphological", "lens_distortion", "washboard"],
        ["cylinder", "scanner_artifacts", "lens_distortion", "shadow_cast"],
        ["fold_crease", "ink_bleed", "morphological", "perspective"],
        [
            "fold_crease",
            "ink_bleed",
            "perspective",
            "shadow_cast",
            "morphological",
            "scanner_artifacts",
            "lens_distortion",
        ],
        ["perspective", "lens_distortion", "shadow_cast", "cylinder"],
        ["washboard", "ink_bleed", "morphological", "fold_crease"],
    ]

    font_path = os.path.join(ultra_realistic_params['font_dir'], ultra_realistic_params['font'])
    if not os.path.exists(font_path):
        # Consistent with the sibling generators' missing-font behavior.
        logger.error(f"Font not found at {font_path}")
        return []

    styles = ["lined_paper", "old_paper", "birch", "parchment"] if not style_focus else [style_focus]
    # Per-style ink RGB chosen to contrast with each background palette.
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    all_images: List[Image.Image] = []

    for style in styles:
        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=None,
            width=ultra_realistic_params['width'],
            height=ultra_realistic_params['height'],
            font_size=random.randint(14, 18),
            style=style,
            ink_color=ink_colors[style],
            params=ultra_realistic_params,
        )

        if base_image:
            for combo_idx, effect_combo in enumerate(ultra_combinations):
                base_filename = f"ultra_realistic_{style}_{combo_idx:02d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, ultra_realistic_params, effect_combo
                )
                # [0] is the unmodified base; keep only the effect variants.
                all_images.extend(enhanced_images[1:])

    return all_images
230
+
backend/app/services/synthetic/effects.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Advanced image effects for synthetic text generation
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import random
8
+ import logging
9
+ from typing import List, Tuple
10
+ from noise import pnoise2
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class EffectPlugin:
16
+ def __init__(self, name: str, params: dict):
17
+ self.name = name
18
+ self.params = params
19
+ self.validate_params()
20
+
21
+ def apply(self, image: np.ndarray) -> np.ndarray:
22
+ raise NotImplementedError
23
+
24
+ def validate_params(self):
25
+ pass
26
+
27
+
28
+ class AdvancedImageEffects:
29
+ @staticmethod
30
+ def generate_perlin_noise(width: int, height: int, scale: float = 0.1, octaves: int = 4) -> np.ndarray:
31
+ noise_map = np.zeros((height, width))
32
+ for i in range(height):
33
+ for j in range(width):
34
+ noise_map[i][j] = pnoise2(i * scale, j * scale, octaves=octaves)
35
+ return noise_map
36
+
37
+ @staticmethod
38
+ def simulate_paper_fiber_texture(width: int, height: int, fiber_density: float = 0.5) -> np.ndarray:
39
+ try:
40
+ base_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.02, 4)
41
+ fine_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.1, 2)
42
+ combined = base_texture * 0.7 + fine_texture * 0.3
43
+ combined = ((combined + 1) / 2) * fiber_density * 20
44
+ texture = np.stack([combined, combined, combined], axis=2)
45
+ return texture.astype(np.uint8)
46
+ except Exception as e:
47
+ logger.warning(f"Failed to generate Perlin noise texture: {e}")
48
+ return np.random.randint(0, int(20 * fiber_density), (height, width, 3), dtype=np.uint8)
49
+
50
+ @staticmethod
51
+ def simulate_fold_crease(image: np.ndarray, fold_lines: List[Tuple], fold_intensity: float = 0.5) -> np.ndarray:
52
+ try:
53
+ height, width = image.shape[:2]
54
+ result = image.copy()
55
+ for fold_line in fold_lines:
56
+ y_coords, x_coords = np.ogrid[:height, :width]
57
+ x1, y1, x2, y2 = fold_line
58
+ line_length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
59
+ if line_length > 0:
60
+ distances = np.abs((y2 - y1) * x_coords - (x2 - x1) * y_coords + x2 * y1 - y2 * x1) / line_length
61
+ fold_width = min(width, height) * 0.1
62
+ fold_profile = np.exp(-0.5 * (distances / fold_width)**2)
63
+ fold_effect = fold_profile * fold_intensity * 40
64
+ shadow_mask = (y_coords - y1) * (x2 - x1) - (x_coords - x1) * (y2 - y1) > 0
65
+ shadow_effect = fold_profile * shadow_mask * fold_intensity * 20
66
+ result = result.astype(np.float32)
67
+ result[:, :, 0] -= fold_effect + shadow_effect
68
+ result[:, :, 1] -= fold_effect + shadow_effect
69
+ result[:, :, 2] -= fold_effect + shadow_effect
70
+ result = np.clip(result, 0, 255).astype(np.uint8)
71
+ return result
72
+ except Exception as e:
73
+ logger.error(f"Error in fold/crease simulation: {e}")
74
+ return image
75
+
76
+ @staticmethod
77
+ def simulate_ink_bleed(image: np.ndarray, bleed_intensity: float = 0.3, bleed_radius: int = 3) -> np.ndarray:
78
+ try:
79
+ gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
80
+ _, text_mask = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
81
+ kernel_size = max(1, int(bleed_radius * 2 + 1))
82
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
83
+ bleeding_mask = cv2.dilate(text_mask, kernel, iterations=1)
84
+ bleed_effect = cv2.GaussianBlur(bleeding_mask.astype(np.float32), (kernel_size, kernel_size), 0)
85
+ bleed_effect = bleed_effect * bleed_intensity / 255.0
86
+ result = image.astype(np.float32)
87
+ for c in range(3):
88
+ result[:, :, c] = result[:, :, c] * (1 - 0.5 * bleed_effect)
89
+ return np.clip(result, 0, 255).astype(np.uint8)
90
+ except Exception as e:
91
+ logger.error(f"Error in ink bleed simulation: {e}")
92
+ return image
93
+
94
+ @staticmethod
95
+ def apply_perspective_distortion(image: np.ndarray, corner_displacement: int = 20) -> np.ndarray:
96
+ try:
97
+ height, width = image.shape[:2]
98
+ src_points = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
99
+ dst_points = src_points.copy()
100
+ for i in range(4):
101
+ dst_points[i][0] += random.randint(-corner_displacement, corner_displacement)
102
+ dst_points[i][1] += random.randint(-corner_displacement, corner_displacement)
103
+ dst_points[:, 0] = np.clip(dst_points[:, 0], -width*0.1, width*1.1)
104
+ dst_points[:, 1] = np.clip(dst_points[:, 1], -height*0.1, height*1.1)
105
+ matrix = cv2.getPerspectiveTransform(src_points, dst_points)
106
+ result = cv2.warpPerspective(
107
+ image, matrix, (width, height), borderMode=cv2.BORDER_REPLICATE
108
+ )
109
+ return result
110
+ except Exception as e:
111
+ logger.error(f"Error in perspective distortion: {e}")
112
+ return image
113
+
114
+ @staticmethod
115
+ def apply_shadow_effects(image: np.ndarray, shadow_angle: float = 45, shadow_intensity: float = 0.4) -> np.ndarray:
116
+ try:
117
+ height, width = image.shape[:2]
118
+ result = image.copy().astype(np.float32)
119
+ angle_rad = np.radians(shadow_angle)
120
+ x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
121
+ shadow_factor = (np.cos(angle_rad) * x_coords / width + np.sin(angle_rad) * y_coords / height)
122
+ shadow_factor = np.clip(shadow_factor, 0, 1)
123
+ shadow_effect = 1 - shadow_factor * shadow_intensity
124
+ for c in range(3):
125
+ result[:, :, c] *= shadow_effect
126
+ return np.clip(result, 0, 255).astype(np.uint8)
127
+ except Exception as e:
128
+ logger.error(f"Error in shadow effects: {e}")
129
+ return image
130
+
131
+ @staticmethod
132
+ def apply_morphological_operations(image: np.ndarray, operation: str = 'mixed', kernel_size: int = 3) -> np.ndarray:
133
+ try:
134
+ gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
135
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
136
+ if operation == 'erosion':
137
+ processed = cv2.erode(gray, kernel, iterations=1)
138
+ elif operation == 'dilation':
139
+ processed = cv2.dilate(gray, kernel, iterations=1)
140
+ elif operation == 'opening':
141
+ processed = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
142
+ elif operation == 'closing':
143
+ processed = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
144
+ else:
145
+ ops = ['erosion', 'dilation', 'opening', 'closing']
146
+ import random as _r
147
+ chosen = _r.choice(ops)
148
+ return AdvancedImageEffects.apply_morphological_operations(image, chosen, kernel_size)
149
+ return cv2.cvtColor(processed, cv2.COLOR_GRAY2RGB)
150
+ except Exception as e:
151
+ logger.error(f"Error in morphological operations: {e}")
152
+ return image
153
+
154
+ @staticmethod
155
+ def simulate_scanner_artifacts(image: np.ndarray, compression_quality: int = 85) -> np.ndarray:
156
+ try:
157
+ height, width = image.shape[:2]
158
+ result = image.copy()
159
+ for y in range(0, height, random.randint(8, 15)):
160
+ intensity = random.randint(5, 15)
161
+ if y < height:
162
+ result[y, :, :] = np.clip(result[y, :, :] - intensity, 0, 255)
163
+ dust_count = random.randint(3, 8)
164
+ for _ in range(dust_count):
165
+ x = random.randint(0, width - 5)
166
+ y = random.randint(0, height - 5)
167
+ size = random.randint(2, 5)
168
+ dust_intensity = random.randint(20, 40)
169
+ result[y:y+size, x:x+size, :] = np.clip(result[y:y+size, x:x+size, :] - dust_intensity, 0, 255)
170
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compression_quality]
171
+ _, encimg = cv2.imencode('.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR), encode_param)
172
+ result = cv2.imdecode(encimg, cv2.IMREAD_COLOR)
173
+ result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
174
+ return result
175
+ except Exception as e:
176
+ logger.error(f"Error in scanner artifacts: {e}")
177
+ return image
178
+
179
+ @staticmethod
180
+ def apply_lens_distortion(image: np.ndarray, strength: float = 0.2) -> np.ndarray:
181
+ try:
182
+ height, width = image.shape[:2]
183
+ center_x, center_y = width // 2, height // 2
184
+ y_coords, x_coords = np.ogrid[:height, :width]
185
+ distances = np.sqrt((x_coords - center_x)**2 + (y_coords - center_y)**2)
186
+ max_distance = np.sqrt(center_x**2 + center_y**2)
187
+ normalized_distances = distances / max_distance
188
+ distortion_factor = 1 + strength * normalized_distances**2
189
+ map_x = ((x_coords - center_x) / distortion_factor + center_x).astype(np.float32)
190
+ map_y = ((y_coords - center_y) / distortion_factor + center_y).astype(np.float32)
191
+ result = cv2.remap(image, map_x, map_y, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
192
+ return result
193
+ except Exception as e:
194
+ logger.error(f"Error in lens distortion: {e}")
195
+ return image
196
+
197
+
198
+ def generate_random_fold_lines(image_size: Tuple[int, int], num_folds: int = None) -> List[Tuple]:
199
+ width, height = image_size
200
+ if num_folds is None:
201
+ num_folds = random.randint(1, 3)
202
+ fold_lines = []
203
+ for _ in range(num_folds):
204
+ x1 = random.randint(0, width)
205
+ y1 = random.randint(0, height)
206
+ x2 = random.randint(0, width)
207
+ y2 = random.randint(0, height)
208
+ fold_lines.append((x1, y1, x2, y2))
209
+ return fold_lines
210
+
211
+
212
+ def safe_apply_effect(effect_func, image: np.ndarray, effect_name: str) -> np.ndarray:
213
+ try:
214
+ return effect_func(image)
215
+ except Exception as e:
216
+ logger.error(f"Error applying {effect_name}: {e}")
217
+ return image
218
+
backend/app/services/synthetic/huggingface_processor.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face dataset processor for downloading datasets and generating synthetic text images
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import pandas as pd
8
+ from typing import Dict, List, Optional
9
+ from urllib.parse import urlparse
10
+ import requests
11
+ from PIL import Image
12
+
13
+ from datasets import load_dataset
14
+ import datasets
15
+
16
+ from .config import ENHANCED_DEFAULT_PARAMS
17
+ from .text_renderer import render_enhanced_sanskrit
18
+ from .transformations import apply_enhanced_postprocessing
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class HuggingFaceDatasetProcessor:
24
+ def __init__(self, output_dir: str = "hf_dataset_output", params: Dict = None):
25
+ self.output_dir = output_dir
26
+ self.params = params if params else ENHANCED_DEFAULT_PARAMS.copy()
27
+ self.image_dir = os.path.join(output_dir, "images")
28
+ self.csv_path = os.path.join(output_dir, "dataset.csv")
29
+ os.makedirs(self.image_dir, exist_ok=True)
30
+ self.ink_colors = {
31
+ "lined_paper": (60, 30, 10),
32
+ "old_paper": (20, 20, 20),
33
+ "birch": (50, 20, 10),
34
+ "parchment": (10, 10, 10),
35
+ }
36
+
37
+ def load_huggingface_dataset(self, dataset_name: str, config_name: str = None, split: str = None, streaming: bool = False):
38
+ try:
39
+ dataset = load_dataset(dataset_name, config_name, split=split, streaming=streaming)
40
+ if streaming:
41
+ return dataset
42
+ else:
43
+ if isinstance(dataset, datasets.DatasetDict):
44
+ if split:
45
+ df = dataset[split].to_pandas()
46
+ elif 'train' in dataset:
47
+ df = dataset['train'].to_pandas()
48
+ else:
49
+ first_split = list(dataset.keys())[0]
50
+ df = dataset[first_split].to_pandas()
51
+ else:
52
+ df = dataset.to_pandas()
53
+ return df
54
+ except Exception as e:
55
+ logger.error(f"Error loading Hugging Face dataset: {e}")
56
+ return None
57
+
58
+ def download_dataset_from_url(self, url: str, output_file: str = "dataset.csv") -> bool:
59
+ try:
60
+ if "huggingface.co/datasets" in url:
61
+ parsed = urlparse(url)
62
+ dataset_path = parsed.path.strip('/')
63
+ if "/blob/main/" in url:
64
+ raw_url = url.replace("/blob/main/", "/raw/main/")
65
+ elif "/tree/main" in url:
66
+ raw_url = url.replace("/tree/main", "/raw/main/dataset.csv")
67
+ else:
68
+ raw_url = f"https://huggingface.co/{dataset_path}/raw/main/dataset.csv"
69
+ else:
70
+ raw_url = url
71
+ response = requests.get(raw_url, stream=True)
72
+ response.raise_for_status()
73
+ file_path = os.path.join(self.output_dir, output_file)
74
+ with open(file_path, 'wb') as f:
75
+ for chunk in response.iter_content(chunk_size=8192):
76
+ f.write(chunk)
77
+ return True
78
+ except Exception as e:
79
+ logger.error(f"Error downloading dataset: {e}")
80
+ return False
81
+
82
+ def load_dataset(self, file_path: str, text_column: str) -> Optional[pd.DataFrame]:
83
+ try:
84
+ encodings = ['utf-8', 'iso-8859-1', 'windows-1252', 'utf-16']
85
+ df = None
86
+ for encoding in encodings:
87
+ try:
88
+ df = pd.read_csv(file_path, encoding=encoding)
89
+ break
90
+ except UnicodeDecodeError:
91
+ continue
92
+ if df is None:
93
+ raise Exception("Could not load dataset with any supported encoding")
94
+ if text_column not in df.columns:
95
+ raise Exception(f"Column '{text_column}' not found. Available columns: {list(df.columns)}")
96
+ initial_rows = len(df)
97
+ df = df.dropna(subset=[text_column])
98
+ df = df[df[text_column].str.strip() != ""]
99
+ return df
100
+ except Exception as e:
101
+ logger.error(f"Error loading dataset: {e}")
102
+ return None
103
+
104
+ def generate_images_from_dataset(self, dataset_df: pd.DataFrame, text_column: str, max_samples: int = None) -> List[Dict]:
105
+ results = []
106
+ if max_samples and max_samples < len(dataset_df):
107
+ dataset_df = dataset_df.head(max_samples)
108
+ styles = ["lined_paper", "old_paper", "birch", "parchment"]
109
+ for idx, row in dataset_df.iterrows():
110
+ try:
111
+ text = str(row[text_column]).strip()
112
+ if not text:
113
+ continue
114
+ style = styles[idx % len(styles)]
115
+ base_filename = f"text_image_{idx:06d}"
116
+ image_filename = f"{base_filename}.png"
117
+ image_path = os.path.join(self.image_dir, image_filename)
118
+ img = render_enhanced_sanskrit(
119
+ text=text,
120
+ font_path=os.path.join(self.params['font_dir'], self.params['font']),
121
+ output_path=None,
122
+ width=self.params['width'],
123
+ height=self.params['height'],
124
+ font_size=14,
125
+ style=style,
126
+ ink_color=self.ink_colors[style],
127
+ params=self.params,
128
+ )
129
+ if img is None:
130
+ continue
131
+ if self.params.get('apply_transforms', True):
132
+ transformed_images = apply_enhanced_postprocessing(img, None, base_filename, self.params)
133
+ final_img = transformed_images[-1] if len(transformed_images) > 1 else img
134
+ else:
135
+ final_img = img
136
+ final_img.save(image_path)
137
+ result = {
138
+ 'row_index': idx,
139
+ 'image_path': os.path.relpath(image_path, self.output_dir),
140
+ 'text': text,
141
+ 'style': style,
142
+ 'image_filename': image_filename,
143
+ }
144
+ for col in dataset_df.columns:
145
+ if col != text_column:
146
+ result[col] = row[col]
147
+ results.append(result)
148
+ except Exception as e:
149
+ logger.error(f"Error processing row {idx}: {e}")
150
+ continue
151
+ return results
152
+
153
+ def save_results_csv(self, results: List[Dict], additional_info: Dict = None):
154
+ try:
155
+ if not results:
156
+ return
157
+ df = pd.DataFrame(results)
158
+ important_cols = ['image_path', 'text', 'style', 'image_filename']
159
+ other_cols = [col for col in df.columns if col not in important_cols]
160
+ df = df[important_cols + other_cols]
161
+ df.to_csv(self.csv_path, index=False, encoding='utf-8')
162
+ if additional_info:
163
+ metadata_path = os.path.join(self.output_dir, "metadata.txt")
164
+ with open(metadata_path, 'w', encoding='utf-8') as f:
165
+ f.write("Dataset Processing Metadata\n")
166
+ f.write("=" * 30 + "\n")
167
+ for key, value in additional_info.items():
168
+ f.write(f"{key}: {value}\n")
169
+ except Exception as e:
170
+ logger.error(f"Error saving results: {e}")
171
+
172
+ def process_huggingface_dataset(self, dataset_identifier: str, text_column: str, max_samples: int = None, config_name: str = None, split: str = None) -> bool:
173
+ try:
174
+ df = None
175
+ if not dataset_identifier.startswith("http"):
176
+ df = self.load_huggingface_dataset(dataset_identifier, config_name=config_name, split=split)
177
+ if df is None and dataset_identifier.startswith("http"):
178
+ dataset_file = "downloaded_dataset.csv"
179
+ if self.download_dataset_from_url(dataset_identifier, dataset_file):
180
+ dataset_path = os.path.join(self.output_dir, dataset_file)
181
+ df = self.load_dataset(dataset_path, text_column)
182
+ if df is None:
183
+ return False
184
+ if text_column not in df.columns:
185
+ return False
186
+ results = self.generate_images_from_dataset(df, text_column, max_samples)
187
+ if not results:
188
+ return False
189
+ additional_info = {
190
+ "dataset_identifier": dataset_identifier,
191
+ "config_name": config_name,
192
+ "split": split,
193
+ "text_column": text_column,
194
+ "original_rows": len(df),
195
+ "processed_rows": len(results),
196
+ "max_samples": max_samples or "all",
197
+ "output_directory": self.output_dir,
198
+ "image_directory": self.image_dir,
199
+ }
200
+ self.save_results_csv(results, additional_info)
201
+ return True
202
+ except Exception as e:
203
+ logger.error(f"Error in dataset processing workflow: {e}")
204
+ return False
205
+
206
+ def process_local_csv(self, csv_path: str, text_column: str, max_samples: int = None) -> bool:
207
+ try:
208
+ df = self.load_dataset(csv_path, text_column)
209
+ if df is None:
210
+ return False
211
+ results = self.generate_images_from_dataset(df, text_column, max_samples)
212
+ if not results:
213
+ return False
214
+ additional_info = {
215
+ "source_file": csv_path,
216
+ "text_column": text_column,
217
+ "original_rows": len(df),
218
+ "processed_rows": len(results),
219
+ "max_samples": max_samples or "all",
220
+ "output_directory": self.output_dir,
221
+ "image_directory": self.image_dir,
222
+ }
223
+ self.save_results_csv(results, additional_info)
224
+ return True
225
+ except Exception as e:
226
+ logger.error(f"Error processing local CSV: {e}")
227
+ return False
228
+
backend/app/services/synthetic/text_renderer.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text rendering module for Sanskrit/Oriya text with various effects
3
+ """
4
+
5
+ import os
6
+ import math
7
+ import random
8
+ import logging
9
+ from typing import Dict, Tuple, Optional
10
+ import numpy as np
11
+ from PIL import Image, ImageDraw, ImageFont
12
+ from .backgrounds import create_enhanced_background
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def render_enhanced_sanskrit(
18
+ text: str,
19
+ font_path: str,
20
+ output_path: str,
21
+ width: int,
22
+ height: int,
23
+ font_size: int,
24
+ style: str,
25
+ ink_color: Tuple[int, int, int],
26
+ params: Dict,
27
+ ) -> Optional[Image.Image]:
28
+ img = create_enhanced_background(width, height, style, params)
29
+ draw = ImageDraw.Draw(img)
30
+
31
+ try:
32
+ font = ImageFont.truetype(font_path, font_size)
33
+ words = text.strip().replace('\n', ' ').split()
34
+ y_position = random.randint(25, 75)
35
+ margin = 25
36
+ available_width = width - 2 * margin
37
+ space_width = draw.textlength(" ", font=font)
38
+
39
+ current_line = []
40
+ current_line_width = 0
41
+ all_lines = []
42
+ for word in words:
43
+ word_width = draw.textlength(word, font=font)
44
+ if current_line and current_line_width + space_width + word_width > available_width:
45
+ all_lines.append(current_line)
46
+ current_line = [word]
47
+ current_line_width = word_width
48
+ else:
49
+ if current_line:
50
+ current_line_width += space_width + word_width
51
+ else:
52
+ current_line_width = word_width
53
+ current_line.append(word)
54
+ if current_line:
55
+ all_lines.append(current_line)
56
+
57
+ for line in all_lines:
58
+ line_text = " ".join(line)
59
+ line_width = draw.textlength(line_text, font=font)
60
+ x_position = (width - line_width) // 2
61
+ baseline_offset = random.randint(-2, 2) * params["baseline"]
62
+ y_line_position = y_position + baseline_offset
63
+ if y_line_position + font_size > height - margin:
64
+ break
65
+ x_word_position = x_position
66
+ for word in line:
67
+ word_x_offset = int(random.uniform(-1.5, 1.5) * params["word_position"])
68
+ word_y_offset = int(random.uniform(-1, 1) * params["word_position"])
69
+ color_variation = int(random.randint(-3, 3) * params["ink_color"])
70
+ word_color = (
71
+ np.clip(ink_color[0] + color_variation, 0, 255),
72
+ np.clip(ink_color[1] + color_variation, 0, 255),
73
+ np.clip(ink_color[2] + color_variation, 0, 255),
74
+ )
75
+ word_width = draw.textlength(word, font=font)
76
+ word_height = font_size * 1.2
77
+ if params["word_angle"] > 0:
78
+ word_angle = random.uniform(-2, 2) * params["word_angle"]
79
+ diagonal = math.sqrt(word_width**2 + word_height**2)
80
+ padding = int(diagonal * 0.5)
81
+ temp_width = int(diagonal + 2 * padding)
82
+ temp_height = int(diagonal + 2 * padding)
83
+ txt_img = Image.new('RGBA', (temp_width, temp_height), (0, 0, 0, 0))
84
+ txt_d = ImageDraw.Draw(txt_img)
85
+ center_x = temp_width // 2 - word_width // 2
86
+ center_y = temp_height // 2 - word_height // 2
87
+ txt_d.text((center_x, center_y), word, font=font, fill=word_color + (255,))
88
+ rotated = txt_img.rotate(
89
+ word_angle, resample=Image.BICUBIC, expand=0, center=(temp_width//2, temp_height//2)
90
+ )
91
+ paste_x = int(x_word_position + word_x_offset - padding)
92
+ paste_y = int(y_line_position + word_y_offset - padding)
93
+ img.paste(rotated, (paste_x, paste_y), rotated)
94
+ else:
95
+ draw.text(
96
+ (x_word_position + word_x_offset, y_line_position + word_y_offset),
97
+ word, fill=word_color, font=font
98
+ )
99
+ x_word_position += word_width + space_width
100
+ line_spacing_factor = 1.0 + (random.uniform(-0.1, 0.1) * params["line_spacing"])
101
+ y_position += int(font_size * 1.2 * line_spacing_factor)
102
+
103
+ if output_path is not None:
104
+ img.save(output_path)
105
+ logger.info(f"Saved rendered Sanskrit to {output_path}")
106
+
107
+ return img
108
+
109
+ except Exception as e:
110
+ logger.error(f"Error rendering text with font {font_path}: {e}")
111
+ return None
112
+
backend/app/services/synthetic/transformations.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Transformations module for geometric transformations and post-processing effects
3
+ """
4
+
5
+ import cv2
6
+ import os
7
+ import random
8
+ import logging
9
+ import itertools
10
+ from math import pi
11
+ from typing import List, Dict
12
+ import numpy as np
13
+ from PIL import Image, ImageEnhance, ImageFilter
14
+ from .effects import AdvancedImageEffects, generate_random_fold_lines, safe_apply_effect
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ def cylindrical_edge_warp(pil_img: Image.Image, side: str = "left", strength: float = 0.6, warp_portion: float = 0.45) -> Image.Image:
20
+ try:
21
+ img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
22
+ h, w = img.shape[:2]
23
+ W = int(warp_portion * w)
24
+ R = W / strength if strength != 0 else 1e9
25
+ X, Y = np.meshgrid(np.arange(w), np.arange(h))
26
+ map_x = X.astype(np.float32).copy()
27
+ map_y = Y.astype(np.float32).copy()
28
+ if side == "left":
29
+ strip = X < W
30
+ dx = W - X[strip]
31
+ else:
32
+ strip = X > (w - W)
33
+ dx = X[strip] - (w - W)
34
+ theta = dx / R
35
+ displacement = R * np.sin(theta) - dx
36
+ map_x[strip] += displacement
37
+ scale_y = np.cos(theta)
38
+ map_y[strip] = (Y[strip] - h/2) / scale_y + h/2
39
+ warped = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
40
+ return Image.fromarray(cv2.cvtColor(warped, cv2.COLOR_BGR2RGB))
41
+ except Exception as e:
42
+ logger.error(f"Error in cylindrical warp: {e}")
43
+ return pil_img
44
+
45
+
46
+ def washboard_warp(pil_img: Image.Image, amplitude: float = 8, wavelength: float = 120, phase: float = 0.0, decay_from_top: bool = True) -> Image.Image:
47
+ try:
48
+ img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
49
+ h, w = img.shape[:2]
50
+ x = np.arange(w, dtype=np.float32)
51
+ dy = amplitude * np.sin(2*pi*x / wavelength + phase)
52
+ if decay_from_top:
53
+ atten = np.linspace(1, 0.2, h, dtype=np.float32)[:, None]
54
+ else:
55
+ atten = 1.0
56
+ map_x, map_y = np.meshgrid(x, np.arange(h, dtype=np.float32))
57
+ map_y += dy * atten
58
+ warped = cv2.remap(img, map_x, map_y, cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
59
+ return Image.fromarray(cv2.cvtColor(warped, cv2.COLOR_BGR2RGB))
60
+ except Exception as e:
61
+ logger.error(f"Error in washboard warp: {e}")
62
+ return pil_img
63
+
64
+
65
+ def apply_enhanced_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict) -> List[Image.Image]:
66
+ all_images = [original_image]
67
+ transforms = []
68
+
69
+ def rotate_image(img, angle):
70
+ bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
71
+ return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)
72
+
73
+ def adjust_brightness(img, factor):
74
+ enhancer = ImageEnhance.Brightness(img)
75
+ return enhancer.enhance(factor)
76
+
77
+ def adjust_contrast(img, factor):
78
+ enhancer = ImageEnhance.Contrast(img)
79
+ return enhancer.enhance(factor)
80
+
81
+ def add_noise(img, intensity):
82
+ img_array = np.array(img).astype(np.float32)
83
+ noise = np.random.normal(0, intensity * 255, img_array.shape)
84
+ noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
85
+ return Image.fromarray(noisy_array)
86
+
87
+ def blur_image(img, radius):
88
+ return img.filter(ImageFilter.GaussianBlur(radius=radius))
89
+
90
+ transforms.append(("rotate", lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"]))))
91
+ transforms.append(("brightness", lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"]))))
92
+ transforms.append(("contrast", lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"]))))
93
+ transforms.append(("noise", lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"]))))
94
+ transforms.append(("blur", lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"]))))
95
+
96
+ transforms.append(("washboard", lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False]))))
97
+ transforms.append(("cylinder", lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5))))
98
+
99
+ if params.get('enable_advanced_effects', True):
100
+ if random.random() < params.get('fold_probability', 0.4):
101
+ transforms.append(("fold_crease", lambda img: Image.fromarray(
102
+ AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))
103
+ )))
104
+ if random.random() < params.get('advanced_effect_probability', 0.7):
105
+ transforms.append(("ink_bleed", lambda img: Image.fromarray(
106
+ AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))
107
+ )))
108
+ if random.random() < params.get('perspective_probability', 0.5):
109
+ transforms.append(("perspective", lambda img: Image.fromarray(
110
+ AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))
111
+ )))
112
+ if random.random() < params.get('shadow_probability', 0.6):
113
+ transforms.append(("shadow_cast", lambda img: Image.fromarray(
114
+ AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))
115
+ )))
116
+ if random.random() < params.get('advanced_effect_probability', 0.7):
117
+ transforms.append(("morphological", lambda img: Image.fromarray(
118
+ AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))
119
+ )))
120
+ if params.get('scanner_artifacts', True) and random.random() < 0.3:
121
+ transforms.append(("scanner_artifacts", lambda img: Image.fromarray(
122
+ AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))
123
+ )))
124
+ if random.random() < 0.3:
125
+ transforms.append(("lens_distortion", lambda img: Image.fromarray(
126
+ AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))
127
+ )))
128
+
129
+ if params["all_transforms"]:
130
+ selected_transforms = transforms
131
+ else:
132
+ n_transforms = random.randint(1, min(5, len(transforms)))
133
+ selected_transforms = random.sample(transforms, n_transforms)
134
+
135
+ for transform_name, transform_func in selected_transforms:
136
+ try:
137
+ transformed_img = safe_apply_effect(transform_func, original_image, transform_name)
138
+ if output_dir:
139
+ transformed_filename = f"{base_filename}_{transform_name}.png"
140
+ transformed_path = os.path.join(output_dir, transformed_filename)
141
+ transformed_img.save(transformed_path)
142
+ logger.info(f"Saved transformed image to {transformed_path}")
143
+ all_images.append(transformed_img)
144
+ except Exception as e:
145
+ logger.error(f"Error applying transform {transform_name}: {e}")
146
+
147
+ if len(selected_transforms) > 1:
148
+ try:
149
+ combined_img = original_image.copy()
150
+ for _, transform_func in selected_transforms:
151
+ combined_img = safe_apply_effect(transform_func, combined_img, "combined")
152
+ if output_dir:
153
+ combined_path = os.path.join(output_dir, f"{base_filename}_combined.png")
154
+ combined_img.save(combined_path)
155
+ logger.info(f"Saved combined transformation to {combined_path}")
156
+ all_images.append(combined_img)
157
+ except Exception as e:
158
+ logger.error(f"Error creating combined transformation: {e}")
159
+
160
+ return all_images
161
+
162
+
163
def create_comprehensive_effect_combinations():
    """Enumerate the effect combinations used for systematic augmentation runs.

    Returns a list of effect-name lists, in a fixed order:
      1. every individual effect (basic, then geometric, then advanced),
      2. all pairs of advanced effects,
      3. every geometric effect paired with every advanced effect,
      4. all triples of advanced effects,
      5. three large presets (first four advanced, last three advanced, all seven).
    """
    basic = ["rotate", "brightness", "contrast", "noise", "blur"]
    geometric = ["washboard", "cylinder"]
    advanced = [
        "fold_crease",
        "ink_bleed",
        "perspective",
        "shadow_cast",
        "morphological",
        "scanner_artifacts",
        "lens_distortion",
    ]

    # Single-effect runs for every effect in the catalogue.
    combos = [[name] for name in basic + geometric + advanced]
    # All unordered pairs drawn from the advanced set.
    combos.extend(list(pair) for pair in itertools.combinations(advanced, 2))
    # Cross-product of geometric warps with advanced degradations.
    combos.extend([geo, adv] for geo in geometric for adv in advanced)
    # All unordered triples drawn from the advanced set.
    combos.extend(list(triple) for triple in itertools.combinations(advanced, 3))
    # Heavy presets: front half, back half, then everything at once.
    combos.append(advanced[:4])
    combos.append(advanced[4:])
    combos.append(advanced)
    return combos
189
+
190
+
191
def apply_systematic_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict, effect_combination: List[str] = None) -> List[Image.Image]:
    """Apply a fixed pipeline of randomized base effects, then an optional named
    effect combination, to ``original_image``.

    The five base effects (rotate, brightness, contrast, noise, blur) are always
    applied in that order with parameters drawn from ``params``; the effects
    listed in ``effect_combination`` (if any) are applied afterwards, in order.
    If ``output_dir`` is truthy, the final combined image is saved there as
    ``{base_filename}_{joined_combo_names}.png``.

    Returns a two-element list: the untouched original image and the fully
    processed image.

    NOTE(review): ``effect_combination`` defaults to ``None`` but is typed
    ``List[str]`` — callers may pass ``None`` or omit it; ``Optional[List[str]]``
    would be more accurate (left unchanged here).
    """
    # Result always starts with the unmodified input.
    all_images = [original_image]

    def rotate_image(img, angle):
        # Fill exposed corners with the image's mean color so rotation does not
        # introduce black borders.
        # NOTE(review): for single-channel images np.mean over both axes yields a
        # 0-d array and tuple() would raise — presumably inputs are RGB; confirm.
        bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
        return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)

    def adjust_brightness(img, factor):
        # factor > 1 brightens, < 1 darkens.
        enhancer = ImageEnhance.Brightness(img)
        return enhancer.enhance(factor)

    def adjust_contrast(img, factor):
        # factor > 1 increases contrast, < 1 flattens it.
        enhancer = ImageEnhance.Contrast(img)
        return enhancer.enhance(factor)

    def add_noise(img, intensity):
        # Additive Gaussian noise; intensity is a fraction of full scale (255).
        img_array = np.array(img).astype(np.float32)
        noise = np.random.normal(0, intensity * 255, img_array.shape)
        noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_array)

    def blur_image(img, radius):
        return img.filter(ImageFilter.GaussianBlur(radius=radius))

    # Registry of effect name -> callable. Base effects read required keys from
    # params (KeyError if missing); advanced effects use params.get() defaults.
    # Each lambda draws its random parameters at call time.
    transforms = {
        "rotate": lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"])),
        "brightness": lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"])),
        "contrast": lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"])),
        "noise": lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"])),
        "blur": lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"])) ,
        "washboard": lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False])),
        "cylinder": lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5)),
        "fold_crease": lambda img: Image.fromarray(AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))),
        "ink_bleed": lambda img: Image.fromarray(AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))),
        "perspective": lambda img: Image.fromarray(AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))),
        "shadow_cast": lambda img: Image.fromarray(AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))),
        "morphological": lambda img: Image.fromarray(AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))),
        "scanner_artifacts": lambda img: Image.fromarray(AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))),
        "lens_distortion": lambda img: Image.fromarray(AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))),
    }

    # Always run the base pipeline first, in a fixed order.
    # safe_apply_effect wraps each transform call (defined elsewhere in file).
    current_image = original_image
    for effect_name in ["rotate", "brightness", "contrast", "noise", "blur"]:
        current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)

    if effect_combination:
        # Apply the requested named effects on top; unknown names are skipped.
        for effect_name in effect_combination:
            if effect_name in transforms:
                current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)
        if output_dir:
            # Filename encodes the combination, e.g. base_fold_crease_ink_bleed.png
            combo_name = "_".join(effect_combination)
            filename = f"{base_filename}_{combo_name}.png"
            filepath = os.path.join(output_dir, filename)
            current_image.save(filepath)
            logger.info(f"Saved combination image: {filepath}")

    all_images.append(current_image)
    return all_images
249
+
backend/data/annotations/annotations.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image_filename,extracted_text,validated_text
2
+ s1.png,"ଉପକ୍ରମ [ ସମ୍ପାଦନା ]
3
+ ବିଜ୍ଞାନ ହେଉଛି ଏକ ସୁବ୍ୟବସ୍ଥିତ ପ୍ରଣାଳୀ ଯାହାଦ୍ବାରା ବିଶ୍ବର ସମସ୍ତ ପଦାର୍ଥ ଓ ବସ୍ତୁମାନଙ୍କ ବିଷୟରେ ଆଲୋଚନା, ପରୀକ୍ଷା,
4
+ ଗବେଷଣା, ଅନୁମାନ ଏବଂ ପଠନ କରାଯାଇଥାଏ । ଆଧୁନିକ ଯୁଗରେ ବିଜ୍ଞାନ କହିଲେ ସାଧାରଣତଃ କିଛି ଶିଖୁବା ଶୈଳୀକୁ
5
+ ବୁଝାଇଥାଏ","ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,
6
+ ନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ
7
+ ଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ
8
+ ଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ । ୪୪"
9
+ s1_1.png,"ଉପକ୍ରମ [ ସମ୍ପାଦନା ]
10
+ ବିଜ୍ଞାନ ହେଉଛି ଏକ ସୁବ୍ୟବସ୍ଥିତ ପ୍ରଣାଳୀ ଯାହାଦ୍ବାରା ବିଶ୍ବର ସମସ୍ତ ପଦାର୍ଥ ଓ ବସ୍ତୁମାନଙ୍କ ବିଷୟରେ ଆଲୋଚନା, ପରୀକ୍ଷା,
11
+ ଗବେଷଣା, ଅନୁମାନ ଏବଂ ପଠନ କରାଯାଇଥାଏ । ଆଧୁନିକ ଯୁଗରେ ବିଜ୍ଞାନ କହିଲେ ସାଧାରଣତଃ କିଛି ଶିଖୁବା ଶୈଳୀକୁ
12
+ ବୁଝାଇଥାଏ","ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,
13
+ ନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ
14
+ ଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ
15
+ ଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ । ୪୪"
16
+ s2.png,"ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,
17
+ ନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ
18
+ ଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ
19
+ ଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ ।","ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,
20
+ ନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ
21
+ ଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ
22
+ ଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ । ୪୪"
backend/data/annotations/annotations.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "s1.png": {
3
+ "extracted_text": "ଉପକ୍ରମ [ ସମ୍ପାଦନା ]\nବିଜ୍ଞାନ ହେଉଛି ଏକ ସୁବ୍ୟବସ୍ଥିତ ପ୍ରଣାଳୀ ଯାହାଦ୍ବାରା ବିଶ୍ବର ସମସ୍ତ ପଦାର୍ଥ ଓ ବସ୍ତୁମାନଙ୍କ ବିଷୟରେ ଆଲୋଚନା, ପରୀକ୍ଷା,\nଗବେଷଣା, ଅନୁମାନ ଏବଂ ପଠନ କରାଯାଇଥାଏ । ଆଧୁନିକ ଯୁଗରେ ବିଜ୍ଞାନ କହିଲେ ସାଧାରଣତଃ କିଛି ଶିଖୁବା ଶୈଳୀକୁ\nବୁଝାଇଥାଏ",
4
+ "validated_text": "ଉପକ୍ରମ [ ସମ୍ପାଦନା ] ୩୩\nବିଜ୍ଞାନ ହେଉଛି ଏକ ସୁବ୍ୟବସ୍ଥିତ ପ୍ରଣାଳୀ ଯାହାଦ୍ବାରା ବିଶ୍ବର ସମସ୍ତ ପଦାର୍ଥ ଓ ବସ୍ତୁମାନଙ୍କ ବିଷୟରେ ଆଲୋଚନା, ପରୀକ୍ଷା,\nଗବେଷଣା, ଅନୁମାନ ଏବଂ ପଠନ କରାଯାଇଥାଏ । ଆଧୁନିକ ଯୁଗରେ ବିଜ୍ଞାନ କହିଲେ ସାଧାରଣତଃ କିଛି ଶିଖୁବା ଶୈଳୀକୁ\nବୁଝାଇଥାଏ"
5
+ },
6
+ "s2.png": {
7
+ "extracted_text": "ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,\nନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ\nଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ\nଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ ।",
8
+ "validated_text": "ଧର୍ମ ହେଉଛି ନିର୍ଦ୍ଦିଷ୍ଟ ଆଚରଣ ଏବଂ ଅଭ୍ୟାସ, ବିଶ୍ବ ଦୃଷ୍ଟିକୋଣ, ଦିବ୍ୟଗ୍ରନ୍ଥ, ତୀର୍ଥ, ଭବିଷ୍ୟବାଣୀ,\nନୈତିକତା ଇତ୍ୟାଦି ଯାହା ମାନବକୁ ଆଲୌକିକ, ଆଧ୍ୟୟନିକ କିମ୍ବା ଆଧ୍ୟାତ୍ମିକ ଉପାଦାନ ସହିତ\nଜଡ଼ିତ କରୁଥିବା ସାମାଜିକ-ସାଂସ୍କୃତିକ ବ୍ୟବସ୍ଥା । ତେବେ ସୃଷ୍ଟ ଭାବରେ ଧର୍ମର ସଜ୍ଞା ଉପରେ\nଏପର୍ଯ୍ୟନ୍ତ କୌଣସି ଅଧ୍ୟୟନପ୍ରସୂତ ସହମତି ନାହିଁ । ୪୪"
9
+ }
10
+ }
backend/data/uploaded_images/s1.png ADDED
backend/data/uploaded_images/s1_1.png ADDED
backend/data/uploaded_images/s2.png ADDED
backend/requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pydantic
5
+ google-api-core>=2.0.0
6
+ google-generativeai
7
+
8
+ # Image/text generation stack
9
+ opencv-python
10
+ pillow
11
+ numpy
12
+ scipy
13
+ noise
14
+ pandas
15
+ requests
16
+ matplotlib
17
+ datasets
content/static/NotoSansOriya-Black (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33fbf1d96373a315468ba4087645cac7fbf3b7f0da9cc5a7fb8d6bbc79f7e3
3
+ size 142472