vasanthbanoth committed on
Commit
44a967a
·
verified ·
1 Parent(s): 7c15617

Delete src

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -350
src/streamlit_app.py DELETED
@@ -1,350 +0,0 @@
1
- import os
2
- import shutil
3
- import uuid
4
- import asyncio
5
- from fastapi import FastAPI, File, UploadFile, Form, Request
6
- from contextlib import asynccontextmanager
7
- from fastapi.staticfiles import StaticFiles
8
- from fastapi.templating import Jinja2Templates
9
- from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
10
- from fastapi.middleware.cors import CORSMiddleware
11
- import os
12
- import shutil
13
- import uuid
14
- from ultralytics import YOLO
15
- from helpers.image_processing import process_newspaper_image
16
- import logging
17
- import json
18
- from gtts import gTTS
19
- import google.generativeai as genai
20
- from pathlib import Path
21
- from starlette.staticfiles import StaticFiles
22
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
24
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the background queue worker for the lifetime of the application.

    On startup a single ``process_task_worker`` task is scheduled on the
    running event loop. On shutdown that task is cancelled and awaited so the
    infinite worker loop does not outlive the application.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # Startup
    # Note: process_task_worker is defined later but available at runtime.
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected while it
    # is still running.
    worker_task = asyncio.create_task(process_task_worker())
    logger.info("Background task worker started.")
    try:
        yield
    finally:
        # Shutdown: stop the worker explicitly instead of abandoning it.
        logger.info("Shutting down...")
        worker_task.cancel()
        try:
            await worker_task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() above.
            pass
        except Exception:
            # The worker had already died with an error; record it rather
            # than letting it break application shutdown.
            logger.exception("Background task worker exited with an error.")
33
-
34
# The ASGI application. `lifespan` is the startup/shutdown hook that launches
# the background task worker.
app = FastAPI(
    title="Newspaper Text Detection and Recognition",
    description="Web application for detecting and recognizing text in newspaper images",
    version="1.0.0",
    lifespan=lifespan
)
40
# Allow cross-origin requests from any origin, with any method and header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Credentials (cookies, Authorization headers) must not be enabled
    # together with wildcard origins: that would let any website issue
    # credentialed requests against this API. None of the routes in this
    # module rely on cookies or authentication, so credentials are disabled.
    # To support them, list the trusted origins explicitly instead of "*".
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
47
-
48
# Filesystem locations used by the application (relative to the working
# directory the server is started from).
MODEL_PATH = "best_30_epochs.pt"  # weights file passed to YOLO()
UPLOAD_DIR = "uploads"  # per-upload temporary directories live here
PROCESSED_DIR = "processed_files"  # per-upload output directories live here
# NOTE(review): STATUS_FILE and LOG_FILE are not referenced anywhere else in
# this module; task state is kept in memory instead.
STATUS_FILE = "status.json"
LOG_FILE = "logs.json"

# Make sure the working directories exist before anything is mounted on them.
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Jinja2 templates are loaded from the "templates" directory.
templates = Jinja2Templates(directory="templates")

# Serve static assets under /static, and everything written to PROCESSED_DIR
# under /processed_files (this is how generated audio files are exposed).
app.mount("/static", StaticFiles(directory="static"), name="static")
app.mount("/processed_files", StaticFiles(directory=PROCESSED_DIR), name="processed_files")
61
-
62
# SECURITY: the API key must never be committed to source control. It is read
# from the GEMINI_API_KEY environment variable instead. Any key that was
# previously hard-coded here must be treated as leaked and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GEMINI_MODEL_NAME = "gemini-1.5-flash"

# `gemini_model` stays None when no key is configured or the client cannot be
# created; the summarize endpoint checks for None and returns an error.
gemini_model = None
if GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gemini_model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        logger.info(f"Gemini model '{GEMINI_MODEL_NAME}' loaded successfully.")
    except Exception as e:
        logger.error(f"Error loading Gemini model '{GEMINI_MODEL_NAME}': {e}")
        gemini_model = None
else:
    logger.warning(
        "GEMINI_API_KEY environment variable is not set; summarization is disabled."
    )
73
-
74
# Load the YOLO detector once at import time. When loading fails, `model` is
# left as None and the upload endpoint / worker report the failure.
try:
    model = YOLO(MODEL_PATH)
except Exception as load_error:
    model = None
    logger.error(f"Error loading YOLO model: {load_error}")
else:
    logger.info("YOLO model loaded successfully.")
80
-
81
# Queue of pending uploads. Items are dicts with the keys "upload_id",
# "file_path_temp", "processed_output_dir" and "language"; they are produced
# by process_upload and consumed by process_task_worker.
task_queue = asyncio.Queue()

# In-memory task registry keyed by the upload ID string. Each value is a dict
# with "status", "progress", "logs", "results", "error" and (for accepted
# uploads) "language". Being a plain module-level dict, it is not persisted.
task_status = {}
84
-
85
async def _run_queued_task(task_data):
    """Process one queue item and record its outcome in ``task_status``."""
    upload_id = task_data["upload_id"]
    file_path_temp = task_data["file_path_temp"]
    processed_output_dir = task_data["processed_output_dir"]
    upload_id_str = str(upload_id)
    # Guard against a missing registry entry: indexing a missing key used to
    # raise KeyError outside any try block and terminate the worker loop.
    task_status.setdefault(
        upload_id_str,
        {"status": "pending", "progress": 0.0, "logs": [], "results": [], "error": None},
    )
    task_status[upload_id_str]["status"] = "processing"
    task_status[upload_id_str]["logs"].append("Task started.")
    logger.info(f"Processing task {upload_id}")
    try:
        if model is None:
            task_status[upload_id_str]["status"] = "failed"
            task_status[upload_id_str][
                "error"
            ] = "Image processing model failed to load on startup."
            task_status[upload_id_str]["logs"].append(
                "Task failed: Image processing model not loaded."
            )
            logger.error(f"Task {upload_id_str} failed: YOLO model not loaded.")
        else:
            processed_results = await process_newspaper_image(
                file_path_temp,
                processed_output_dir,
                model,
                upload_id_str,
                task_status,
                task_data.get("language", "en"),
            )
            task_status[upload_id_str]["status"] = "completed"
            task_status[upload_id_str]["results"] = processed_results
            task_status[upload_id_str]["logs"].append(
                "Task completed successfully."
            )
            logger.info(f"Task {upload_id} completed.")
    except Exception as e:
        task_status[upload_id_str]["status"] = "failed"
        task_status[upload_id_str]["error"] = str(e)
        task_status[upload_id_str]["logs"].append(f"Task failed: {e}")
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"Task {upload_id} failed: {e}")
    finally:
        # Always remove the temporary upload directory. A cleanup failure is
        # logged instead of raised so it cannot mask the task's real outcome.
        upload_dir_temp = os.path.dirname(file_path_temp)
        try:
            if os.path.exists(upload_dir_temp):
                shutil.rmtree(upload_dir_temp)
                task_status[upload_id_str]["logs"].append(
                    "Cleaned up temporary upload directory."
                )
        except OSError as cleanup_error:
            logger.error(
                f"Failed to clean up {upload_dir_temp} for task {upload_id_str}: {cleanup_error}"
            )


async def process_task_worker():
    """Consume queued upload tasks forever, one at a time.

    Each queue item is a dict with ``upload_id``, ``file_path_temp``,
    ``processed_output_dir`` and optionally ``language``. Progress and the
    outcome are recorded in ``task_status`` under ``str(upload_id)``.

    ``task_queue.task_done()`` is called for every item, and unexpected
    errors are logged rather than propagated, so a single bad task can
    neither stall the queue nor terminate the worker. Cancellation is not
    swallowed and ends the loop.
    """
    while True:
        task_data = await task_queue.get()
        try:
            await _run_queued_task(task_data)
        except Exception:
            logger.exception("Unexpected error while handling a queued task.")
        finally:
            task_queue.task_done()
132
-
133
-
134
-
135
@app.get("/", response_class=HTMLResponse)
async def get_upload_form(request: Request):
    """Render the ``index.html`` template served at the site root."""
    page_context = {"request": request}
    return templates.TemplateResponse("index.html", page_context)
138
-
139
@app.post("/process/")
async def process_upload(
    request: Request, file: UploadFile = File(...), language: str = Form("en")
):
    """Store an uploaded file and queue it for background processing.

    Args:
        request: The incoming request (unused; kept for interface
            compatibility).
        file: The uploaded file.
        language: Language code from the form. It is stored with the task
            and forwarded to the worker. Defaults to ``"en"``.

    Returns:
        A 303 redirect to ``/status/{upload_id}`` on success. Otherwise a
        JSON error: 400 when no usable filename was supplied, 500 when the
        YOLO model is unavailable or the file could not be stored or queued.
    """
    if model is None:
        return JSONResponse(
            status_code=500,
            content={
                "upload_id": "error",
                "error_message": "Image processing model failed to load. Please check server logs.",
            },
        )

    # SECURITY: the filename is client-controlled. Keep only its final path
    # component (treating backslashes as separators too) so that values such
    # as "../../app.py" or "/etc/passwd" cannot escape the upload directory.
    # Validation happens before any directory is created, so a rejected
    # request leaves nothing behind to clean up.
    safe_filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_filename in ("", ".", ".."):
        return JSONResponse(
            status_code=400,
            content={
                "upload_id": "error",
                "error_message": "No filename provided for the uploaded file.",
            },
        )

    upload_id = uuid.uuid4()
    upload_id_str = str(upload_id)
    upload_dir_temp = os.path.join(UPLOAD_DIR, upload_id_str)
    processed_output_dir = os.path.join(PROCESSED_DIR, upload_id_str)
    file_path_temp = os.path.join(upload_dir_temp, safe_filename)

    def _store_upload():
        # Blocking disk copy; executed in a worker thread (see below).
        with open(file_path_temp, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

    try:
        os.makedirs(upload_dir_temp, exist_ok=True)
        os.makedirs(processed_output_dir, exist_ok=True)
        # Copy off the event loop so a large upload does not stall other
        # requests or the background worker.
        await asyncio.to_thread(_store_upload)
        task_status[upload_id_str] = {
            "status": "pending",
            "progress": 0.0,
            "logs": [f"Received file: {safe_filename}. Adding to queue."],
            "results": [],
            "error": None,
            "language": language,
        }
        logger.info(
            f"Received file {safe_filename}, assigning ID {upload_id_str}, adding to queue with language '{language}'."
        )
        await task_queue.put(
            {
                "upload_id": upload_id,
                "file_path_temp": file_path_temp,
                "processed_output_dir": processed_output_dir,
                "language": language,
            }
        )
        return RedirectResponse(url=f"/status/{upload_id_str}", status_code=303)
    except Exception as e:
        # Best-effort cleanup: ignore_errors keeps a cleanup failure from
        # replacing the JSON error response with an unhandled exception.
        shutil.rmtree(upload_dir_temp, ignore_errors=True)
        shutil.rmtree(processed_output_dir, ignore_errors=True)
        error_id = str(uuid.uuid4())
        task_status[error_id] = {
            "status": "failed",
            "progress": 0.0,
            "logs": [f"Failed to receive or queue file: {e}"],
            "results": [],
            "error": str(e),
        }
        logger.error(f"Failed to receive or queue file: {e}")
        return JSONResponse(
            status_code=500, content={"upload_id": error_id, "error_message": str(e)}
        )
210
-
211
@app.get("/status/{upload_id}", response_class=HTMLResponse)
async def get_status_page(request: Request, upload_id: str):
    """Render the status page for an upload.

    An ID is accepted when it is present in ``task_status`` or when a
    directory of that name exists under ``PROCESSED_DIR``; otherwise the
    error page is rendered with HTTP 404.
    """
    tracked_in_memory = upload_id in task_status
    if not tracked_in_memory and not os.path.exists(
        os.path.join(PROCESSED_DIR, upload_id)
    ):
        error_context = {
            "request": request,
            "error_message": f"Invalid or expired Upload ID: {upload_id}",
        }
        return templates.TemplateResponse(
            "error.html", error_context, status_code=404
        )

    status_context = {"request": request, "upload_id": upload_id}
    return templates.TemplateResponse("status.html", status_context)
226
-
227
@app.get("/api/status/{upload_id}")
async def get_task_status(upload_id: str):
    """Return the in-memory status record for an upload as JSON.

    Responds with 404 when no record exists. If an output directory for the
    ID is still present, the 404 body reports status ``"unknown"``;
    otherwise it reports ``"not_found"``.
    """
    record = task_status.get(upload_id)
    if record:
        return JSONResponse(record)

    output_dir = os.path.join(PROCESSED_DIR, upload_id)
    if not os.path.exists(output_dir):
        return JSONResponse(status_code=404, content={"status": "not_found"})

    orphaned_payload = {
        "status": "unknown",
        "error": "Task status not found, but directory exists. Server may have restarted.",
    }
    return JSONResponse(status_code=404, content=orphaned_payload)
241
-
242
@app.get("/api/logs/{upload_id}")
async def get_task_logs(upload_id: str):
    """Return the log lines recorded for an upload, or 404 with an empty list."""
    record = task_status.get(upload_id)
    if not record:
        return JSONResponse(status_code=404, content={"logs": []})
    return JSONResponse({"logs": record.get("logs", [])})
248
-
249
@app.get("/api/generate_audio/{upload_id}/{box_index}")
async def generate_audio(upload_id: str, box_index: int):
    """Create (or reuse) an mp3 reading of the text extracted from one box.

    Args:
        upload_id: ID of a task whose status is ``"completed"``.
        box_index: Value matched against each result's ``"box_index"``.

    Returns:
        JSON ``{"audio_path": <url under /processed_files>}`` on success;
        404 when the task or box is missing, the task is not completed, or
        the box has no text; 500 when speech synthesis fails.
    """
    task = task_status.get(upload_id)
    if not task or task["status"] != "completed":
        return JSONResponse(
            status_code=404, content={"error": "Task not found or not completed."}
        )
    box_result = next(
        (item for item in task["results"] if item.get("box_index") == box_index),
        None,
    )
    if not box_result or not box_result.get("text"):
        return JSONResponse(
            status_code=404, content={"error": "Box not found or no text extracted."}
        )
    cleaned_text = box_result["text"]
    # Only Hindi and Telugu are passed through; anything else uses English.
    selected_language = task.get("language", "en")
    gtts_lang = selected_language if selected_language in ("hi", "te") else "en"
    processed_output_dir = os.path.join(PROCESSED_DIR, upload_id)
    audio_filename = f"output_box_{box_index}.mp3"
    audio_file_path_abs = os.path.join(processed_output_dir, audio_filename)
    audio_path_url = f"/processed_files/{upload_id}/{audio_filename}"
    if os.path.exists(audio_file_path_abs):
        logger.info(
            f"Audio already exists for task {upload_id}, box {box_index}. Returning existing."
        )
        return JSONResponse({"audio_path": audio_path_url})
    # Write to a unique temporary name and rename on success. Saving directly
    # to the final path could leave a truncated file behind after a failure,
    # which the existence check above would then serve as a valid cache hit.
    partial_path = f"{audio_file_path_abs}.{uuid.uuid4().hex}.part"
    try:
        tts = gTTS(text=cleaned_text, lang=gtts_lang, slow=False)
        # gTTS performs blocking network and file I/O; run it in a worker
        # thread so the event loop (and the background task worker) keep
        # running, matching how the summarize endpoint calls Gemini.
        await asyncio.to_thread(tts.save, partial_path)
        os.replace(partial_path, audio_file_path_abs)
        logger.info(f"Generated audio for task {upload_id}, box {box_index}.")
        box_result["audio_path"] = audio_path_url
        return JSONResponse({"audio_path": audio_path_url})
    except Exception as e:
        # Remove whatever was written so no partial file is left behind.
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            pass
        logger.error(
            f"Error generating audio for task {upload_id}, box {box_index}: {e}"
        )
        return JSONResponse(
            status_code=500, content={"error": f"Failed to generate audio: {e}"}
        )
295
-
296
@app.get("/api/summarize/{upload_id}/{box_index}")
async def summarize_text(upload_id: str, box_index: int):
    """Return a Gemini-generated summary of the text extracted from one box.

    A summary already stored on the box result is returned as-is; otherwise
    one is generated, stored under ``"summary"`` and returned. Responds 500
    when the Gemini model is unavailable or generation fails, and 404 when
    the task is missing or not completed, or the box is missing or has no
    text.
    """
    if gemini_model is None:
        return JSONResponse(
            status_code=500, content={"error": "Gemini model failed to load."}
        )

    task = task_status.get(upload_id)
    if not task or task["status"] != "completed":
        return JSONResponse(
            status_code=404, content={"error": "Task not found or not completed."}
        )

    # First result whose box_index matches, or None when there is none.
    box_result = next(
        (entry for entry in task["results"] if entry.get("box_index") == box_index),
        None,
    )
    if not box_result or not box_result.get("text"):
        return JSONResponse(
            status_code=404, content={"error": "Box not found or no text extracted."}
        )

    cleaned_text = box_result["text"]
    if box_result.get("summary"):
        logger.info(
            f"Summary already exists for task {upload_id}, box {box_index}. Returning existing."
        )
        return JSONResponse({"summary": box_result["summary"]})

    try:
        prompt = f"Summarize the following text:\n\n{cleaned_text}"
        # The Gemini client call is blocking, so it runs in a worker thread.
        response = await asyncio.to_thread(gemini_model.generate_content, prompt)
        summary = response.text.strip()
        logger.info(f"Generated summary for task {upload_id}, box {box_index}.")
        box_result["summary"] = summary
        return JSONResponse({"summary": summary})
    except Exception as err:
        logger.error(
            f"Error generating summary for task {upload_id}, box {box_index}: {err}"
        )
        return JSONResponse(
            status_code=500, content={"error": f"Failed to generate summary: {err}"}
        )
336
-
337
@app.get("/list_uploads/", response_class=HTMLResponse)
async def list_uploads(request: Request):
    """Render a page listing every directory name found under PROCESSED_DIR."""
    upload_ids = []
    for entry_name in os.listdir(PROCESSED_DIR):
        # Only directories count as uploads; stray files are skipped.
        if os.path.isdir(os.path.join(PROCESSED_DIR, entry_name)):
            upload_ids.append(entry_name)
    page_context = {"request": request, "upload_ids": upload_ids}
    return templates.TemplateResponse("list_uploads.html", page_context)
347
-
348
if __name__ == "__main__":
    import uvicorn

    # Pass the application object rather than the "main:app" import string.
    # The string form only resolves when this file is saved as main.py; the
    # object form works whatever the module is called. (An import string is
    # only required for uvicorn's reload/workers options, unused here.)
    uvicorn.run(app, host="0.0.0.0", port=8000)