jo8780 committed on
Commit
877ea34
·
1 Parent(s): 182f0b8

Switch /health to call analyzer.health_check() for per-component status

Browse files

Restores the richer health response with the tamper_detector, script_detector,
ocr_router, image_processor, paddle_parser, and ml_extractor components.
Reverts to the synchronous Depends(get_analyzer) singleton pattern.

Files changed (1) hide show
  1. app/api/routes.py +53 -74
app/api/routes.py CHANGED
@@ -4,48 +4,46 @@ from typing import Optional
4
  import uuid
5
  import logging
6
  from datetime import datetime
7
- # from app.analyzers.certificate_analyzer import ProductionCertificateAnalyzer # REMOVED: Triggers heavy imports too early
8
  from app.utils.cache import get_redis_client
9
  from .schemas import (
10
  AnalysisRequest, AnalysisResponse, HealthResponse, ErrorResponse,
11
  SyntheticGenerationRequest, SyntheticGenerationResponse,
12
  analyzer_to_response # Import the helper function
13
  )
14
- from app.utils.analyzer_singleton import get_analyzer_instance
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
  router = APIRouter(prefix="/api/v1", tags=["certificate-analysis"])
19
 
 
 
 
20
  def get_analyzer():
21
  """Get analyzer instance (singleton)"""
22
- return get_analyzer_instance()
 
 
 
23
 
24
  @router.get("/health", response_model=HealthResponse)
25
- async def health_check():
26
- """Health check endpoint - Non-blocking"""
 
 
27
  try:
28
- from app.utils.analyzer_singleton import _analyzer_instance
29
- analyzer = _analyzer_instance
30
-
31
- health_data = {
32
- "status": "healthy" if analyzer else "initializing",
33
- "ready": analyzer is not None,
34
- "components": {
35
- "analyzer": "ready" if analyzer else "initializing",
36
- "redis": "unknown"
37
- },
38
- "timestamp": datetime.now().isoformat(),
39
- "version": analyzer.model_version if analyzer else "loading"
40
- }
41
  # Check Redis health
42
  redis_client = await get_redis_client()
43
  redis_stats = await redis_client.get_stats()
44
-
45
  health_data["components"]["redis"] = redis_stats.get("status", "unknown")
46
-
47
  return HealthResponse(**health_data)
48
-
49
  except Exception as e:
50
  logger.error(f"Health check failed: {e}")
51
  return JSONResponse(
@@ -62,39 +60,34 @@ async def analyze_upload(
62
  file: UploadFile = File(...),
63
  provider_id: str = "default",
64
  language_hint: Optional[str] = None,
65
- background_tasks: BackgroundTasks = None
 
66
  ):
67
  """Analyze uploaded certificate"""
68
- analyzer = get_analyzer()
69
- if analyzer is None:
70
- raise HTTPException(
71
- status_code=503,
72
- detail="Analyzer is still initializing. Please try again in 30 seconds."
73
- )
74
  try:
75
  # Validate file
76
  if not file.filename:
77
  raise HTTPException(status_code=400, detail="No filename provided")
78
-
79
  # Check file size
80
  content = await file.read()
81
- if len(content) > 100 * 1024 * 1024:
82
  raise HTTPException(status_code=400, detail="File too large (max 100MB)")
83
-
84
  # Reset file pointer
85
  await file.seek(0)
86
-
87
  # Generate request ID
88
  request_id = str(uuid.uuid4())[:12]
89
-
90
  logger.info(f"Processing upload: {file.filename}, request_id: {request_id}")
91
-
92
  # Analyze certificate
93
  result = await analyzer.analyze_certificate_file(file, provider_id, request_id)
94
-
95
  # Convert to proper response format using the helper function
96
  response = analyzer_to_response(result)
97
-
98
  # Store in background if needed (use the original result dict)
99
  if background_tasks and result.get('authenticity_score', 0) > 0.5:
100
  background_tasks.add_task(
@@ -102,9 +95,9 @@ async def analyze_upload(
102
  result,
103
  "upload"
104
  )
105
-
106
  return response
107
-
108
  except HTTPException:
109
  raise
110
  except Exception as e:
@@ -114,42 +107,28 @@ async def analyze_upload(
114
  @router.post("/analyze/url", response_model=AnalysisResponse)
115
  async def analyze_url(
116
  request: AnalysisRequest,
117
- background_tasks: BackgroundTasks = None
 
118
  ):
119
  """Analyze certificate from URL"""
120
- analyzer = get_analyzer()
121
- if analyzer is None:
122
- raise HTTPException(
123
- status_code=503,
124
- detail="Analyzer is still initializing. Please try again in 30 seconds."
125
- )
126
  try:
127
  if not request.document_url:
128
  raise HTTPException(status_code=400, detail="document_url is required")
129
-
130
  request_id = str(uuid.uuid4())[:12]
131
-
132
  logger.info(f"Processing URL: {request.document_url[:100]}...")
133
- import psutil
134
- process = psutil.Process()
135
- mem_info = process.memory_info()
136
- logger.info(f"🧠 Current Memory Usage: {mem_info.rss / 1024 / 1024:.2f} MB")
137
-
138
- start_time = datetime.now()
139
-
140
  # Analyze certificate
141
  result = await analyzer.analyze_certificate_url(
142
  request.document_url,
143
  request.provider_id,
144
  request_id
145
  )
146
-
147
- duration = (datetime.now() - start_time).total_seconds()
148
- logger.info(f"URL analysis completed in {duration:.2f}s for request_id: {request_id}")
149
-
150
  # Convert to proper response format using the helper function
151
  response = analyzer_to_response(result)
152
-
153
  # Store in background (use the original result dict)
154
  if background_tasks:
155
  background_tasks.add_task(
@@ -157,9 +136,9 @@ async def analyze_url(
157
  result,
158
  "url"
159
  )
160
-
161
  return response
162
-
163
  except HTTPException:
164
  raise
165
  except Exception as e:
@@ -174,14 +153,14 @@ async def generate_synthetic_data(
174
  """Generate synthetic training data"""
175
  try:
176
  from app.analyzers.synthetic_generator import generate_optimized_dataset
177
-
178
  # Start generation in background
179
  background_tasks.add_task(
180
  generate_synthetic_background,
181
  request.num_samples,
182
  request.tampering_ratio
183
  )
184
-
185
  return SyntheticGenerationResponse(
186
  status="started",
187
  samples_generated=0,
@@ -189,7 +168,7 @@ async def generate_synthetic_data(
189
  tampering_ratio=request.tampering_ratio,
190
  processing_time=0.0
191
  )
192
-
193
  except Exception as e:
194
  logger.error(f"Synthetic generation failed: {e}")
195
  raise HTTPException(status_code=500, detail=str(e))
@@ -202,18 +181,18 @@ async def get_statistics(
202
  try:
203
  # Get OCR statistics
204
  ocr_stats = analyzer.ocr_router.get_statistics() if hasattr(analyzer, 'ocr_router') else {}
205
-
206
  # Get Redis statistics
207
  redis_client = await get_redis_client()
208
  redis_stats = await redis_client.get_stats()
209
-
210
  return {
211
  "timestamp": datetime.now().isoformat(),
212
  "ocr_statistics": ocr_stats,
213
  "redis_statistics": redis_stats,
214
  "analyzer_version": analyzer.model_version
215
  }
216
-
217
  except Exception as e:
218
  raise HTTPException(status_code=500, detail=str(e))
219
 
@@ -223,7 +202,7 @@ async def store_analysis_result(result: dict, source: str):
223
  # This would connect to your database
224
  # For now, just log it
225
  logger.info(f"Storing analysis result for {result.get('analysis_id')} from {source}")
226
-
227
  # Example: Store in Redis for quick access
228
  redis_client = await get_redis_client()
229
  await redis_client.set(
@@ -231,7 +210,7 @@ async def store_analysis_result(result: dict, source: str):
231
  result,
232
  ttl=86400 # 24 hours
233
  )
234
-
235
  except Exception as e:
236
  logger.error(f"Failed to store analysis result: {e}")
237
 
@@ -239,16 +218,16 @@ async def generate_synthetic_background(num_samples: int, tampering_ratio: float
239
  """Background task for synthetic data generation"""
240
  try:
241
  from app.analyzers.synthetic_generator import generate_optimized_dataset
242
-
243
  logger.info(f"Starting synthetic data generation: {num_samples} samples")
244
-
245
  output_dir = generate_optimized_dataset(
246
  num_samples=num_samples,
247
  tampering_ratio=tampering_ratio,
248
  use_parallel=True
249
  )
250
-
251
  logger.info(f"Synthetic data generation complete: {output_dir}")
252
-
253
  except Exception as e:
254
- logger.error(f"Synthetic generation background task failed: {e}")
 
4
  import uuid
5
  import logging
6
  from datetime import datetime
7
+ from app.analyzers.certificate_analyzer import ProductionCertificateAnalyzer
8
  from app.utils.cache import get_redis_client
9
  from .schemas import (
10
  AnalysisRequest, AnalysisResponse, HealthResponse, ErrorResponse,
11
  SyntheticGenerationRequest, SyntheticGenerationResponse,
12
  analyzer_to_response # Import the helper function
13
  )
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
  router = APIRouter(prefix="/api/v1", tags=["certificate-analysis"])
18
 
19
+ # Initialize analyzer (singleton)
20
+ _analyzer = None
21
+
22
  def get_analyzer():
23
  """Get analyzer instance (singleton)"""
24
+ global _analyzer
25
+ if _analyzer is None:
26
+ _analyzer = ProductionCertificateAnalyzer(use_ml=True)
27
+ return _analyzer
28
 
29
  @router.get("/health", response_model=HealthResponse)
30
+ async def health_check(
31
+ analyzer = Depends(get_analyzer)
32
+ ):
33
+ """Health check endpoint"""
34
  try:
35
+ #health_data = await analyzer.health_check()
36
+ health_data = analyzer.health_check()
37
+ health_data["timestamp"] = datetime.now().isoformat()
38
+ health_data["version"] = analyzer.model_version
 
 
 
 
 
 
 
 
 
39
  # Check Redis health
40
  redis_client = await get_redis_client()
41
  redis_stats = await redis_client.get_stats()
42
+
43
  health_data["components"]["redis"] = redis_stats.get("status", "unknown")
44
+
45
  return HealthResponse(**health_data)
46
+
47
  except Exception as e:
48
  logger.error(f"Health check failed: {e}")
49
  return JSONResponse(
 
60
  file: UploadFile = File(...),
61
  provider_id: str = "default",
62
  language_hint: Optional[str] = None,
63
+ background_tasks: BackgroundTasks = None,
64
+ analyzer = Depends(get_analyzer)
65
  ):
66
  """Analyze uploaded certificate"""
 
 
 
 
 
 
67
  try:
68
  # Validate file
69
  if not file.filename:
70
  raise HTTPException(status_code=400, detail="No filename provided")
71
+
72
  # Check file size
73
  content = await file.read()
74
+ if len(content) > 100 * 1024 * 1024:
75
  raise HTTPException(status_code=400, detail="File too large (max 100MB)")
76
+
77
  # Reset file pointer
78
  await file.seek(0)
79
+
80
  # Generate request ID
81
  request_id = str(uuid.uuid4())[:12]
82
+
83
  logger.info(f"Processing upload: {file.filename}, request_id: {request_id}")
84
+
85
  # Analyze certificate
86
  result = await analyzer.analyze_certificate_file(file, provider_id, request_id)
87
+
88
  # Convert to proper response format using the helper function
89
  response = analyzer_to_response(result)
90
+
91
  # Store in background if needed (use the original result dict)
92
  if background_tasks and result.get('authenticity_score', 0) > 0.5:
93
  background_tasks.add_task(
 
95
  result,
96
  "upload"
97
  )
98
+
99
  return response
100
+
101
  except HTTPException:
102
  raise
103
  except Exception as e:
 
107
  @router.post("/analyze/url", response_model=AnalysisResponse)
108
  async def analyze_url(
109
  request: AnalysisRequest,
110
+ background_tasks: BackgroundTasks = None,
111
+ analyzer = Depends(get_analyzer)
112
  ):
113
  """Analyze certificate from URL"""
 
 
 
 
 
 
114
  try:
115
  if not request.document_url:
116
  raise HTTPException(status_code=400, detail="document_url is required")
117
+
118
  request_id = str(uuid.uuid4())[:12]
119
+
120
  logger.info(f"Processing URL: {request.document_url[:100]}...")
121
+
 
 
 
 
 
 
122
  # Analyze certificate
123
  result = await analyzer.analyze_certificate_url(
124
  request.document_url,
125
  request.provider_id,
126
  request_id
127
  )
128
+
 
 
 
129
  # Convert to proper response format using the helper function
130
  response = analyzer_to_response(result)
131
+
132
  # Store in background (use the original result dict)
133
  if background_tasks:
134
  background_tasks.add_task(
 
136
  result,
137
  "url"
138
  )
139
+
140
  return response
141
+
142
  except HTTPException:
143
  raise
144
  except Exception as e:
 
153
  """Generate synthetic training data"""
154
  try:
155
  from app.analyzers.synthetic_generator import generate_optimized_dataset
156
+
157
  # Start generation in background
158
  background_tasks.add_task(
159
  generate_synthetic_background,
160
  request.num_samples,
161
  request.tampering_ratio
162
  )
163
+
164
  return SyntheticGenerationResponse(
165
  status="started",
166
  samples_generated=0,
 
168
  tampering_ratio=request.tampering_ratio,
169
  processing_time=0.0
170
  )
171
+
172
  except Exception as e:
173
  logger.error(f"Synthetic generation failed: {e}")
174
  raise HTTPException(status_code=500, detail=str(e))
 
181
  try:
182
  # Get OCR statistics
183
  ocr_stats = analyzer.ocr_router.get_statistics() if hasattr(analyzer, 'ocr_router') else {}
184
+
185
  # Get Redis statistics
186
  redis_client = await get_redis_client()
187
  redis_stats = await redis_client.get_stats()
188
+
189
  return {
190
  "timestamp": datetime.now().isoformat(),
191
  "ocr_statistics": ocr_stats,
192
  "redis_statistics": redis_stats,
193
  "analyzer_version": analyzer.model_version
194
  }
195
+
196
  except Exception as e:
197
  raise HTTPException(status_code=500, detail=str(e))
198
 
 
202
  # This would connect to your database
203
  # For now, just log it
204
  logger.info(f"Storing analysis result for {result.get('analysis_id')} from {source}")
205
+
206
  # Example: Store in Redis for quick access
207
  redis_client = await get_redis_client()
208
  await redis_client.set(
 
210
  result,
211
  ttl=86400 # 24 hours
212
  )
213
+
214
  except Exception as e:
215
  logger.error(f"Failed to store analysis result: {e}")
216
 
 
218
  """Background task for synthetic data generation"""
219
  try:
220
  from app.analyzers.synthetic_generator import generate_optimized_dataset
221
+
222
  logger.info(f"Starting synthetic data generation: {num_samples} samples")
223
+
224
  output_dir = generate_optimized_dataset(
225
  num_samples=num_samples,
226
  tampering_ratio=tampering_ratio,
227
  use_parallel=True
228
  )
229
+
230
  logger.info(f"Synthetic data generation complete: {output_dir}")
231
+
232
  except Exception as e:
233
+ logger.error(f"Synthetic generation background task failed: {e}")