Hydra-Bolt committed
Commit 0e65d5f · 1 Parent(s): af78335
Files changed (5):
  1. app.py +12 -12
  2. config.py +6 -7
  3. constants.py +12 -12
  4. routes.py +74 -66
  5. services.py +107 -65
app.py CHANGED
@@ -29,6 +29,7 @@ app.add_middleware(
 # Include API routes
 app.include_router(router)
 
+
 # Global exception handler
 @app.exception_handler(Exception)
 async def global_exception_handler(request, exc):
@@ -39,16 +40,16 @@ async def global_exception_handler(request, exc):
             status_code=500,
             content={
                 "detail": f"Internal server error: {str(exc)}",
-                "type": type(exc).__name__
-            }
+                "type": type(exc).__name__,
+            },
         )
     else:
         # In production, return generic error message
         return JSONResponse(
-            status_code=500,
-            content={"detail": "Internal server error"}
+            status_code=500, content={"detail": "Internal server error"}
         )
 
+
 # Root endpoint
 @app.get("/", summary="Root endpoint")
 async def root():
@@ -60,10 +61,11 @@ async def root():
         "endpoints": {
             "extract_narrators": "/api/v1/extract-narrators",
             "analyze_narrator": "/api/v1/analyze-narrator",
-            "health": "/api/v1/health"
-        }
+            "health": "/api/v1/health",
+        },
     }
 
+
 # Startup event
 @app.on_event("startup")
 async def startup_event():
@@ -71,23 +73,21 @@ async def startup_event():
     # Validate required environment variables
     if not settings.GOOGLE_API_KEY:
         raise ValueError("GOOGLE_API_KEY environment variable is required")
-
+
     print(f"Starting {settings.API_TITLE} v{settings.API_VERSION}")
     print(f"Environment: {settings.ENVIRONMENT}")
    print(f"Debug mode: {settings.DEBUG}")
 
+
 # Shutdown event
 @app.on_event("shutdown")
 async def shutdown_event():
     """Shutdown event handler."""
     print("Shutting down SanadCheck API")
 
+
 if __name__ == "__main__":
     # Run the application
     uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=settings.DEBUG,
-        log_level="info"
+        "app:app", host="0.0.0.0", port=8000, reload=settings.DEBUG, log_level="info"
     )
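
A quick way to exercise the handlers touched above is FastAPI's test client. This is a sketch only, not part of the commit, and it assumes app.py exposes the `app` instance referenced in the uvicorn.run call.

    from fastapi.testclient import TestClient

    from app import app

    # raise_server_exceptions=False lets the global exception handler answer,
    # so with DEBUG off an unexpected error comes back as
    # {"detail": "Internal server error"} instead of re-raising in the test.
    client = TestClient(app, raise_server_exceptions=False)

    body = client.get("/").json()
    print(body["endpoints"]["health"])  # -> "/api/v1/health"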
config.py CHANGED
@@ -2,29 +2,28 @@ import os
 from typing import Optional
 
 
-
 class Settings:
     """Application settings."""
-
+
     # API Settings
     API_TITLE: str = "SanadCheck API"
     API_DESCRIPTION: str = "API for Hadith narrator analysis and validation"
     API_VERSION: str = "1.0.0"
-
+
     # Environment
     ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
     DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
-
+
     # Google AI
     GOOGLE_API_KEY: Optional[str] = os.getenv("GOOGLE_API_KEY")
-
+
     # Rate Limiting
     RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
     RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))  # 1 hour
-
+
     # CORS
     ALLOWED_ORIGINS: list = os.getenv("ALLOWED_ORIGINS", "*").split(",")
-
+
     class Config:
         env_file = ".env"
 
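
For reference, a small sketch (not part of the commit) of how these Settings fields resolve from the environment. It assumes the module is importable as `config`, and the variables are set before import because the fields are evaluated at class-definition time.

    import os

    # Set the variables first; Settings reads them when config is imported.
    os.environ["ALLOWED_ORIGINS"] = "https://example.com,http://localhost:3000"
    os.environ["DEBUG"] = "false"

    from config import Settings

    print(Settings.ALLOWED_ORIGINS)    # ['https://example.com', 'http://localhost:3000']
    print(Settings.DEBUG)              # False
    print(Settings.RATE_LIMIT_WINDOW)  # 3600 (one hour) by default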
constants.py CHANGED
@@ -65,17 +65,17 @@ Provide a clear, humble, and well-justified analysis combining Shamela data and
 
 # Synthesis prompt constant (use PromptTemplate with this constant)
 SYNTHESIS_PROMPT = (
-    "As a hadith expert, analyze this complete chain of narrators and provide an overall assessment:\n\n"
-    "INDIVIDUAL NARRATOR ANALYSES:\n{narrator_summaries}\n\n"
-    "Provide an overall chain assessment considering:\n"
-    "1. Weakest link principle - the chain is only as strong as its weakest narrator\n"
-    "2. Cumulative reliability - multiple weak narrators compound the weakness\n"
-    "3. Historical context and scholarly methodology\n"
-    "4. Practical recommendations for hadith scholars\n\n"
-    "Response format:\n"
-    "- Overall Chain Grade: [Sahih/Hasan/Da'if/Mawdu']\n"
-    "- Confidence Level: [High/Medium/Low]\n"
-    "- Critical Issues: [Main concerns]\n"
+    "As a hadith expert, analyze this complete chain of narrators and provide an overall assessment:\n\n"
+    "INDIVIDUAL NARRATOR ANALYSES:\n{narrator_summaries}\n\n"
+    "Provide an overall chain assessment considering:\n"
+    "1. Weakest link principle - the chain is only as strong as its weakest narrator\n"
+    "2. Cumulative reliability - multiple weak narrators compound the weakness\n"
+    "3. Historical context and scholarly methodology\n"
+    "4. Practical recommendations for hadith scholars\n\n"
+    "Response format:\n"
+    "- Overall Chain Grade: [Sahih/Hasan/Da'if/Mawdu']\n"
+    "- Confidence Level: [High/Medium/Low]\n"
+    "- Critical Issues: [Main concerns]\n"
     "- Recommendation: [Accept/Use with caution/Reject]\n"
     "- Reasoning: [Detailed explanation]\n"
-)
+)
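
The comment above says to pair SYNTHESIS_PROMPT with a PromptTemplate; here is a minimal sketch (not part of the commit), assuming PromptTemplate comes from langchain.prompts. The services layer below fills the placeholder with a JSON dump of per-narrator summaries.

    import json

    from langchain.prompts import PromptTemplate

    from constants import SYNTHESIS_PROMPT

    prompt = PromptTemplate(
        template=SYNTHESIS_PROMPT,
        input_variables=["narrator_summaries"],
    )

    # Hypothetical summary entry, mirroring the keys built in services.py.
    summaries = [{"name": "Example narrator", "grade": "example grade", "confidence": "High"}]
    print(prompt.format(narrator_summaries=json.dumps(summaries, ensure_ascii=False)))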
routes.py CHANGED
@@ -3,7 +3,7 @@ from fastapi.responses import JSONResponse
 from typing import List, Dict, Any
 
 from models import (
-    HadithTextRequest,
+    HadithTextRequest,
     NarratorExtractionResponse,
     NarratorAnalysisRequest,
     NarratorAnalysisResponse,
@@ -13,7 +13,7 @@ from models import (
     ChainAnalysisMetadata,
     ExtractionResult,
     ChainAnalysisResult,
-    ExtractAndAnalyzeMetadata
+    ExtractAndAnalyzeMetadata,
 )
 from services import get_llm_service
 
@@ -24,43 +24,43 @@ router = APIRouter(prefix="/api/v1", tags=["hadith-analysis"])
     "/extract-narrators",
     response_model=NarratorExtractionResponse,
     summary="Extract narrators from hadith text",
-    description="Analyzes Arabic hadith text and extracts the chain of narrators (sanad)"
+    description="Analyzes Arabic hadith text and extracts the chain of narrators (sanad)",
 )
 async def extract_narrators(request: HadithTextRequest) -> NarratorExtractionResponse:
     """
     Extract narrators from hadith text.
-
+
     This endpoint takes a complete hadith text in Arabic and uses AI to identify
     and extract the chain of narrators (sanad), returning individual narrator names
     that can be used for database searches.
-
+
     Args:
         request: Contains the hadith text to analyze
-
+
     Returns:
         NarratorExtractionResponse with extracted narrator names and chain
-
+
     Raises:
         HTTPException: If the analysis fails
     """
     try:
         llm_service = get_llm_service()
         result = await llm_service.extract_narrators(request.hadith_text)
-
+
         if not result.success:
             raise HTTPException(
                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=f"Failed to extract narrators: {result.message}"
+                detail=f"Failed to extract narrators: {result.message}",
             )
-
+
         return result
-
+
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Internal server error during narrator extraction: {str(e)}"
+            detail=f"Internal server error during narrator extraction: {str(e)}",
         )
 
 
@@ -68,43 +68,45 @@ async def extract_narrators(request: HadithTextRequest) -> NarratorExtractionRes
     "/analyze-narrator",
     response_model=NarratorAnalysisResponse,
     summary="Analyze narrator reliability",
-    description="Takes a narrator name and generates an AI-powered reliability assessment based on the model's knowledge"
+    description="Takes a narrator name and generates an AI-powered reliability assessment based on the model's knowledge",
 )
-async def analyze_narrator(request: NarratorAnalysisRequest) -> NarratorAnalysisResponse:
+async def analyze_narrator(
+    request: NarratorAnalysisRequest,
+) -> NarratorAnalysisResponse:
     """
     Analyze narrator reliability based on the model's internal knowledge.
-
-    This endpoint takes a narrator's name and uses AI to provide a comprehensive
-    reliability assessment based on its knowledge of Islamic hadith criticism
+
+    This endpoint takes a narrator's name and uses AI to provide a comprehensive
+    reliability assessment based on its knowledge of Islamic hadith criticism
     methodologies and historical narrator evaluations.
-
+
     Args:
         request: Contains the narrator name to analyze
-
+
     Returns:
         NarratorAnalysisResponse with reliability grade, biographical info, and detailed analysis
-
+
     Raises:
         HTTPException: If the analysis fails
     """
     try:
         llm_service = get_llm_service()
         result = await llm_service.analyze_narrator(request.narrator_name)
-
+
         if not result.success:
             raise HTTPException(
                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=f"Failed to analyze narrator: {result.message}"
+                detail=f"Failed to analyze narrator: {result.message}",
             )
-
+
        return result
-
+
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Internal server error during narrator analysis: {str(e)}"
+            detail=f"Internal server error during narrator analysis: {str(e)}",
         )
 
 
@@ -112,22 +114,24 @@ async def analyze_narrator(request: NarratorAnalysisRequest) -> NarratorAnalysis
     "/analyze-narrator-chain",
     response_model=NarratorChainAnalysisResponse,
     summary="Analyze narrator chain",
-    description="Analyzes a complete chain of narrators using enhanced Shamela data + LLM agent"
+    description="Analyzes a complete chain of narrators using enhanced Shamela data + LLM agent",
 )
-async def analyze_narrator_chain(narrator_names: List[str]) -> NarratorChainAnalysisResponse:
+async def analyze_narrator_chain(
+    narrator_names: List[str],
+) -> NarratorChainAnalysisResponse:
     """
     Analyze a complete chain of narrators with enhanced data sources.
-
+
     This endpoint takes a list of narrator names and uses the enhanced agent approach
     to analyze each narrator using both Shamela.ws data and LLM knowledge, then
     provides a synthesized assessment of the complete chain.
-
+
     Args:
         narrator_names: List of narrator names in the chain
-
+
     Returns:
         Dictionary containing individual analyses and chain synthesis
-
+
     Raises:
         HTTPException: If the analysis fails
     """
@@ -135,17 +139,17 @@ async def analyze_narrator_chain(narrator_names: List[str]) -> NarratorChainAnal
         if not narrator_names:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="narrator_names list cannot be empty"
+                detail="narrator_names list cannot be empty",
             )
-
+
         llm_service = get_llm_service()
-
+
         # Analyze individual narrators
         chain_results = await llm_service.analyze_narrator_chain(narrator_names)
-
+
         # Synthesize chain analysis
         synthesis = await llm_service.synthesize_chain_analysis(chain_results)
-
+
         return NarratorChainAnalysisResponse(
             chain=narrator_names,
             individual_analyses={
@@ -159,7 +163,7 @@ async def analyze_narrator_chain(narrator_names: List[str]) -> NarratorChainAnal
                     biographical_info=result.biographical_info,
                     recommendation=result.recommendation,
                     success=result.success,
-                    message=result.message
+                    message=result.message,
                 )
                 for name, result in chain_results.items()
             },
@@ -167,16 +171,16 @@ async def analyze_narrator_chain(narrator_names: List[str]) -> NarratorChainAnal
             metadata=ChainAnalysisMetadata(
                 total_narrators=len(narrator_names),
                 successful_analyses=sum(1 for r in chain_results.values() if r.success),
-                analysis_method="Enhanced agent with Shamela.ws + LLM"
-            )
+                analysis_method="Enhanced agent with Shamela.ws + LLM",
+            ),
         )
-
+
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Internal server error during chain analysis: {str(e)}"
+            detail=f"Internal server error during chain analysis: {str(e)}",
         )
 
 
@@ -184,58 +188,62 @@ async def analyze_narrator_chain(narrator_names: List[str]) -> NarratorChainAnal
     "/extract-and-analyze",
     response_model=ExtractAndAnalyzeResponse,
     summary="Extract narrators and analyze chain",
-    description="Complete workflow: extract narrators from hadith text and analyze the complete chain"
+    description="Complete workflow: extract narrators from hadith text and analyze the complete chain",
 )
-async def extract_and_analyze_hadith(request: HadithTextRequest) -> ExtractAndAnalyzeResponse:
+async def extract_and_analyze_hadith(
+    request: HadithTextRequest,
+) -> ExtractAndAnalyzeResponse:
     """
     Complete hadith analysis workflow: extraction + chain analysis.
-
+
     This endpoint combines narrator extraction and chain analysis in one call,
     providing a complete assessment of a hadith's chain of narration.
-
+
     Args:
         request: Contains the hadith text to analyze
-
+
     Returns:
         Complete analysis including extraction results and chain assessment
-
+
     Raises:
         HTTPException: If the analysis fails
     """
     try:
         llm_service = get_llm_service()
-
+
        # Step 1: Extract narrators
        extraction_result = await llm_service.extract_narrators(request.hadith_text)
-
+
        if not extraction_result.success or not extraction_result.narrators:
            return ExtractAndAnalyzeResponse(
                extraction=ExtractionResult(
                    narrators=extraction_result.narrators,
                    sanad_chain=extraction_result.sanad_chain,
                    success=extraction_result.success,
-                    message=extraction_result.message
+                    message=extraction_result.message,
                ),
                chain_analysis=None,
                metadata=ExtractAndAnalyzeMetadata(
                    hadith_text_length=len(request.hadith_text),
                    extracted_narrators_count=len(extraction_result.narrators),
                    successful_analyses=0,
-                    analysis_method="Enhanced agent with Shamela.ws + LLM"
+                    analysis_method="Enhanced agent with Shamela.ws + LLM",
                ),
-                error="Failed to extract narrators or no narrators found"
+                error="Failed to extract narrators or no narrators found",
            )
-
+
        # Step 2: Analyze narrator chain
-        chain_results = await llm_service.analyze_narrator_chain(extraction_result.narrators)
+        chain_results = await llm_service.analyze_narrator_chain(
+            extraction_result.narrators
+        )
        synthesis = await llm_service.synthesize_chain_analysis(chain_results)
-
+
        return ExtractAndAnalyzeResponse(
            extraction=ExtractionResult(
                narrators=extraction_result.narrators,
                sanad_chain=extraction_result.sanad_chain,
                success=extraction_result.success,
-                message=extraction_result.message
+                message=extraction_result.message,
            ),
            chain_analysis=ChainAnalysisResult(
                individual_analyses={
@@ -249,43 +257,43 @@ async def extract_and_analyze_hadith(request: HadithTextRequest) -> ExtractAndAn
                        biographical_info=result.biographical_info,
                        recommendation=result.recommendation,
                        success=result.success,
-                        message=result.message
+                        message=result.message,
                    )
                    for name, result in chain_results.items()
                },
-                synthesis=synthesis
+                synthesis=synthesis,
            ),
            metadata=ExtractAndAnalyzeMetadata(
                hadith_text_length=len(request.hadith_text),
                extracted_narrators_count=len(extraction_result.narrators),
                successful_analyses=sum(1 for r in chain_results.values() if r.success),
-                analysis_method="Enhanced agent with Shamela.ws + LLM"
-            )
+                analysis_method="Enhanced agent with Shamela.ws + LLM",
+            ),
        )
-
+
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Internal server error during complete analysis: {str(e)}"
+            detail=f"Internal server error during complete analysis: {str(e)}",
         )
 
 
 @router.get(
     "/health",
     summary="Health check",
-    description="Check if the API is running and healthy"
+    description="Check if the API is running and healthy",
 )
 async def health_check():
     """Health check endpoint."""
     return {
-        "status": "healthy",
+        "status": "healthy",
         "message": "SanadCheck API is running",
         "features": [
             "Enhanced narrator analysis with Shamela.ws integration",
             "Narrator chain analysis",
             "Complete hadith workflow analysis",
-            "AI-powered narrator extraction"
-        ]
+            "AI-powered narrator extraction",
+        ],
     }
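
End to end, the routes above compose into the /extract-and-analyze workflow. The following is a sketch (not part of the commit) of calling it with the test client; it assumes the endpoints are POSTs and that HadithTextRequest exposes a hadith_text field, as the handlers' use of request.hadith_text suggests.

    from fastapi.testclient import TestClient

    from app import app

    client = TestClient(app)

    payload = {"hadith_text": "Arabic hadith text, including its sanad, goes here"}
    response = client.post("/api/v1/extract-and-analyze", json=payload)

    print(response.status_code)
    data = response.json()
    print(data["extraction"]["narrators"])                  # extracted narrator names
    print(data["metadata"]["extracted_narrators_count"])    # count from ExtractAndAnalyzeMetadata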
services.py CHANGED
@@ -1,6 +1,6 @@
 from functools import lru_cache
 import json
-from typing import Dict, Any, Optional, cast
+from typing import Dict, Any, cast
 
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.output_parsers import PydanticOutputParser
@@ -17,14 +17,13 @@ import asyncio
 load_dotenv()
 
 
-
 class LLMService:
     """Service class for LLM operations."""
-
+
     def __init__(self):
         self.model_name = "gemini-1.5-flash-latest"
         self._llm = None
-
+
     @property
     def llm(self) -> ChatGoogleGenerativeAI:
         """Lazy initialization of LLM."""
@@ -35,41 +34,46 @@ class LLMService:
                 max_output_tokens=2048,
             )
         return self._llm
-
+
     async def extract_narrators(self, hadith_text: str) -> NarratorExtractionResponse:
         """Extract narrators from hadith text."""
         try:
             # Create parser for structured output
             parser = PydanticOutputParser(pydantic_object=NarratorExtractionResponse)
-
+
             # Create prompt template
             prompt_template = PromptTemplate(
                 template=EXTRACT_PROMPT,
                 input_variables=["hadith_text"],
-                partial_variables={"format_instructions": parser.get_format_instructions()},
+                partial_variables={
+                    "format_instructions": parser.get_format_instructions()
+                },
             )
-
+
             # Create chain
             chain = prompt_template | self.llm | parser
-
+
             # Invoke chain
             result = await chain.ainvoke({"hadith_text": hadith_text})
-
+
             return result
-
+
         except Exception as e:
             return NarratorExtractionResponse(
                 narrators=[],
                 sanad_chain="",
                 success=False,
-                message=f"Error extracting narrators: {str(e)}"
+                message=f"Error extracting narrators: {str(e)}",
             )
+
     async def analyze_narrator(self, narrator_name: str) -> NarratorAnalysisResponse:
         """Enhanced narrator analyzer agent that uses Shamela scraper and LLM reasoning."""
         try:
             # Step 1: Scrape data from Shamela
             try:
-                shamela_data = await ShamelaNarratorExtractor.extract_narrator_by_name(narrator_name)
+                shamela_data = await ShamelaNarratorExtractor.extract_narrator_by_name(
+                    narrator_name
+                )
             except Exception as shamela_error:
                 shamela_data = {"error": f"Extraction failed: {str(shamela_error)}"}
 
@@ -77,15 +81,19 @@ class LLMService:
             try:
                 shamela_context = self._format_shamela_data(shamela_data)
             except Exception as format_error:
-                shamela_context = f"❌ Failed to format Shamela data: {str(format_error)}"
-
+                shamela_context = (
+                    f"❌ Failed to format Shamela data: {str(format_error)}"
+                )
+
             # Step 3: Create enhanced prompt with Shamela data
             try:
                 parser = PydanticOutputParser(pydantic_object=NarratorAnalysisResponse)
                 prompt_template = PromptTemplate(
                     template=ANALYZE_PROMPT,
                     input_variables=["narrator_name", "shamela_context"],
-                    partial_variables={"format_instructions": parser.get_format_instructions()},
+                    partial_variables={
+                        "format_instructions": parser.get_format_instructions()
+                    },
                 )
             except Exception as prompt_error:
                 raise prompt_error
@@ -93,26 +101,29 @@ class LLMService:
             # Step 4: Invoke the enhanced analysis
             try:
                 chain = prompt_template | self.llm | parser
-                result = await chain.ainvoke({
-                    "narrator_name": narrator_name,
-                    "shamela_context": shamela_context
-                })
+                result = await chain.ainvoke(
+                    {"narrator_name": narrator_name, "shamela_context": shamela_context}
+                )
             except Exception as chain_error:
                 raise chain_error
 
             # Step 5: Enhance the response with metadata
             try:
                 total_scholars = 0
-                if shamela_data and isinstance(shamela_data, dict) and not shamela_data.get("error"):
-                    metadata = shamela_data.get('extraction_metadata', {})
+                if (
+                    shamela_data
+                    and isinstance(shamela_data, dict)
+                    and not shamela_data.get("error")
+                ):
+                    metadata = shamela_data.get("extraction_metadata", {})
                     if isinstance(metadata, dict):
-                        total_scholars = metadata.get('total_scholars', 0)
+                        total_scholars = metadata.get("total_scholars", 0)
                 result.message = f"Analysis completed using Shamela data ({total_scholars} scholars) + LLM knowledge"
                 result.success = True
                 return result
             except Exception as metadata_error:
                 return result
-
+
         except Exception as e:
             return NarratorAnalysisResponse(
                 narrator_name=narrator_name,
@@ -124,10 +135,12 @@ class LLMService:
                 biographical_info="Unable to retrieve information due to error",
                 recommendation="Cannot provide recommendation due to analysis failure",
                 success=False,
-                message=f"Error analyzing narrator: {str(e)}"
+                message=f"Error analyzing narrator: {str(e)}",
             )
-
-    async def analyze_narrator_chain(self, narrator_names: list[str]) -> Dict[str, NarratorAnalysisResponse]:
+
+    async def analyze_narrator_chain(
+        self, narrator_names: list[str]
+    ) -> Dict[str, NarratorAnalysisResponse]:
         """Analyze a complete chain of narrators concurrently."""
 
         results: Dict[str, NarratorAnalysisResponse] = {}
@@ -138,7 +151,9 @@ class LLMService:
         print(f"Analyzing chain of {len(narrator_names)} narrators concurrently...")
 
         # Fire off all analysis tasks at once
-        tasks = [asyncio.create_task(self.analyze_narrator(name)) for name in narrator_names]
+        tasks = [
+            asyncio.create_task(self.analyze_narrator(name)) for name in narrator_names
+        ]
 
         # Wait for all to complete, capturing exceptions per-task
         completed = await asyncio.gather(*tasks, return_exceptions=True)
@@ -156,99 +171,126 @@ class LLMService:
                     biographical_info="Error during analysis",
                     recommendation="Cannot recommend due to error",
                    success=False,
-                    message=f"Error in chain analysis: {str(outcome)}"
+                    message=f"Error in chain analysis: {str(outcome)}",
                )
            else:
                results[name] = cast(NarratorAnalysisResponse, outcome)
 
        return results
-    async def synthesize_chain_analysis(self, chain_results: Dict[str, NarratorAnalysisResponse]) -> Dict[str, Any]:
+
+    async def synthesize_chain_analysis(
+        self, chain_results: Dict[str, NarratorAnalysisResponse]
+    ) -> Dict[str, Any]:
        """Synthesize individual narrator analyses into an overall chain assessment."""
        try:
            # Prepare data for synthesis
            narrator_summaries = []
            for name, analysis in chain_results.items():
-                narrator_summaries.append({
-                    "name": name,
-                    "grade": analysis.reliability_grade,
-                    "confidence": analysis.confidence_level,
-                    "reasoning": analysis.reasoning[:200] + "..." if len(analysis.reasoning) > 200 else analysis.reasoning,
-                    "issues": analysis.known_issues
-                })
+                narrator_summaries.append(
+                    {
+                        "name": name,
+                        "grade": analysis.reliability_grade,
+                        "confidence": analysis.confidence_level,
+                        "reasoning": (
+                            analysis.reasoning[:200] + "..."
+                            if len(analysis.reasoning) > 200
+                            else analysis.reasoning
+                        ),
+                        "issues": analysis.known_issues,
+                    }
+                )
            # Create PromptTemplate and invoke LLM
            prompt_template = PromptTemplate(
-                template=SYNTHESIS_PROMPT,
-                input_variables=["narrator_summaries"],
+                template=SYNTHESIS_PROMPT,
+                input_variables=["narrator_summaries"],
            )
 
-            summaries_json = json.dumps(narrator_summaries, ensure_ascii=False, indent=2)
+            summaries_json = json.dumps(
+                narrator_summaries, ensure_ascii=False, indent=2
+            )
            chain = prompt_template | self.llm
-            synthesis_result = await chain.ainvoke({"narrator_summaries": summaries_json})
+            synthesis_result = await chain.ainvoke(
+                {"narrator_summaries": summaries_json}
+            )
 
            # Normalize synthesis text
            synthesis_text = getattr(synthesis_result, "content", synthesis_result)
-
+
            return {
-                "overall_assessment": synthesis_text,
-                "individual_results": chain_results,
-                "chain_length": len(chain_results),
-                "success": True
+                "overall_assessment": synthesis_text,
+                "individual_results": chain_results,
+                "chain_length": len(chain_results),
+                "success": True,
            }
-
+
            return {
                "overall_assessment": synthesis_result.content,
                "individual_results": chain_results,
                "chain_length": len(chain_results),
-                "success": True
+                "success": True,
            }
-
+
        except Exception as e:
            return {
                "overall_assessment": f"Synthesis failed: {str(e)}",
                "individual_results": chain_results,
                "chain_length": len(chain_results),
-                "success": False
+                "success": False,
            }
 
    def _format_shamela_data(self, narrator_info: Dict[str, Any]) -> str:
        """Format Shamela data for LLM consumption."""
        if not narrator_info or narrator_info.get("error"):
            return "❌ No data found on Shamela.ws or extraction failed"
-
+
        context_parts = []
-
+
        # Basic info
        if narrator_info.get("narrator_name"):
-            context_parts.append(f"**Narrator Name (Shamela):** {narrator_info['narrator_name']}")
+            context_parts.append(
+                f"**Narrator Name (Shamela):** {narrator_info['narrator_name']}"
+            )
 
        # Biographical information
        if narrator_info.get("biographical_info"):
            context_parts.append("**📋 Biographical Information:**")
-            for key, value in narrator_info['biographical_info'].items():
+            for key, value in narrator_info["biographical_info"].items():
                context_parts.append(f" • {key}: {value}")
        else:
            context_parts.append("**📋 Biographical Information:** None found")
-
+
        # Scholarly critique
        if narrator_info.get("scholarly_critique"):
-            context_parts.append(f"**📚 Scholarly Opinions ({len(narrator_info['scholarly_critique'])} scholars):**")
-            for i, scholar_critique in enumerate(narrator_info['scholarly_critique'], 1):
+            context_parts.append(
+                f"**📚 Scholarly Opinions ({len(narrator_info['scholarly_critique'])} scholars):**"
+            )
+            for i, scholar_critique in enumerate(
+                narrator_info["scholarly_critique"], 1
+            ):
                context_parts.append(f"\n {i}. **{scholar_critique['scholar']}:**")
-                for comment in scholar_critique['comments']:
+                for comment in scholar_critique["comments"]:
                    context_parts.append(f" - {comment['text']}")
-                    if comment.get('highlighted'):
-                        context_parts.append(f" (Highlighted terms: {', '.join(comment['highlighted'])})")
+                    if comment.get("highlighted"):
+                        context_parts.append(
+                            f" (Highlighted terms: {', '.join(comment['highlighted'])})"
+                        )
        else:
            context_parts.append("**📚 Scholarly Opinions:** None found")
-
+
        # Metadata
        metadata = narrator_info.get("extraction_metadata", {})
        context_parts.append(f"\n**📊 Data Quality:**")
-        context_parts.append(f" • Total scholars cited: {metadata.get('total_scholars', 0)}")
+        context_parts.append(
+            f" • Total scholars cited: {metadata.get('total_scholars', 0)}"
+        )
        context_parts.append(f" • Total comments: {metadata.get('total_comments', 0)}")
-        context_parts.append(f" • Biographical fields: {metadata.get('biographical_fields', 0)}")
-        context_parts.append(f" • Has critique section: {metadata.get('has_critique_section', False)}")
-
+        context_parts.append(
+            f" • Biographical fields: {metadata.get('biographical_fields', 0)}"
+        )
+        context_parts.append(
+            f" • Has critique section: {metadata.get('has_critique_section', False)}"
+        )
+
        return "\n".join(context_parts)
 
 
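
The routes import get_llm_service from this module, but its definition is outside the hunks shown. Given the functools.lru_cache import, it is presumably a cached factory along these lines; this is a sketch, not part of the commit.

    from functools import lru_cache


    @lru_cache()
    def get_llm_service() -> LLMService:
        # One shared LLMService per process; the Gemini client inside it is
        # created lazily by the llm property the first time it is needed.
        return LLMService()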