Spaces:

HydraBolt
/

SanadLLM

Sleeping

App Files Community

Hydra-Bolt committed on Aug 15, 2025

Commit

b3b712b

1 Parent(s): 88259ad

added

Browse files

Files changed (3) hide show

requirements.txt +19 -34
services.py +65 -130
tools/fetch.py +2 -19

requirements.txt CHANGED Viewed

@@ -1,81 +1,66 @@
 annotated-types==0.7.0
 anyio==4.10.0
-asttokens==3.0.0
 beautifulsoup4==4.13.4
 cachetools==5.5.2
 certifi==2025.8.3
 charset-normalizer==3.4.3
 click==8.2.1
-comm==0.2.3
-debugpy==1.8.16
-decorator==5.2.1
-executing==2.2.0
 fastapi==0.116.1
 filetype==1.2.0
 google-ai-generativelanguage==0.6.18
 google-api-core==2.25.1
 google-auth==2.40.3
 googleapis-common-protos==1.70.0
 googlesearch-python==1.3.0
-greenlet==3.1.1
 grpcio==1.74.0
 grpcio-status==1.74.0
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
 idna==3.10
-ipykernel==6.30.1
-ipython==9.4.0
-ipython_pygments_lexers==1.1.1
-jedi==0.19.2
 jsonpatch==1.33
 jsonpointer==3.0.0
-jupyter_client==8.6.3
-jupyter_core==5.8.1
 langchain==0.3.27
 langchain-core==0.3.74
 langchain-google-genai==2.1.9
 langchain-text-splitters==0.3.9
-langsmith==0.4.13
-matplotlib-inline==0.1.7
-nest-asyncio==1.6.0
-orjson==3.11.1
 packaging==25.0
-parso==0.8.4
-pexpect==4.9.0
-platformdirs==4.3.8
-playwright==1.49.1
-prompt_toolkit==3.0.51
 proto-plus==1.26.1
 protobuf==6.31.1
-psutil==7.0.0
-ptyprocess==0.7.0
-pure_eval==0.2.3
 pyasn1==0.6.1
 pyasn1_modules==0.4.2
 pydantic==2.11.7
 pydantic_core==2.33.2
-pyee==12.0.0
-Pygments==2.19.2
-python-dateutil==2.9.0.post0
 python-dotenv==1.1.1
 PyYAML==6.0.2
-pyzmq==27.0.1
 requests==2.32.4
 requests-toolbelt==1.0.0
 rsa==4.9.1
-six==1.17.0
 sniffio==1.3.1
 soupsieve==2.7
-SQLAlchemy==2.0.42
-stack-data==0.6.3
 starlette==0.47.2
 tenacity==9.1.2
-tornado==6.5.2
-traitlets==5.14.3
 typing-inspection==0.4.1
 typing_extensions==4.14.1
 urllib3==2.5.0
 uvicorn==0.35.0
-wcwidth==0.2.13
 zstandard==0.23.0

+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.10.0
+attrs==25.3.0
 beautifulsoup4==4.13.4
 cachetools==5.5.2
 certifi==2025.8.3
 charset-normalizer==3.4.3
 click==8.2.1
 fastapi==0.116.1
 filetype==1.2.0
+frozenlist==1.7.0
 google-ai-generativelanguage==0.6.18
 google-api-core==2.25.1
+google-api-python-client==2.179.0
 google-auth==2.40.3
+google-auth-httplib2==0.2.0
 googleapis-common-protos==1.70.0
 googlesearch-python==1.3.0
+greenlet==3.2.4
 grpcio==1.74.0
 grpcio-status==1.74.0
 h11==0.16.0
 httpcore==1.0.9
+httplib2==0.22.0
 httpx==0.28.1
 idna==3.10
 jsonpatch==1.33
 jsonpointer==3.0.0
 langchain==0.3.27
 langchain-core==0.3.74
 langchain-google-genai==2.1.9
 langchain-text-splitters==0.3.9
+langsmith==0.4.14
+multidict==6.6.4
+orjson==3.11.2
 packaging==25.0
+playwright==1.54.0
+propcache==0.3.2
 proto-plus==1.26.1
 protobuf==6.31.1
 pyasn1==0.6.1
 pyasn1_modules==0.4.2
 pydantic==2.11.7
 pydantic_core==2.33.2
+pyee==13.0.0
+pyparsing==3.2.3
 python-dotenv==1.1.1
 PyYAML==6.0.2
 requests==2.32.4
 requests-toolbelt==1.0.0
 rsa==4.9.1
 sniffio==1.3.1
 soupsieve==2.7
+SQLAlchemy==2.0.43
 starlette==0.47.2
 tenacity==9.1.2
 typing-inspection==0.4.1
 typing_extensions==4.14.1
+uritemplate==4.2.0
 urllib3==2.5.0
 uvicorn==0.35.0
+yarl==1.20.1
 zstandard==0.23.0

services.py CHANGED Viewed

@@ -11,7 +11,69 @@ from models import NarratorExtractionResponse, NarratorAnalysisResponse
 from tools.scrape_shamela import ShamelaNarratorExtractor
 load_dotenv()
 class LLMService:
     """Service class for LLM operations."""
@@ -39,22 +101,7 @@ class LLMService:
             # Create prompt template
             prompt_template = PromptTemplate(
-                template="""
-You are an expert in Islamic hadith sciences and Arabic language. Your task is to analyze the given hadith text and extract the chain of narrators (sanad).
-Instructions:
-1. Identify the complete chain of narration (sanad) in the hadith text
-2. Extract individual narrator names in Arabic
-3. Preserve the original Arabic names exactly as they appear
-4. Focus on the chain that connects back to the Prophet Muhammad (ﷺ) or the source
-{format_instructions}
-Hadith Text:
-{hadith_text}
-Please provide a structured analysis of the narrators.
-""",
                 input_variables=["hadith_text"],
                 partial_variables={"format_instructions": parser.get_format_instructions()},
             )
@@ -74,137 +121,42 @@ Please provide a structured analysis of the narrators.
                 success=False,
                 message=f"Error extracting narrators: {str(e)}"
             )
     async def analyze_narrator(self, narrator_name: str) -> NarratorAnalysisResponse:
         """Enhanced narrator analyzer agent that uses Shamela scraper and LLM reasoning."""
         try:
-            print(f"🔍 Starting analysis for narrator: '{narrator_name}'")
-            print(f"📝 Step 1: Initiating Shamela data extraction...")
             # Step 1: Scrape data from Shamela
             try:
                 shamela_data = await ShamelaNarratorExtractor.extract_narrator_by_name(narrator_name)
-                print(f"✅ Step 1 completed: Shamela data extraction successful")
-                print(f"📊 Shamela data keys: {list(shamela_data.keys()) if shamela_data else 'None'}")
-                if shamela_data and not shamela_data.get("error"):
-                    metadata = shamela_data.get("extraction_metadata", {})
-                    print(f"📈 Shamela extraction stats:")
-                    print(f"   • Total scholars: {metadata.get('total_scholars', 0)}")
-                    print(f"   • Total comments: {metadata.get('total_comments', 0)}")
-                    print(f"   • Biographical fields: {metadata.get('biographical_fields', 0)}")
-                    print(f"   • Has critique section: {metadata.get('has_critique_section', False)}")
-                else:
-                    print(f"⚠️  Shamela data extraction returned error or empty data")
-                    if shamela_data and shamela_data.get("error"):
-                        print(f"   Error details: {shamela_data['error']}")
             except Exception as shamela_error:
-                print(f"❌ Step 1 failed: Shamela extraction error: {str(shamela_error)}")
                 shamela_data = {"error": f"Extraction failed: {str(shamela_error)}"}
-            print(f"📝 Step 2: Formatting Shamela data for LLM...")
             # Step 2: Prepare context for LLM analysis
             try:
                 shamela_context = self._format_shamela_data(shamela_data)
-                print(f"✅ Step 2 completed: Shamela context formatted")
-                print(f"📏 Context length: {len(shamela_context)} characters")
-                print(f"📄 Context preview (first 200 chars): {shamela_context[:200]}...")
             except Exception as format_error:
-                print(f"❌ Step 2 failed: Context formatting error: {str(format_error)}")
                 shamela_context = f"❌ Failed to format Shamela data: {str(format_error)}"
-            print(f"📝 Step 3: Creating LLM parser and prompt template...")
             # Step 3: Create enhanced prompt with Shamela data
             try:
                 parser = PydanticOutputParser(pydantic_object=NarratorAnalysisResponse)
-                print(f"✅ Step 3a completed: Pydantic parser created successfully")
-                print(f"📋 Parser format instructions length: {len(parser.get_format_instructions())} characters")
                 prompt_template = PromptTemplate(
-                    template="""
-You are an expert Islamic scholar specializing in hadith sciences and narrator criticism (Ilm al-Rijal).
-You have been provided with data from Shamela.ws about this narrator, along with your own knowledge.
-Narrator: {narrator_name}
-=== SHAMELA DATA ===
-{shamela_context}
-=== YOUR TASK ===
-Analyze this narrator comprehensively by:
-1. **Evaluating the Shamela data**: Assess the quality and reliability of scholarly opinions found
-2. **Cross-referencing with your knowledge**: Compare with your internal knowledge of hadith literature
-3. **Synthesizing scholarly consensus**: Analyze what different scholars have said
-4. **Identifying patterns**: Look for consistent praise or criticism across sources
-5. **Assigning reliability grade**: Based on the weight of evidence from both sources
-**Analysis Framework:**
-- Prioritize classical hadith scholars (Ibn Hajar, Dhahabi, Ibn Hibban, etc.)
-- Consider the consensus (إجماع) among scholars
-- Weigh criticism vs praise appropriately
-- Account for historical context and scholarly methodology
-**Reliability Grades:**
-- Thiqah (ثقة): Trustworthy - strong consensus of reliability
-- Saduq (صدوق): Truthful - generally reliable with minor reservations
-- Da'if (ضعيف): Weak - significant concerns about reliability
-- Matruk (متروك): Abandoned - severe weakness, narrations rejected
-- Majhul (مجهول): Unknown - insufficient reliable information
-**Instructions:**
-1. If Shamela data contains rich scholarly opinions, prioritize that analysis
-2. If Shamela data is limited, rely more on your knowledge but state the limitations
-3. Always explain your reasoning process clearly
-4. Be honest about confidence levels and data limitations
-5. Provide practical recommendations for hadith scholars
-{format_instructions}
-Provide a comprehensive analysis combining both Shamela data and your scholarly knowledge.
-""",
                     input_variables=["narrator_name", "shamela_context"],
                     partial_variables={"format_instructions": parser.get_format_instructions()},
                 )
-                print(f"✅ Step 3b completed: Prompt template created successfully")
-                print(f"📏 Prompt template length: {len(prompt_template.template)} characters")
             except Exception as prompt_error:
-                print(f"❌ Step 3 failed: Prompt creation error: {str(prompt_error)}")
                 raise prompt_error
-            print(f"📝 Step 4: Creating LLM chain and preparing for invocation...")
             # Step 4: Invoke the enhanced analysis
             try:
-                print(f"🤖 Creating chain: prompt_template | llm | parser")
                 chain = prompt_template | self.llm | parser
-                print(f"✅ Step 4a completed: Chain created successfully")
-                print(f"📤 Preparing chain invocation with parameters:")
-                print(f"   • narrator_name: '{narrator_name}'")
-                print(f"   • shamela_context length: {len(shamela_context)} chars")
-                print(f"🚀 Invoking LLM chain...")
                 result = await chain.ainvoke({
                     "narrator_name": narrator_name,
                     "shamela_context": shamela_context
                 })
-                print(f"✅ Step 4b completed: LLM analysis successful")
-                print(f"📊 LLM Result type: {type(result)}")
-                print(f"📋 Result attributes: {[attr for attr in dir(result) if not attr.startswith('_')]}")
-                if hasattr(result, 'reliability_grade'):
-                    print(f"🎯 Reliability grade assigned: {result.reliability_grade}")
-                if hasattr(result, 'confidence_level'):
-                    print(f"📈 Confidence level: {result.confidence_level}")
             except Exception as chain_error:
-                print(f"❌ Step 4 failed: LLM chain invocation error: {str(chain_error)}")
-                print(f"🔍 Error type: {type(chain_error)}")
                 raise chain_error
-            print(f"📝 Step 5: Enhancing response with metadata...")
             # Step 5: Enhance the response with metadata
             try:
                 total_scholars = 0
@@ -214,28 +166,11 @@ Provide a comprehensive analysis combining both Shamela data and your scholarly
                         total_scholars = metadata.get('total_scholars', 0)
                 result.message = f"Analysis completed using Shamela data ({total_scholars} scholars) + LLM knowledge"
                 result.success = True
-                print(f"✅ Step 5 completed: Response enhanced with metadata")
-                print(f"📝 Final message: {result.message}")
-                print(f"🎉 Analysis completed successfully for narrator: '{narrator_name}'")
                 return result
             except Exception as metadata_error:
-                print(f"❌ Step 5 failed: Metadata enhancement error: {str(metadata_error)}")
-                print(f"⚠️  Returning result without metadata enhancement")
                 return result
         except Exception as e:
-            print(f"💥 CRITICAL ERROR in analyze_narrator for '{narrator_name}':")
-            print(f"   Error type: {type(e).__name__}")
-            print(f"   Error message: {str(e)}")
-            print(f"   Error args: {e.args}")
-            # Try to get more detailed traceback info
-            import traceback
-            print(f"📍 Full traceback:")
-            traceback.print_exc()
-            print(f"🔄 Returning error response...")
             return NarratorAnalysisResponse(
                 narrator_name=narrator_name,
                 reliability_grade="Majhul",
@@ -248,7 +183,7 @@ Provide a comprehensive analysis combining both Shamela data and your scholarly
                 success=False,
                 message=f"Error analyzing narrator: {str(e)}"
             )
     async def analyze_narrator_chain(self, narrator_names: list[str]) -> Dict[str, NarratorAnalysisResponse]:
         """Analyze a complete chain of narrators using the enhanced agent approach."""
         results = {}

 from tools.scrape_shamela import ShamelaNarratorExtractor
 load_dotenv()
+EXTRACT_PROMPT = """
+You are an expert in Islamic hadith sciences and Arabic language. Your task is to analyze the given hadith text and extract the chain of narrators (sanad).
+Instructions:
+1. Identify the complete chain of narration (sanad) in the hadith text
+2. Extract individual narrator names in Arabic
+3. Preserve the original Arabic names exactly as they appear
+4. Focus on the chain that connects back to the Prophet Muhammad (ﷺ) or the source
+{format_instructions}
+Hadith Text:
+{hadith_text}
+Please provide a structured analysis of the narrators.
+"""
+ANALYZE_PROMPT = """
+You are an expert Islamic scholar specializing in hadith sciences and narrator criticism (Ilm al-Rijal).
+You have been provided with data from Shamela.ws about this narrator, along with your own knowledge.
+Original Narrator: {narrator_name}
+=== SHAMELA DATA / CONTEXT ===
+{shamela_context}
+=== PURPOSE ===
+Produce a careful, transparent, and conservative scholarly analysis of the narrator. Never invent facts. When in doubt, be explicit about uncertainty and base conclusions only on clear evidence (Shamela data or explicit mentions in the provided context).
+=== CRITICAL INITIAL CHECK (MANDATORY) ===
+1. First verify whether the Shamela entry actually corresponds to the Original Narrator provided.
+    - If the Shamela entry does NOT match the Original Narrator (e.g., different name, different lineage, or clearly different identity), DO NOT PRODUCE A FULL ANALYSIS. Instead, return only a concise structured statement that the narrator is "Majhul (مجهول)" due to mismatch.
+    - However, before concluding "Majhul", scan the provided shamela_context for any explicit textual mentions about the narrator (for example: biographical notes, short mentions inside a hadith text, or direct phrases referencing the same person). If there IS explicit mention or content about the narrator in the provided context, you may infer limited conclusions strictly from that text — clearly label such conclusions as "inferred from provided context" and keep confidence low.
+    - Under no circumstances fabricate additional biographical details or scholarly opinions beyond what is present in Shamela or what is well-established classical knowledge. When you use your internal knowledge, cite the general source class (e.g., "classical critics such as Ibn Hajar or Dhahabi") and indicate the level of confidence.
+=== ANALYSIS TASKS (if match is confirmed OR if limited inference is possible from provided context) ===
+1. Ensure the Shamela narrator matches the Original Narrator; if matched, proceed.
+2. Evaluate the quality and reliability of scholarly opinions found in Shamela.
+3. Cross-reference with your internal knowledge of hadith literature; state when you are relying on internal knowledge versus Shamela.
+4. Synthesize the scholarly consensus and identify consistent praise or criticism.
+5. Assign a reliability grade and a confidence level, with justification.
+6. Provide practical recommendations for hadith scholars (use, use with caution, reject), and explain reasoning.
+=== RELIABILITY GRADES (use one) ===
+- Thiqah (ثقة) — Trustworthy
+- Saduq (صدوق) — Generally reliable with reservations
+- Da'if (ضعيف) — Weak
+- Matruk (متروك) — Abandoned / rejected
+- Majhul (مجهول) — Unknown / insufficient reliable information
+=== SPECIAL OUTPUT RULE (important) ===
+- If you determined a mismatch and there is NO explicit information in the provided context to infer from, do NOT output a full profile. Only return the minimal structured result indicating:
+  - reliability_grade: "Majhul"
+  - confidence_level: "Low"
+  - reasoning: one short sentence explaining mismatch (e.g., "Shamela entry does not match the provided narrator; insufficient evidence to analyze.")
+  - success: false
+  - message: brief note
+- If you infer anything from explicit mentions in the provided context, label those items as "inferred from provided context" and keep confidence level Low or Medium depending on clarity.
+{format_instructions}
+Provide a clear, humble, and well-justified analysis combining Shamela data and your scholarly knowledge, and always avoid hallucination.
+"""
 class LLMService:
     """Service class for LLM operations."""
             # Create prompt template
             prompt_template = PromptTemplate(
+                template=EXTRACT_PROMPT,
                 input_variables=["hadith_text"],
                 partial_variables={"format_instructions": parser.get_format_instructions()},
             )
                 success=False,
                 message=f"Error extracting narrators: {str(e)}"
             )
     async def analyze_narrator(self, narrator_name: str) -> NarratorAnalysisResponse:
         """Enhanced narrator analyzer agent that uses Shamela scraper and LLM reasoning."""
         try:
             # Step 1: Scrape data from Shamela
             try:
                 shamela_data = await ShamelaNarratorExtractor.extract_narrator_by_name(narrator_name)
             except Exception as shamela_error:
                 shamela_data = {"error": f"Extraction failed: {str(shamela_error)}"}
             # Step 2: Prepare context for LLM analysis
             try:
                 shamela_context = self._format_shamela_data(shamela_data)
             except Exception as format_error:
                 shamela_context = f"❌ Failed to format Shamela data: {str(format_error)}"
             # Step 3: Create enhanced prompt with Shamela data
             try:
                 parser = PydanticOutputParser(pydantic_object=NarratorAnalysisResponse)
                 prompt_template = PromptTemplate(
+                    template=ANALYZE_PROMPT,
                     input_variables=["narrator_name", "shamela_context"],
                     partial_variables={"format_instructions": parser.get_format_instructions()},
                 )
             except Exception as prompt_error:
                 raise prompt_error
             # Step 4: Invoke the enhanced analysis
             try:
                 chain = prompt_template | self.llm | parser
                 result = await chain.ainvoke({
                     "narrator_name": narrator_name,
                     "shamela_context": shamela_context
                 })
             except Exception as chain_error:
                 raise chain_error
             # Step 5: Enhance the response with metadata
             try:
                 total_scholars = 0
                         total_scholars = metadata.get('total_scholars', 0)
                 result.message = f"Analysis completed using Shamela data ({total_scholars} scholars) + LLM knowledge"
                 result.success = True
                 return result
             except Exception as metadata_error:
                 return result
         except Exception as e:
             return NarratorAnalysisResponse(
                 narrator_name=narrator_name,
                 reliability_grade="Majhul",
                 success=False,
                 message=f"Error analyzing narrator: {str(e)}"
             )
     async def analyze_narrator_chain(self, narrator_names: list[str]) -> Dict[str, NarratorAnalysisResponse]:
         """Analyze a complete chain of narrators using the enhanced agent approach."""
         results = {}

tools/fetch.py CHANGED Viewed

@@ -1,25 +1,9 @@
-"""
-Advanced web scraping tool using Playwright for robust HTML fetching.
-This module provides multiple functions for fetching HTML content with different
-capabilities and anti-bot measures:
-1. fetch_html() - Basic HTML fetching with stealth measures
-2. fetch_html_with_js_execution() - Enhanced fetching with JavaScript execution
-3. fetch_html_with_browser() - Fetching with different browser types
-Features:
-- Anti-bot detection measures
-- Realistic browser simulation
-- Multiple retry attempts
-- Configurable delays and timeouts
-- Support for different browsers (Chromium, Firefox, WebKit)
-- Arabic locale support for shamela.ws
-"""
 import random
 import asyncio
 from playwright.async_api import async_playwright
 async def fetch_html(url, max_retries=5, min_delay=1, max_delay=3, headless=True, timeout=30000):
     """
@@ -346,5 +330,4 @@ async def main():
 if __name__ == "__main__":
-    asyncio.run(main())

 import random
 import asyncio
 from playwright.async_api import async_playwright
+import random
 async def fetch_html(url, max_retries=5, min_delay=1, max_delay=3, headless=True, timeout=30000):
     """
 if __name__ == "__main__":
+    asyncio.run(main())