Spaces:
Running
Running
Upload 95 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +31 -0
- .env.example +8 -0
- .gitattributes +6 -0
- .gitignore +29 -0
- ARCHITECTURE.txt +330 -0
- Dockerfile +28 -0
- PROJECT_STRUCTURE.txt +387 -0
- README.md +238 -11
- agent_io/__init__.py +3 -0
- agent_io/benefit_io.py +117 -0
- agent_io/exam_io.py +115 -0
- agent_io/profiling_io.py +111 -0
- agent_io/scheme_io.py +116 -0
- agents/__init__.py +3 -0
- agents/benefit_agent.py +213 -0
- agents/document_agent.py +165 -0
- agents/exam_agent.py +138 -0
- agents/profiling_agent.py +149 -0
- agents/rag_agent.py +91 -0
- agents/scheme_agent.py +142 -0
- agents/search_agent.py +71 -0
- app.py +599 -0
- config.py +8 -0
- data/exams_pdfs/README.txt +13 -0
- data/exams_pdfs/exam.pdf +3 -0
- data/schemes_pdfs/Government Welfare Schemes & Policies - Disha Experts.pdf +3 -0
- data/schemes_pdfs/Government of India Welfare Schemes & Policies For Competitive Exams.pdf +3 -0
- data/schemes_pdfs/README.txt +12 -0
- data/schemes_pdfs/all-indian-government-schemes-list-2026-716.pdf +3 -0
- graph/__init__.py +3 -0
- graph/workflow.py +319 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/58d4a9a45664eb9e12de9549c548c09b6134c17f.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock +0 -0
- hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock +0 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/adapter_config.json +0 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json +0 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja +0 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db +3 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f +173 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 +4 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 +24 -0
- hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 +20 -0
.dockerignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment
|
| 2 |
+
.env
|
| 3 |
+
.venv/
|
| 4 |
+
venv/
|
| 5 |
+
env/
|
| 6 |
+
|
| 7 |
+
# Python cache
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.py[cod]
|
| 10 |
+
*$py.class
|
| 11 |
+
*.so
|
| 12 |
+
|
| 13 |
+
# Git
|
| 14 |
+
.git/
|
| 15 |
+
.gitignore
|
| 16 |
+
|
| 17 |
+
# IDE
|
| 18 |
+
.vscode/
|
| 19 |
+
.idea/
|
| 20 |
+
|
| 21 |
+
# Documentation
|
| 22 |
+
*.md
|
| 23 |
+
ARCHITECTURE.txt
|
| 24 |
+
PROJECT_STRUCTURE.txt
|
| 25 |
+
|
| 26 |
+
# Outputs (will be generated)
|
| 27 |
+
outputs/*.json
|
| 28 |
+
|
| 29 |
+
# RAG indexes (build during deployment)
|
| 30 |
+
rag/scheme_index/
|
| 31 |
+
rag/exam_index/
|
.env.example
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GROQ_API_KEY="your_groq_api_key_here"
|
| 2 |
+
TAVILY_API_KEY="your_tavily_api_key_here"
|
| 3 |
+
HF_TOKEN="your_huggingface_token_here"
|
| 4 |
+
|
| 5 |
+
# Skip vectorstores on memory-constrained platforms
|
| 6 |
+
# Set to "true" to use only web search (saves ~300MB RAM)
|
| 7 |
+
# Set to "false" to use FAISS vectorstores (for Hugging Face Spaces)
|
| 8 |
+
SKIP_VECTORSTORES="false"
|
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/exams_pdfs/exam.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data/schemes_pdfs/all-indian-government-schemes-list-2026-716.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/schemes_pdfs/Government[[:space:]]of[[:space:]]India[[:space:]]Welfare[[:space:]]Schemes[[:space:]]&[[:space:]]Policies[[:space:]]For[[:space:]]Competitive[[:space:]]Exams.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/schemes_pdfs/Government[[:space:]]Welfare[[:space:]]Schemes[[:space:]]&[[:space:]]Policies[[:space:]]-[[:space:]]Disha[[:space:]]Experts.pdf filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
rag/scheme_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment
|
| 2 |
+
.env
|
| 3 |
+
.venv/
|
| 4 |
+
venv/
|
| 5 |
+
env/
|
| 6 |
+
|
| 7 |
+
# Python
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.py[cod]
|
| 10 |
+
*$py.class
|
| 11 |
+
*.so
|
| 12 |
+
|
| 13 |
+
# HuggingFace Cache (downloaded models)
|
| 14 |
+
hf_cache/
|
| 15 |
+
|
| 16 |
+
# RAG Indexes (now included for production)
|
| 17 |
+
# rag/scheme_index/
|
| 18 |
+
# rag/exam_index/
|
| 19 |
+
|
| 20 |
+
# Outputs
|
| 21 |
+
outputs/*.json
|
| 22 |
+
|
| 23 |
+
# IDE
|
| 24 |
+
.vscode/
|
| 25 |
+
.idea/
|
| 26 |
+
|
| 27 |
+
# Data files (optional - uncomment if PDFs are large)
|
| 28 |
+
# data/schemes_pdfs/*.pdf
|
| 29 |
+
# data/exams_pdfs/*.pdf
|
ARCHITECTURE.txt
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
JanSahayak Architecture Overview
|
| 3 |
+
================================
|
| 4 |
+
|
| 5 |
+
SYSTEM COMPONENTS
|
| 6 |
+
-----------------
|
| 7 |
+
|
| 8 |
+
1. AGENTS (agents/)
|
| 9 |
+
- profiling_agent.py → User Profile Extraction
|
| 10 |
+
- scheme_agent.py → Government Scheme Recommendations
|
| 11 |
+
- exam_agent.py → Competitive Exam Recommendations
|
| 12 |
+
- search_agent.py → Live Web Search (Tavily)
|
| 13 |
+
- rag_agent.py → Vector Database Retrieval
|
| 14 |
+
- document_agent.py → PDF/Image Text Extraction
|
| 15 |
+
- benefit_agent.py → Missed Benefits Calculator
|
| 16 |
+
|
| 17 |
+
2. PROMPTS (prompts/)
|
| 18 |
+
- profiling_prompt.py → User profiling instructions
|
| 19 |
+
- scheme_prompt.py → Scheme recommendation template
|
| 20 |
+
- exam_prompt.py → Exam recommendation template
|
| 21 |
+
- rag_prompt.py → RAG retrieval instructions
|
| 22 |
+
|
| 23 |
+
3. RAG SYSTEM (rag/)
|
| 24 |
+
- embeddings.py → HuggingFace embeddings (CPU)
|
| 25 |
+
- scheme_vectorstore.py → FAISS store for schemes
|
| 26 |
+
- exam_vectorstore.py → FAISS store for exams
|
| 27 |
+
|
| 28 |
+
4. TOOLS (tools/)
|
| 29 |
+
- tavily_tool.py → Live government website search
|
| 30 |
+
|
| 31 |
+
5. WORKFLOW (graph/)
|
| 32 |
+
- workflow.py → LangGraph orchestration
|
| 33 |
+
|
| 34 |
+
6. I/O HANDLERS (agent_io/)
|
| 35 |
+
- profiling_io.py → Profiling agent I/O
|
| 36 |
+
- scheme_io.py → Scheme agent I/O
|
| 37 |
+
- exam_io.py → Exam agent I/O
|
| 38 |
+
- benefit_io.py → Benefit agent I/O
|
| 39 |
+
|
| 40 |
+
7. DATA (data/)
|
| 41 |
+
- schemes_pdfs/ → Government scheme PDFs
|
| 42 |
+
- exams_pdfs/ → Competitive exam PDFs
|
| 43 |
+
|
| 44 |
+
8. OUTPUTS (outputs/)
|
| 45 |
+
- results_*.json → Generated analysis results
|
| 46 |
+
|
| 47 |
+
9. CONFIGURATION
|
| 48 |
+
- config.py → Configuration loader
|
| 49 |
+
- .env → API keys (user creates)
|
| 50 |
+
- requirements.txt → Python dependencies
|
| 51 |
+
|
| 52 |
+
10. ENTRY POINTS
|
| 53 |
+
- main.py → Main application
|
| 54 |
+
- setup.py → Setup wizard
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
WORKFLOW EXECUTION
|
| 58 |
+
------------------
|
| 59 |
+
|
| 60 |
+
User Input
|
| 61 |
+
↓
|
| 62 |
+
[Profiling Agent]
|
| 63 |
+
↓
|
| 64 |
+
├─→ [Scheme Agent] ──→ [Benefit Agent] ──┐
|
| 65 |
+
│ ↓ │
|
| 66 |
+
│ [RAG Search] │
|
| 67 |
+
│ ↓ │
|
| 68 |
+
│ [Tavily Search] │
|
| 69 |
+
│ │
|
| 70 |
+
└─→ [Exam Agent] ────────────────────────┤
|
| 71 |
+
↓ │
|
| 72 |
+
[RAG Search] │
|
| 73 |
+
↓ │
|
| 74 |
+
[Tavily Search] │
|
| 75 |
+
↓
|
| 76 |
+
[Final Output]
|
| 77 |
+
↓
|
| 78 |
+
[JSON Results File]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
TECHNOLOGY STACK
|
| 82 |
+
----------------
|
| 83 |
+
|
| 84 |
+
LLM & AI:
|
| 85 |
+
- Groq API (llama-3.3-70b-versatile) → Fast inference
|
| 86 |
+
- LangChain → Agent framework
|
| 87 |
+
- LangGraph → Workflow orchestration
|
| 88 |
+
|
| 89 |
+
Embeddings & Search:
|
| 90 |
+
- HuggingFace Transformers → sentence-transformers/all-MiniLM-L6-v2
|
| 91 |
+
- FAISS (CPU) → Vector similarity search
|
| 92 |
+
|
| 93 |
+
Web Search:
|
| 94 |
+
- Tavily API → Government website search
|
| 95 |
+
|
| 96 |
+
Document Processing:
|
| 97 |
+
- PyPDF → PDF text extraction
|
| 98 |
+
- Pytesseract → OCR for images
|
| 99 |
+
- Pillow → Image processing
|
| 100 |
+
|
| 101 |
+
Infrastructure:
|
| 102 |
+
- Python 3.8+
|
| 103 |
+
- CPU-only deployment (no GPU needed)
|
| 104 |
+
- PyTorch CPU version
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
DATA FLOW
|
| 108 |
+
---------
|
| 109 |
+
|
| 110 |
+
1. User Input Processing:
|
| 111 |
+
Raw Text → Profiling Agent → Structured JSON Profile
|
| 112 |
+
|
| 113 |
+
2. Scheme Recommendation:
|
| 114 |
+
Profile → RAG Query → Vectorstore Search → Top-K Documents
|
| 115 |
+
Profile + Documents → Tavily Search (optional) → Web Results
|
| 116 |
+
Profile + Documents + Web Results → LLM → Recommendations
|
| 117 |
+
|
| 118 |
+
3. Exam Recommendation:
|
| 119 |
+
Profile → RAG Query → Vectorstore Search → Top-K Documents
|
| 120 |
+
Profile + Documents → Tavily Search (optional) → Web Results
|
| 121 |
+
Profile + Documents + Web Results → LLM → Recommendations
|
| 122 |
+
|
| 123 |
+
4. Benefit Calculation:
|
| 124 |
+
Profile + Scheme Recommendations → LLM → Missed Benefits Analysis
|
| 125 |
+
|
| 126 |
+
5. Final Output:
|
| 127 |
+
All Results → JSON Compilation → File Save → User Display
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
API INTERACTIONS
|
| 131 |
+
----------------
|
| 132 |
+
|
| 133 |
+
1. Groq API:
|
| 134 |
+
- Used by: All LLM-powered agents
|
| 135 |
+
- Model: llama-3.3-70b-versatile
|
| 136 |
+
- Purpose: Natural language understanding & generation
|
| 137 |
+
- Rate: Per-request basis
|
| 138 |
+
|
| 139 |
+
2. Tavily API:
|
| 140 |
+
- Used by: search_agent, scheme_agent, exam_agent
|
| 141 |
+
- Purpose: Live government website search
|
| 142 |
+
- Filter: .gov.in domains preferred
|
| 143 |
+
- Depth: Advanced search mode
|
| 144 |
+
|
| 145 |
+
3. HuggingFace:
|
| 146 |
+
- Used by: embeddings module
|
| 147 |
+
- Model: sentence-transformers/all-MiniLM-L6-v2
|
| 148 |
+
- Purpose: Document embeddings for RAG
|
| 149 |
+
- Local: Runs on CPU, cached after first download
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
VECTORSTORE ARCHITECTURE
|
| 153 |
+
------------------------
|
| 154 |
+
|
| 155 |
+
Scheme Vectorstore (rag/scheme_index/):
|
| 156 |
+
├── index.faiss → FAISS index file
|
| 157 |
+
├── index.pkl → Metadata pickle
|
| 158 |
+
└── [Embedded chunks from schemes_pdfs/]
|
| 159 |
+
|
| 160 |
+
Exam Vectorstore (rag/exam_index/):
|
| 161 |
+
├── index.faiss → FAISS index file
|
| 162 |
+
├── index.pkl → Metadata pickle
|
| 163 |
+
└── [Embedded chunks from exams_pdfs/]
|
| 164 |
+
|
| 165 |
+
Embedding Dimension: 384
|
| 166 |
+
Similarity Metric: Cosine similarity
|
| 167 |
+
Chunk Size: Auto (from PyPDF)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
AGENT SPECIALIZATIONS
|
| 171 |
+
---------------------
|
| 172 |
+
|
| 173 |
+
1. Profiling Agent:
|
| 174 |
+
- Extraction-focused
|
| 175 |
+
- Low temperature (0.1)
|
| 176 |
+
- JSON output required
|
| 177 |
+
- No external tools
|
| 178 |
+
|
| 179 |
+
2. Scheme Agent:
|
| 180 |
+
- RAG + Web search
|
| 181 |
+
- Temperature: 0.3
|
| 182 |
+
- Tools: Vectorstore, Tavily
|
| 183 |
+
- Output: Detailed scheme info
|
| 184 |
+
|
| 185 |
+
3. Exam Agent:
|
| 186 |
+
- RAG + Web search
|
| 187 |
+
- Temperature: 0.3
|
| 188 |
+
- Tools: Vectorstore, Tavily
|
| 189 |
+
- Output: Detailed exam info
|
| 190 |
+
|
| 191 |
+
4. Benefit Agent:
|
| 192 |
+
- Calculation-focused
|
| 193 |
+
- Temperature: 0.2
|
| 194 |
+
- No external tools
|
| 195 |
+
- Output: Financial analysis
|
| 196 |
+
|
| 197 |
+
5. Search Agent:
|
| 198 |
+
- Web search only
|
| 199 |
+
- Tool: Tavily API
|
| 200 |
+
- Focus: .gov.in domains
|
| 201 |
+
- Output: Live search results
|
| 202 |
+
|
| 203 |
+
6. RAG Agent:
|
| 204 |
+
- Vectorstore query only
|
| 205 |
+
- Tool: FAISS
|
| 206 |
+
- Similarity search
|
| 207 |
+
- Output: Relevant documents
|
| 208 |
+
|
| 209 |
+
7. Document Agent:
|
| 210 |
+
- File processing
|
| 211 |
+
- Tools: PyPDF, Pytesseract
|
| 212 |
+
- Supports: PDF, Images
|
| 213 |
+
- Output: Extracted text
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
SECURITY & PRIVACY
|
| 217 |
+
------------------
|
| 218 |
+
|
| 219 |
+
- API keys stored in .env (not committed to git)
|
| 220 |
+
- User data processed locally except LLM calls
|
| 221 |
+
- No data stored on external servers (except API providers)
|
| 222 |
+
- PDF data remains local
|
| 223 |
+
- Vectorstores are local
|
| 224 |
+
- Output files saved locally
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
SCALABILITY NOTES
|
| 228 |
+
-----------------
|
| 229 |
+
|
| 230 |
+
Current Setup (Single User):
|
| 231 |
+
- Synchronous workflow
|
| 232 |
+
- Local vectorstores
|
| 233 |
+
- CPU processing
|
| 234 |
+
|
| 235 |
+
Potential Scaling:
|
| 236 |
+
- Add Redis for caching
|
| 237 |
+
- Use cloud vectorstore (Pinecone, Weaviate)
|
| 238 |
+
- Parallel agent execution
|
| 239 |
+
- GPU acceleration for embeddings
|
| 240 |
+
- Database for user profiles
|
| 241 |
+
- API service deployment
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
ERROR HANDLING
|
| 245 |
+
--------------
|
| 246 |
+
|
| 247 |
+
Each agent includes:
|
| 248 |
+
- Try-catch blocks
|
| 249 |
+
- Error state tracking
|
| 250 |
+
- Graceful degradation
|
| 251 |
+
- Partial results on failure
|
| 252 |
+
- Error reporting in final output
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
MONITORING & LOGGING
|
| 256 |
+
--------------------
|
| 257 |
+
|
| 258 |
+
Current:
|
| 259 |
+
- Console print statements
|
| 260 |
+
- Agent start/completion messages
|
| 261 |
+
- Error messages
|
| 262 |
+
- Final output summary
|
| 263 |
+
|
| 264 |
+
Future Enhancement:
|
| 265 |
+
- Structured logging (logging module)
|
| 266 |
+
- Performance metrics
|
| 267 |
+
- API usage tracking
|
| 268 |
+
- User feedback collection
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
EXTENSIBILITY
|
| 272 |
+
-------------
|
| 273 |
+
|
| 274 |
+
Adding New Agent:
|
| 275 |
+
1. Create agent file in agents/
|
| 276 |
+
2. Add prompt template in prompts/
|
| 277 |
+
3. Create node function in workflow.py
|
| 278 |
+
4. Add node to graph
|
| 279 |
+
5. Define edges (connections)
|
| 280 |
+
6. Optional: Create I/O handler
|
| 281 |
+
|
| 282 |
+
Adding New Data Source:
|
| 283 |
+
1. Create vectorstore module in rag/
|
| 284 |
+
2. Add PDFs to data/ subdirectory
|
| 285 |
+
3. Build vectorstore
|
| 286 |
+
4. Create agent or modify existing
|
| 287 |
+
|
| 288 |
+
Adding New Tool:
|
| 289 |
+
1. Create tool in tools/
|
| 290 |
+
2. Import in agent
|
| 291 |
+
3. Use in agent logic
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
PERFORMANCE BENCHMARKS (Typical)
|
| 295 |
+
---------------------------------
|
| 296 |
+
|
| 297 |
+
Vectorstore Building:
|
| 298 |
+
- 10 PDFs: ~2-5 minutes
|
| 299 |
+
- 100 PDFs: ~20-30 minutes
|
| 300 |
+
|
| 301 |
+
Query Performance:
|
| 302 |
+
- Profiling: ~1-2 seconds
|
| 303 |
+
- RAG Search: ~0.5-1 second
|
| 304 |
+
- LLM Call: ~1-3 seconds
|
| 305 |
+
- Web Search: ~2-4 seconds
|
| 306 |
+
- Full Workflow: ~10-20 seconds
|
| 307 |
+
|
| 308 |
+
Memory Usage:
|
| 309 |
+
- Base: ~500 MB
|
| 310 |
+
- With models: ~2-3 GB
|
| 311 |
+
- With large PDFs: +500 MB per 100 PDFs
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
FUTURE ENHANCEMENTS
|
| 315 |
+
-------------------
|
| 316 |
+
|
| 317 |
+
1. Multilingual Support (Hindi, regional languages)
|
| 318 |
+
2. Voice input/output
|
| 319 |
+
3. Mobile app integration
|
| 320 |
+
4. Database for user history
|
| 321 |
+
5. Notification system for deadlines
|
| 322 |
+
6. Document upload interface
|
| 323 |
+
7. Real-time scheme updates
|
| 324 |
+
8. Community feedback integration
|
| 325 |
+
9. State-specific customization
|
| 326 |
+
10. Integration with government portals
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
END OF ARCHITECTURE DOCUMENT
|
| 330 |
+
"""
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Spaces Dockerfile
|
| 2 |
+
FROM python:3.12-slim
|
| 3 |
+
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Install system dependencies
|
| 7 |
+
RUN apt-get update && apt-get install -y \
|
| 8 |
+
build-essential \
|
| 9 |
+
curl \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy requirements first for better caching
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy application files
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
# Expose port 7860 (HuggingFace Spaces default)
|
| 22 |
+
EXPOSE 7860
|
| 23 |
+
|
| 24 |
+
# Set environment variable for port
|
| 25 |
+
ENV PORT=7860
|
| 26 |
+
|
| 27 |
+
# Run the application
|
| 28 |
+
CMD ["python", "app.py"]
|
PROJECT_STRUCTURE.txt
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
JanSahayak - Multi-Agent Government Intelligence System
|
| 2 |
+
========================================================
|
| 3 |
+
|
| 4 |
+
📦 JanSahayak/
|
| 5 |
+
│
|
| 6 |
+
├── 📄 main.py # Main entry point
|
| 7 |
+
├── 📄 setup.py # Setup wizard & utilities
|
| 8 |
+
├── 📄 config.py # Configuration loader
|
| 9 |
+
├── 📄 requirements.txt # Python dependencies
|
| 10 |
+
│
|
| 11 |
+
├── 📄 README.md # Project overview
|
| 12 |
+
├── 📄 USAGE_GUIDE.md # Comprehensive usage guide
|
| 13 |
+
├── 📄 ARCHITECTURE.txt # System architecture
|
| 14 |
+
│
|
| 15 |
+
├── 📄 .env.example # Example environment file
|
| 16 |
+
├── 📄 .gitignore # Git ignore rules
|
| 17 |
+
│
|
| 18 |
+
├── 📁 agents/ # Agent modules
|
| 19 |
+
│ ├── __init__.py
|
| 20 |
+
│ ├── profiling_agent.py # 🧾 User profiling
|
| 21 |
+
│ ├── scheme_agent.py # 🏛️ Scheme recommendations
|
| 22 |
+
│ ├── exam_agent.py # 🎓 Exam recommendations
|
| 23 |
+
│ ├── search_agent.py # 🔎 Web search (Tavily)
|
| 24 |
+
│ ├── rag_agent.py # 📚 RAG retrieval
|
| 25 |
+
│ ├── document_agent.py # 📂 Document processing
|
| 26 |
+
│ └── benefit_agent.py # 💰 Benefit calculator
|
| 27 |
+
│
|
| 28 |
+
├── 📁 prompts/ # Prompt templates
|
| 29 |
+
│ ├── __init__.py
|
| 30 |
+
│ ├── profiling_prompt.py # Profiling instructions
|
| 31 |
+
│ ├── scheme_prompt.py # Scheme recommendation template
|
| 32 |
+
│ ├── exam_prompt.py # Exam recommendation template
|
| 33 |
+
│ └── rag_prompt.py # RAG retrieval template
|
| 34 |
+
│
|
| 35 |
+
├── 📁 rag/ # RAG system
|
| 36 |
+
│ ├── __init__.py
|
| 37 |
+
│ ├── embeddings.py # HuggingFace embeddings
|
| 38 |
+
│ ├── scheme_vectorstore.py # Scheme FAISS store
|
| 39 |
+
│ ├── exam_vectorstore.py # Exam FAISS store
|
| 40 |
+
│ ├── scheme_index/ # Generated vectorstore
|
| 41 |
+
│ │ ├── index.faiss
|
| 42 |
+
│ │ └── index.pkl
|
| 43 |
+
│ └── exam_index/ # Generated vectorstore
|
| 44 |
+
│ ├── index.faiss
|
| 45 |
+
│ └── index.pkl
|
| 46 |
+
│
|
| 47 |
+
├── 📁 tools/ # External tools
|
| 48 |
+
│ ├── __init__.py
|
| 49 |
+
│ └── tavily_tool.py # Tavily search integration
|
| 50 |
+
│
|
| 51 |
+
├── 📁 graph/ # Workflow orchestration
|
| 52 |
+
│ ├── __init__.py
|
| 53 |
+
│ └── workflow.py # LangGraph workflow
|
| 54 |
+
│
|
| 55 |
+
├── 📁 agent_io/ # Agent I/O handlers
|
| 56 |
+
│ ├── __init__.py
|
| 57 |
+
│ ├── profiling_io.py # Profiling I/O
|
| 58 |
+
│ ├── scheme_io.py # Scheme I/O
|
| 59 |
+
│ ├── exam_io.py # Exam I/O
|
| 60 |
+
│ └── benefit_io.py # Benefit I/O
|
| 61 |
+
│
|
| 62 |
+
├── 📁 data/ # PDF data
|
| 63 |
+
│ ├── schemes_pdfs/ # Government scheme PDFs
|
| 64 |
+
│ │ └── README.txt
|
| 65 |
+
│ └── exams_pdfs/ # Competitive exam PDFs
|
| 66 |
+
│ └── README.txt
|
| 67 |
+
│
|
| 68 |
+
└── 📁 outputs/ # Generated results
|
| 69 |
+
├── README.txt
|
| 70 |
+
└── results_*.json # Analysis results
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
KEY FILES DESCRIPTION
|
| 74 |
+
=====================
|
| 75 |
+
|
| 76 |
+
📄 main.py
|
| 77 |
+
----------
|
| 78 |
+
Main application entry point with:
|
| 79 |
+
- Interactive mode for user input
|
| 80 |
+
- File mode for batch processing
|
| 81 |
+
- Result saving and formatting
|
| 82 |
+
- Summary display
|
| 83 |
+
|
| 84 |
+
📄 setup.py
|
| 85 |
+
-----------
|
| 86 |
+
Setup wizard that:
|
| 87 |
+
- Checks dependencies
|
| 88 |
+
- Verifies API keys
|
| 89 |
+
- Validates PDF data
|
| 90 |
+
- Builds vectorstores
|
| 91 |
+
|
| 92 |
+
📄 config.py
|
| 93 |
+
------------
|
| 94 |
+
Loads configuration from .env:
|
| 95 |
+
- GROQ_API_KEY
|
| 96 |
+
- TAVILY_API_KEY
|
| 97 |
+
- HF_TOKEN
|
| 98 |
+
|
| 99 |
+
📁 agents/
|
| 100 |
+
----------
|
| 101 |
+
7 specialized agents:
|
| 102 |
+
1. profiling_agent.py → Extract user profile
|
| 103 |
+
2. scheme_agent.py → Recommend schemes
|
| 104 |
+
3. exam_agent.py → Recommend exams
|
| 105 |
+
4. search_agent.py → Live web search
|
| 106 |
+
5. rag_agent.py → Vector search
|
| 107 |
+
6. document_agent.py → Process PDFs/images
|
| 108 |
+
7. benefit_agent.py → Calculate missed benefits
|
| 109 |
+
|
| 110 |
+
📁 prompts/
|
| 111 |
+
-----------
|
| 112 |
+
Prompt engineering templates for:
|
| 113 |
+
- User profiling instructions
|
| 114 |
+
- Scheme recommendation format
|
| 115 |
+
- Exam recommendation format
|
| 116 |
+
- RAG retrieval guidance
|
| 117 |
+
|
| 118 |
+
📁 rag/
|
| 119 |
+
-------
|
| 120 |
+
RAG (Retrieval Augmented Generation) system:
|
| 121 |
+
- embeddings.py → HuggingFace embeddings
|
| 122 |
+
- scheme_vectorstore.py → Scheme database
|
| 123 |
+
- exam_vectorstore.py → Exam database
|
| 124 |
+
- *_index/ → Generated FAISS indexes
|
| 125 |
+
|
| 126 |
+
📁 tools/
|
| 127 |
+
---------
|
| 128 |
+
External tool integrations:
|
| 129 |
+
- tavily_tool.py → Tavily API for government website search
|
| 130 |
+
|
| 131 |
+
📁 graph/
|
| 132 |
+
---------
|
| 133 |
+
LangGraph workflow orchestration:
|
| 134 |
+
- workflow.py → Defines agent connections and execution flow
|
| 135 |
+
|
| 136 |
+
📁 agent_io/
|
| 137 |
+
------------
|
| 138 |
+
Input/Output handlers for each agent:
|
| 139 |
+
- Separate I/O files for tracking
|
| 140 |
+
- JSON-based data exchange
|
| 141 |
+
- Timestamp tracking
|
| 142 |
+
|
| 143 |
+
📁 data/
|
| 144 |
+
--------
|
| 145 |
+
Training data for RAG:
|
| 146 |
+
- schemes_pdfs/ → Government scheme documents
|
| 147 |
+
- exams_pdfs/ → Competitive exam documents
|
| 148 |
+
|
| 149 |
+
📁 outputs/
|
| 150 |
+
-----------
|
| 151 |
+
Generated analysis results:
|
| 152 |
+
- results_YYYYMMDD_HHMMSS.json
|
| 153 |
+
- Contains all agent outputs
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
WORKFLOW VISUALIZATION
|
| 157 |
+
======================
|
| 158 |
+
|
| 159 |
+
User Input (Text)
|
| 160 |
+
↓
|
| 161 |
+
┌───────────────┐
|
| 162 |
+
│ Profiling │
|
| 163 |
+
│ Agent │
|
| 164 |
+
└───────┬───────┘
|
| 165 |
+
│
|
| 166 |
+
Structured Profile
|
| 167 |
+
│
|
| 168 |
+
┌───────────────┼───────────────┐
|
| 169 |
+
↓ ↓
|
| 170 |
+
┌───────────────┐ ┌───────────────┐
|
| 171 |
+
│ Scheme │ │ Exam │
|
| 172 |
+
│ Agent │ │ Agent │
|
| 173 |
+
└───────┬───────┘ └───────┬───────┘
|
| 174 |
+
│ │
|
| 175 |
+
├─→ RAG Search ├─→ RAG Search
|
| 176 |
+
├─→ Web Search └─→ Web Search
|
| 177 |
+
↓ │
|
| 178 |
+
┌───────────────┐ │
|
| 179 |
+
│ Benefit │ │
|
| 180 |
+
│ Agent │ │
|
| 181 |
+
└───────┬───────┘ │
|
| 182 |
+
│ │
|
| 183 |
+
└───────────────┬───────────────┘
|
| 184 |
+
↓
|
| 185 |
+
┌───────────────┐
|
| 186 |
+
│ Final │
|
| 187 |
+
│ Output │
|
| 188 |
+
└───────────────┘
|
| 189 |
+
↓
|
| 190 |
+
JSON File
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
TECHNOLOGY COMPONENTS
|
| 194 |
+
=====================
|
| 195 |
+
|
| 196 |
+
🧠 Brain (LLM)
|
| 197 |
+
- Groq API (llama-3.3-70b-versatile)
|
| 198 |
+
- Fast inference (<2s per call)
|
| 199 |
+
- Powers all agents
|
| 200 |
+
|
| 201 |
+
📚 Memory (RAG)
|
| 202 |
+
- HuggingFace embeddings (all-MiniLM-L6-v2)
|
| 203 |
+
- FAISS vectorstore (CPU)
|
| 204 |
+
- Semantic search
|
| 205 |
+
|
| 206 |
+
🔍 Live Search
|
| 207 |
+
- Tavily API
|
| 208 |
+
- Government website focus
|
| 209 |
+
- Real-time information
|
| 210 |
+
|
| 211 |
+
🔗 Orchestration
|
| 212 |
+
- LangChain (agent framework)
|
| 213 |
+
- LangGraph (workflow)
|
| 214 |
+
- State management
|
| 215 |
+
|
| 216 |
+
📄 Document Processing
|
| 217 |
+
- PyPDF (PDF extraction)
|
| 218 |
+
- Pytesseract (OCR)
|
| 219 |
+
- Pillow (image handling)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
QUICK START CHECKLIST
|
| 223 |
+
======================
|
| 224 |
+
|
| 225 |
+
□ 1. Install dependencies
|
| 226 |
+
pip install -r requirements.txt
|
| 227 |
+
|
| 228 |
+
□ 2. Create .env file
|
| 229 |
+
Copy .env.example to .env
|
| 230 |
+
Add GROQ_API_KEY and TAVILY_API_KEY
|
| 231 |
+
|
| 232 |
+
□ 3. Add PDF data
|
| 233 |
+
Place PDFs in data/schemes_pdfs/
|
| 234 |
+
Place PDFs in data/exams_pdfs/
|
| 235 |
+
|
| 236 |
+
□ 4. Run setup
|
| 237 |
+
python setup.py
|
| 238 |
+
|
| 239 |
+
□ 5. Build vectorstores
|
| 240 |
+
Automatic during setup, or:
|
| 241 |
+
python setup.py --build-vectorstores
|
| 242 |
+
|
| 243 |
+
□ 6. Run the system
|
| 244 |
+
python main.py
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
USAGE EXAMPLES
|
| 248 |
+
==============
|
| 249 |
+
|
| 250 |
+
Interactive Mode:
|
| 251 |
+
-----------------
|
| 252 |
+
$ python main.py
|
| 253 |
+
|
| 254 |
+
Enter your details:
|
| 255 |
+
I am 25 years old, male, from Maharashtra.
|
| 256 |
+
My family income is 3 lakh per year.
|
| 257 |
+
I belong to OBC category.
|
| 258 |
+
I completed Bachelor's in Engineering.
|
| 259 |
+
I am unemployed and looking for government jobs.
|
| 260 |
+
I am interested in technical and banking sectors.
|
| 261 |
+
|
| 262 |
+
[Press Enter twice to submit]
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
File Mode:
|
| 266 |
+
----------
|
| 267 |
+
$ python main.py user_input.txt
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
Testing Individual Agents:
|
| 271 |
+
---------------------------
|
| 272 |
+
# Test profiling
|
| 273 |
+
python -m agents.profiling_agent
|
| 274 |
+
|
| 275 |
+
# Test scheme agent
|
| 276 |
+
python -m agents.scheme_agent
|
| 277 |
+
|
| 278 |
+
# Test exam agent
|
| 279 |
+
python -m agents.exam_agent
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
Building Vectorstores:
|
| 283 |
+
-----------------------
|
| 284 |
+
python setup.py --build-vectorstores
|
| 285 |
+
|
| 286 |
+
Or in Python:
|
| 287 |
+
from rag.scheme_vectorstore import build_scheme_vectorstore
|
| 288 |
+
from rag.exam_vectorstore import build_exam_vectorstore
|
| 289 |
+
|
| 290 |
+
build_scheme_vectorstore()
|
| 291 |
+
build_exam_vectorstore()
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
OUTPUT FORMAT
|
| 295 |
+
=============
|
| 296 |
+
|
| 297 |
+
Generated file: outputs/results_20260302_143022.json
|
| 298 |
+
|
| 299 |
+
{
|
| 300 |
+
"user_profile": {
|
| 301 |
+
"age": 25,
|
| 302 |
+
"gender": "Male",
|
| 303 |
+
"state": "Maharashtra",
|
| 304 |
+
"income": "300000",
|
| 305 |
+
"caste": "OBC",
|
| 306 |
+
"education": "Bachelor's in Engineering",
|
| 307 |
+
"employment_status": "Unemployed",
|
| 308 |
+
"interests": "Technical, Banking"
|
| 309 |
+
},
|
| 310 |
+
"scheme_recommendations": "...",
|
| 311 |
+
"exam_recommendations": "...",
|
| 312 |
+
"missed_benefits_analysis": "...",
|
| 313 |
+
"errors": []
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
SYSTEM REQUIREMENTS
|
| 318 |
+
===================
|
| 319 |
+
|
| 320 |
+
✅ Python 3.8 or higher
|
| 321 |
+
✅ 4GB RAM minimum (8GB recommended)
|
| 322 |
+
✅ 2GB storage for dependencies
|
| 323 |
+
✅ Internet connection (for APIs)
|
| 324 |
+
✅ CPU only (no GPU needed)
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
API KEYS REQUIRED
|
| 328 |
+
=================
|
| 329 |
+
|
| 330 |
+
🔑 GROQ_API_KEY
|
| 331 |
+
Get from: https://console.groq.com/
|
| 332 |
+
Purpose: LLM inference
|
| 333 |
+
Cost: Free tier available
|
| 334 |
+
|
| 335 |
+
🔑 TAVILY_API_KEY
|
| 336 |
+
Get from: https://tavily.com/
|
| 337 |
+
Purpose: Web search
|
| 338 |
+
Cost: Free tier available
|
| 339 |
+
|
| 340 |
+
🔑 HF_TOKEN (Optional)
|
| 341 |
+
Get from: https://huggingface.co/settings/tokens
|
| 342 |
+
Purpose: Model downloads
|
| 343 |
+
Cost: Free
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
SUPPORT & DOCUMENTATION
|
| 347 |
+
========================
|
| 348 |
+
|
| 349 |
+
📖 Full Usage Guide: USAGE_GUIDE.md
|
| 350 |
+
🏗️ Architecture Details: ARCHITECTURE.txt
|
| 351 |
+
❓ Quick Start: README.md
|
| 352 |
+
🐛 Troubleshooting: See USAGE_GUIDE.md
|
| 353 |
+
|
| 354 |
+
For issues:
|
| 355 |
+
1. Check setup: python setup.py --check
|
| 356 |
+
2. Verify .env file has correct API keys
|
| 357 |
+
3. Ensure PDFs are in data/ directories
|
| 358 |
+
4. Rebuild vectorstores if needed
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
PROJECT STATUS
|
| 362 |
+
==============
|
| 363 |
+
|
| 364 |
+
✅ Core System: Complete
|
| 365 |
+
✅ All 7 Agents: Implemented
|
| 366 |
+
✅ RAG System: Functional
|
| 367 |
+
✅ Web Search: Integrated
|
| 368 |
+
✅ Workflow: Orchestrated
|
| 369 |
+
✅ I/O Handlers: Created
|
| 370 |
+
✅ Documentation: Comprehensive
|
| 371 |
+
|
| 372 |
+
Ready for deployment and testing!
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
NEXT STEPS
|
| 376 |
+
==========
|
| 377 |
+
|
| 378 |
+
1. Add your API keys to .env
|
| 379 |
+
2. Add government scheme and exam PDFs
|
| 380 |
+
3. Run setup wizard
|
| 381 |
+
4. Test the system
|
| 382 |
+
5. Customize prompts as needed
|
| 383 |
+
6. Add more PDF data over time
|
| 384 |
+
7. Monitor and improve
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
Happy Analyzing! 🎉
|
README.md
CHANGED
|
@@ -1,11 +1,238 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: JanSahayak
|
| 3 |
+
emoji: 🙏
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# 🙏 JanSahayak - AI-Powered Government Schemes & Exams Assistant
|
| 11 |
+
|
| 12 |
+
> Your personal AI assistant for discovering government schemes and competitive exam opportunities in India
|
| 13 |
+
|
| 14 |
+
[](https://huggingface.co/spaces)
|
| 15 |
+
[](https://flask.palletsprojects.com/)
|
| 16 |
+
[](https://www.langchain.com/)
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## 🌟 Features
|
| 21 |
+
|
| 22 |
+
### 🤖 Multi-Agent AI System
|
| 23 |
+
- **Profiling Agent**: Extracts structured user information
|
| 24 |
+
- **Scheme Agent**: Recommends relevant government schemes
|
| 25 |
+
- **Exam Agent**: Suggests competitive exams based on qualifications
|
| 26 |
+
- **RAG Agent**: Retrieves information from curated document database
|
| 27 |
+
|
| 28 |
+
### 💡 Intelligent Capabilities
|
| 29 |
+
- ✅ Natural language understanding of user profiles
|
| 30 |
+
- ✅ Smart recommendations based on eligibility criteria
|
| 31 |
+
- ✅ RAG (Retrieval-Augmented Generation) with FAISS vectorstore
|
| 32 |
+
- ✅ Real-time web search via Tavily API
|
| 33 |
+
- ✅ PDF generation for saving recommendations
|
| 34 |
+
- ✅ Beautiful web interface with modern UI
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## 🚀 Deploy to Hugging Face Spaces (Recommended)
|
| 39 |
+
|
| 40 |
+
### Why Hugging Face Spaces?
|
| 41 |
+
- ✅ **16GB RAM for FREE** (perfect for RAG apps!)
|
| 42 |
+
- ✅ Built for ML/AI applications
|
| 43 |
+
- ✅ Git-based deployment
|
| 44 |
+
- ✅ Public URL instantly
|
| 45 |
+
- ✅ Persistent storage
|
| 46 |
+
|
| 47 |
+
### Quick Deploy Steps:
|
| 48 |
+
|
| 49 |
+
**Method 1: Using HF CLI (Easiest)**
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
# Install HF CLI
|
| 53 |
+
pip install huggingface_hub[cli]
|
| 54 |
+
|
| 55 |
+
# Login
|
| 56 |
+
huggingface-cli login
|
| 57 |
+
|
| 58 |
+
# Create Space and push
|
| 59 |
+
huggingface-cli repo create jansahayak --type space --space_sdk docker
|
| 60 |
+
git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/jansahayak
|
| 61 |
+
git push hf main
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
**Method 2: Manual Setup**
|
| 65 |
+
|
| 66 |
+
1. **Create Space** on [huggingface.co/spaces](https://huggingface.co/spaces)
|
| 67 |
+
- Click "Create new Space"
|
| 68 |
+
- Name: `jansahayak`
|
| 69 |
+
- SDK: Select "Docker" (this Space runs the Flask app from its Dockerfile, per the `sdk: docker` frontmatter)
|
| 70 |
+
- Hardware: CPU basic (Free - 16GB RAM!)
|
| 71 |
+
- License: MIT
|
| 72 |
+
|
| 73 |
+
2. **Clone YOUR Space repo** (not GitHub!)
|
| 74 |
+
```bash
|
| 75 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/jansahayak
|
| 76 |
+
cd jansahayak
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
3. **Copy your project files**
|
| 80 |
+
```bash
|
| 81 |
+
# Copy all files from your JanSahayak folder to the cloned space folder
|
| 82 |
+
cp -r /path/to/JanSahayak/* .
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
4. **Add Environment Variables** (Space Settings → Variables and secrets)
|
| 86 |
+
```
|
| 87 |
+
GROQ_API_KEY=your_groq_key
|
| 88 |
+
TAVILY_API_KEY=your_tavily_key
|
| 89 |
+
HF_TOKEN=your_hf_token (optional)
|
| 90 |
+
SKIP_VECTORSTORES=false
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
5. **Push to Space**
|
| 94 |
+
```bash
|
| 95 |
+
git add .
|
| 96 |
+
git commit -m "Initial commit"
|
| 97 |
+
git push
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
Your app will be live at: `https://huggingface.co/spaces/YOUR_USERNAME/jansahayak`
|
| 101 |
+
|
| 102 |
+
### Important Notes:
|
| 103 |
+
- HF Spaces uses its own Git repo (not GitHub directly)
|
| 104 |
+
- App runs on port 7860 by default (Flask uses 5000, update if needed)
|
| 105 |
+
- First deployment may take 5-10 minutes to install dependencies
|
| 106 |
+
- Check Space logs if deployment fails
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
## 🛠️ Local Development
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
# Clone and setup
|
| 114 |
+
git clone https://github.com/YOUR_USERNAME/JanSahayak.git
|
| 115 |
+
cd JanSahayak
|
| 116 |
+
|
| 117 |
+
# Create virtual environment
|
| 118 |
+
python -m venv .venv
|
| 119 |
+
source .venv/bin/activate # Linux/Mac
|
| 120 |
+
.venv\Scripts\activate # Windows
|
| 121 |
+
|
| 122 |
+
# Install dependencies
|
| 123 |
+
pip install -r requirements.txt
|
| 124 |
+
|
| 125 |
+
# Configure API keys
|
| 126 |
+
cp .env.example .env
|
| 127 |
+
# Edit .env with your keys
|
| 128 |
+
|
| 129 |
+
# Build vectorstores (optional - if you have PDFs)
|
| 130 |
+
python init_embeddings.py
|
| 131 |
+
|
| 132 |
+
# Run app
|
| 133 |
+
python app.py
|
| 134 |
+
# or use launcher scripts: start_web.bat (Windows) / ./start_web.sh (Linux/Mac)
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
Visit `http://localhost:5000`
|
| 138 |
+
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
## 🔑 Get API Keys
|
| 142 |
+
|
| 143 |
+
| Service | URL | Free Tier | Used For |
|
| 144 |
+
|---------|-----|-----------|----------|
|
| 145 |
+
| **Groq** | [console.groq.com](https://console.groq.com) | ✅ Yes | LLM Inference |
|
| 146 |
+
| **Tavily** | [tavily.com](https://tavily.com) | 1000 searches/mo | Web Search |
|
| 147 |
+
| **HuggingFace** | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) | ✅ Yes | Model Downloads |
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## 💾 Adding Custom Documents
|
| 152 |
+
|
| 153 |
+
### Government Schemes PDFs
|
| 154 |
+
1. Place PDFs in `data/schemes_pdfs/`
|
| 155 |
+
2. Run `python init_embeddings.py`
|
| 156 |
+
3. Restart app
|
| 157 |
+
|
| 158 |
+
### Exam Information PDFs
|
| 159 |
+
1. Place PDFs in `data/exams_pdfs/`
|
| 160 |
+
2. Run `python init_embeddings.py`
|
| 161 |
+
3. Restart app
|
| 162 |
+
|
| 163 |
+
Automatically indexed and searchable via RAG!
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
## 🧪 Technology Stack
|
| 168 |
+
|
| 169 |
+
- **Backend**: Flask
|
| 170 |
+
- **AI**: LangChain + LangGraph
|
| 171 |
+
- **LLM**: Groq (Llama 3.3 70B)
|
| 172 |
+
- **Embeddings**: sentence-transformers/all-MiniLM-L6-v2
|
| 173 |
+
- **Vector DB**: FAISS (local)
|
| 174 |
+
- **Search**: Tavily API
|
| 175 |
+
- **Frontend**: HTML5 + CSS3 + JavaScript
|
| 176 |
+
|
| 177 |
+
---
|
| 178 |
+
|
| 179 |
+
## 📁 Project Structure
|
| 180 |
+
|
| 181 |
+
```
|
| 182 |
+
JanSahayak/
|
| 183 |
+
├── app.py # Flask web app
|
| 184 |
+
├── main.py # CLI interface
|
| 185 |
+
├── agents/ # AI agents
|
| 186 |
+
│ ├── profiling_agent.py
|
| 187 |
+
│ ├── scheme_agent.py
|
| 188 |
+
│ ├── exam_agent.py
|
| 189 |
+
│ └── rag_agent.py
|
| 190 |
+
├── rag/ # RAG components
|
| 191 |
+
│ ├── embeddings.py
|
| 192 |
+
│ ├── scheme_vectorstore.py
|
| 193 |
+
│ └── exam_vectorstore.py
|
| 194 |
+
├── data/ # Documents
|
| 195 |
+
│ ├── schemes_pdfs/
|
| 196 |
+
│ └── exams_pdfs/
|
| 197 |
+
├── templates/ # HTML templates
|
| 198 |
+
└── static/ # CSS/JS
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
## 🐛 Troubleshooting
|
| 204 |
+
|
| 205 |
+
**Memory issues on local machine?**
|
| 206 |
+
```env
|
| 207 |
+
# Set in .env
|
| 208 |
+
SKIP_VECTORSTORES=true
|
| 209 |
+
```
|
| 210 |
+
Uses web search only (no embeddings needed)
|
| 211 |
+
|
| 212 |
+
**Vectorstore errors?**
|
| 213 |
+
```bash
|
| 214 |
+
rm -rf rag/scheme_index rag/exam_index
|
| 215 |
+
python init_embeddings.py
|
| 216 |
+
```
|
| 217 |
+
|
| 218 |
+
---
|
| 219 |
+
|
| 220 |
+
## 🤝 Contributing
|
| 221 |
+
|
| 222 |
+
Contributions welcome! Fork → Create branch → Submit PR
|
| 223 |
+
|
| 224 |
+
---
|
| 225 |
+
|
| 226 |
+
## 📜 License
|
| 227 |
+
|
| 228 |
+
MIT License
|
| 229 |
+
|
| 230 |
+
---
|
| 231 |
+
|
| 232 |
+
## 🙏 Acknowledgments
|
| 233 |
+
|
| 234 |
+
Built with [LangChain](https://www.langchain.com/), [Groq](https://groq.com/), [Tavily](https://tavily.com/), and ❤️
|
| 235 |
+
|
| 236 |
+
---
|
| 237 |
+
|
| 238 |
+
Made for the people of India 🇮🇳
|
agent_io/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent I/O Module Init
|
| 3 |
+
"""
|
agent_io/benefit_io.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Benefit Agent I/O Handler
|
| 3 |
+
Manages input/output for missed benefits calculator agent
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class BenefitIO:
    """Handles input/output operations for benefit calculator agent.

    Persists the data exchanged with the missed-benefits calculator as
    pretty-printed UTF-8 JSON files so each run can be inspected and replayed.
    """

    def __init__(self, input_file: str = "agent_io/benefit_input.json",
                 output_file: str = "agent_io/benefit_output.json"):
        self.input_file = input_file
        self.output_file = output_file
        self._ensure_directory()

    def _ensure_directory(self):
        """Create the parent directories of both files if they don't exist."""
        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError — only create a directory
        # when one is actually named. Cover the output file's parent too.
        for path in (self.input_file, self.output_file):
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)

    def _read_json(self, path: str, missing_msg: str) -> dict:
        """Load JSON from *path*; return an {"error": ...} dict on any failure.

        Callers of this class check for an "error" key rather than handling
        exceptions, so failures are folded into the returned dict.
        """
        try:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            return {"error": missing_msg}
        except Exception as e:
            return {"error": str(e)}

    def _write_json(self, path: str, payload: dict):
        """Serialize *payload* to *path* as indented UTF-8 JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    def read_input(self) -> dict:
        """
        Read benefit calculator input from file

        Returns:
            Input configuration dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.input_file, "Input file not found")

    def write_input(self, profile_data: dict, scheme_recommendations: str, years: int = 5):
        """
        Write input for benefit calculator

        Args:
            profile_data: User profile dictionary
            scheme_recommendations: Eligible schemes text
            years: Number of years to calculate (default: 5)
        """
        self._write_json(self.input_file, {
            "timestamp": datetime.now().isoformat(),
            "profile": profile_data,
            "scheme_recommendations": scheme_recommendations,
            "calculation_years": years,
            "agent": "benefit_calculator",
        })

    def write_output(self, calculation: dict, metadata: dict = None):
        """
        Write benefit calculation to output file

        Args:
            calculation: Missed benefits calculation
            metadata: Optional metadata about calculation
        """
        self._write_json(self.output_file, {
            "timestamp": datetime.now().isoformat(),
            "calculation": calculation,
            "metadata": metadata or {},
            "agent": "benefit_calculator",
        })

    def read_output(self) -> dict:
        """
        Read previous benefit calculations

        Returns:
            Previous calculations dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.output_file, "Output file not found")
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
if __name__ == "__main__":
    # Smoke-test BenefitIO round trips on disk.
    handler = BenefitIO()

    sample_profile = {
        "age": 25,
        "income": "300000",
    }
    sample_schemes = "PM Kisan: ₹6000/year"

    handler.write_input(sample_profile, sample_schemes, years=5)
    print("Input written successfully")

    sample_calculation = {
        "total_missed": "₹30,000",
        "breakdown": {"2022": "₹6000", "2023": "₹6000"},
    }
    handler.write_output(sample_calculation)
    print("Output written successfully")
|
agent_io/exam_io.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Exam Agent I/O Handler
|
| 3 |
+
Manages input/output for exam recommendation agent
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ExamIO:
    """Handles input/output operations for exam agent.

    Persists the data exchanged with the exam recommendation agent as
    pretty-printed UTF-8 JSON files so each run can be inspected and replayed.
    """

    def __init__(self, input_file: str = "agent_io/exam_input.json",
                 output_file: str = "agent_io/exam_output.json"):
        self.input_file = input_file
        self.output_file = output_file
        self._ensure_directory()

    def _ensure_directory(self):
        """Create the parent directories of both files if they don't exist."""
        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError — only create a directory
        # when one is actually named. Cover the output file's parent too.
        for path in (self.input_file, self.output_file):
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)

    def _read_json(self, path: str, missing_msg: str) -> dict:
        """Load JSON from *path*; return an {"error": ...} dict on any failure.

        Callers of this class check for an "error" key rather than handling
        exceptions, so failures are folded into the returned dict.
        """
        try:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            return {"error": missing_msg}
        except Exception as e:
            return {"error": str(e)}

    def _write_json(self, path: str, payload: dict):
        """Serialize *payload* to *path* as indented UTF-8 JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    def read_input(self) -> dict:
        """
        Read exam agent input from file

        Returns:
            Input configuration dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.input_file, "Input file not found")

    def write_input(self, profile_data: dict, preferences: dict = None):
        """
        Write input for exam agent

        Args:
            profile_data: Student profile dictionary
            preferences: Optional student preferences
        """
        self._write_json(self.input_file, {
            "timestamp": datetime.now().isoformat(),
            "profile": profile_data,
            "preferences": preferences or {},
            "agent": "exam_recommendation",
        })

    def write_output(self, recommendations: dict, metadata: dict = None):
        """
        Write exam recommendations to output file

        Args:
            recommendations: Exam recommendations from agent
            metadata: Optional metadata about the recommendation process
        """
        self._write_json(self.output_file, {
            "timestamp": datetime.now().isoformat(),
            "recommendations": recommendations,
            "metadata": metadata or {},
            "agent": "exam_recommendation",
        })

    def read_output(self) -> dict:
        """
        Read previous exam recommendations

        Returns:
            Previous recommendations dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.output_file, "Output file not found")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == "__main__":
    # Smoke-test ExamIO round trips on disk.
    handler = ExamIO()

    sample_profile = {
        "age": 25,
        "education": "Bachelor's in Engineering",
        "interests": "Technical jobs",
    }
    handler.write_input(sample_profile, {"exam_type": "government"})
    print("Input written successfully")

    sample_recommendations = {
        "exams": [
            {"name": "SSC CGL", "eligibility": "Graduate"},
        ],
    }
    handler.write_output(sample_recommendations, {"sources": 5})
    print("Output written successfully")
|
agent_io/profiling_io.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Profiling Agent I/O Handler
|
| 3 |
+
Manages input/output for user profiling agent
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ProfilingIO:
    """Handles input/output operations for profiling agent.

    Persists the raw user text given to the profiling agent and the
    structured profile it extracts as pretty-printed UTF-8 JSON files.
    """

    def __init__(self, input_file: str = "agent_io/profiling_input.json",
                 output_file: str = "agent_io/profiling_output.json"):
        self.input_file = input_file
        self.output_file = output_file
        self._ensure_directory()

    def _ensure_directory(self):
        """Create the parent directories of both files if they don't exist."""
        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError — only create a directory
        # when one is actually named. Cover the output file's parent too.
        for path in (self.input_file, self.output_file):
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)

    def _read_json(self, path: str, missing_msg: str) -> dict:
        """Load JSON from *path*; return an {"error": ...} dict on any failure.

        Callers of this class check for an "error" key rather than handling
        exceptions, so failures are folded into the returned dict.
        """
        try:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            return {"error": missing_msg}
        except Exception as e:
            return {"error": str(e)}

    def _write_json(self, path: str, payload: dict):
        """Serialize *payload* to *path* as indented UTF-8 JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    def read_input(self) -> dict:
        """
        Read profiling agent input from file

        Returns:
            Raw user input dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.input_file, "Input file not found")

    def write_input(self, user_input: str, documents: list = None):
        """
        Write raw user input for profiling

        Args:
            user_input: Raw text input from user
            documents: Optional list of uploaded documents
        """
        self._write_json(self.input_file, {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "documents": documents or [],
            "agent": "user_profiling",
        })

    def write_output(self, profile_data: dict, confidence: dict = None):
        """
        Write extracted profile to output file

        Args:
            profile_data: Structured profile data
            confidence: Optional confidence scores for extracted fields
        """
        self._write_json(self.output_file, {
            "timestamp": datetime.now().isoformat(),
            "profile": profile_data,
            "confidence": confidence or {},
            "agent": "user_profiling",
        })

    def read_output(self) -> dict:
        """
        Read extracted profile

        Returns:
            Structured profile dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.output_file, "Output file not found")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == "__main__":
    # Smoke-test ProfilingIO round trips on disk.
    handler = ProfilingIO()

    raw_text = "I am 25 years old from Maharashtra, OBC category, income 3 lakh."
    handler.write_input(raw_text, documents=["resume.pdf"])
    print("Input written successfully")

    extracted_profile = {
        "age": 25,
        "state": "Maharashtra",
        "caste": "OBC",
        "income": "300000",
    }
    handler.write_output(extracted_profile, confidence={"age": 1.0, "state": 1.0})
    print("Output written successfully")
|
agent_io/scheme_io.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Scheme Agent I/O Handler
|
| 3 |
+
Manages input/output for scheme recommendation agent
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class SchemeIO:
    """Handles input/output operations for scheme agent.

    Persists the data exchanged with the scheme recommendation agent as
    pretty-printed UTF-8 JSON files so each run can be inspected and replayed.
    """

    def __init__(self, input_file: str = "agent_io/scheme_input.json",
                 output_file: str = "agent_io/scheme_output.json"):
        self.input_file = input_file
        self.output_file = output_file
        self._ensure_directory()

    def _ensure_directory(self):
        """Create the parent directories of both files if they don't exist."""
        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError — only create a directory
        # when one is actually named. Cover the output file's parent too.
        for path in (self.input_file, self.output_file):
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)

    def _read_json(self, path: str, missing_msg: str) -> dict:
        """Load JSON from *path*; return an {"error": ...} dict on any failure.

        Callers of this class check for an "error" key rather than handling
        exceptions, so failures are folded into the returned dict.
        """
        try:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            return {"error": missing_msg}
        except Exception as e:
            return {"error": str(e)}

    def _write_json(self, path: str, payload: dict):
        """Serialize *payload* to *path* as indented UTF-8 JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    def read_input(self) -> dict:
        """
        Read scheme agent input from file

        Returns:
            Input configuration dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.input_file, "Input file not found")

    def write_input(self, profile_data: dict, preferences: dict = None):
        """
        Write input for scheme agent

        Args:
            profile_data: User profile dictionary
            preferences: Optional user preferences
        """
        self._write_json(self.input_file, {
            "timestamp": datetime.now().isoformat(),
            "profile": profile_data,
            "preferences": preferences or {},
            "agent": "scheme_recommendation",
        })

    def write_output(self, recommendations: dict, metadata: dict = None):
        """
        Write scheme recommendations to output file

        Args:
            recommendations: Scheme recommendations from agent
            metadata: Optional metadata about the recommendation process
        """
        self._write_json(self.output_file, {
            "timestamp": datetime.now().isoformat(),
            "recommendations": recommendations,
            "metadata": metadata or {},
            "agent": "scheme_recommendation",
        })

    def read_output(self) -> dict:
        """
        Read previous scheme recommendations

        Returns:
            Previous recommendations dictionary, or {"error": ...} on failure
        """
        return self._read_json(self.output_file, "Output file not found")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == "__main__":
    # Smoke-test SchemeIO round trips on disk.
    handler = SchemeIO()

    sample_profile = {
        "age": 25,
        "income": "300000",
        "state": "Maharashtra",
        "caste": "OBC",
    }
    handler.write_input(sample_profile, {"priority": "high_benefit"})
    print("Input written successfully")

    sample_recommendations = {
        "schemes": [
            {"name": "PM Kisan", "benefit": "₹6000/year"},
        ],
    }
    handler.write_output(sample_recommendations, {"sources": 5})
    print("Output written successfully")
|
agents/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agents Module Init
|
| 3 |
+
"""
|
agents/benefit_agent.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Missed Benefits Calculator Agent
|
| 3 |
+
Estimates potential benefits user might have missed
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
from langchain_groq import ChatGroq
|
| 8 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 9 |
+
from config import GROQ_API_KEY
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_llm():
    """Build the Groq LLM client used for benefit calculations."""
    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY not found in environment variables")

    # Low temperature keeps the monetary estimates conservative and repeatable.
    llm_config = {
        "api_key": GROQ_API_KEY,
        "model": "llama-3.3-70b-versatile",
        "temperature": 0.2,
    }
    return ChatGroq(**llm_config)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def calculate_missed_benefits(profile_data: dict, scheme_recommendations: str) -> dict:
    """
    Calculates potential benefits the user might have missed in the past

    Args:
        profile_data: User profile dictionary
        scheme_recommendations: Recommended schemes text

    Returns:
        Dictionary with missed benefits calculation
        (keys: "calculation", "profile_considered", "schemes_analyzed";
        on failure: "error" plus a fallback "calculation" message)
    """
    try:
        llm = get_llm()

        # Serialize the profile so it can be embedded verbatim in the prompt.
        profile_str = json.dumps(profile_data, indent=2)

        prompt = f"""
You are a financial analyst specializing in Indian government welfare schemes.

Based on the user's profile and recommended schemes, calculate how much money/benefits
they might have missed in the past 5 years by not applying to eligible schemes.

**USER PROFILE:**
{profile_str}

**RECOMMENDED SCHEMES:**
{scheme_recommendations}

**ANALYSIS REQUIREMENTS:**

1. **Identify Eligible Schemes:**
   - List schemes user was eligible for in past 5 years
   - Consider age, income, education criteria over time

2. **Calculate Monetary Benefits:**
   - One-time payments missed
   - Annual recurring benefits missed
   - Subsidies or discounts not availed
   - Total missed amount (conservative estimate)

3. **Non-Monetary Benefits:**
   - Training opportunities missed
   - Healthcare benefits not utilized
   - Educational scholarships lost
   - Employment opportunities missed

4. **Year-wise Breakdown:**
   - Provide year-wise missed benefit estimate
   - Account for scheme start dates
   - Consider eligibility changes over time

5. **Actionable Insights:**
   - Can any benefits be claimed retroactively?
   - Which schemes should be applied immediately?
   - Priority ranking for current applications

**OUTPUT FORMAT:**

### Total Missed Benefits (Past 5 Years)
- **Monetary Loss:** ₹[Amount]
- **Non-Monetary Loss:** [Description]

### Year-wise Breakdown
**2022:**
- Scheme Name: ₹[Amount] | [Benefit Description]

**2023:**
- Scheme Name: ₹[Amount] | [Benefit Description]

[Continue for all years]

### Retroactive Claims Possible
- List schemes that allow backdated applications
- Required documentation for backdated claims

### Immediate Action Items
1. [Highest priority scheme to apply now]
2. [Second priority scheme]
3. [Third priority scheme]

### Future Projections
If user applies now, estimated benefits over next 5 years: ₹[Amount]

---

**IMPORTANT NOTES:**
- Provide conservative estimates (lower bound)
- Mark assumptions clearly
- Only include verified government schemes
- Consider state-specific schemes based on user's state
- Factor in income bracket changes over time

Proceed with calculation:
"""

        messages = [
            SystemMessage(content="You are a financial analyst for government welfare schemes. Provide realistic, conservative estimates."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        return {
            "calculation": response.content,
            "profile_considered": profile_data.get('age', 'N/A'),
            "schemes_analyzed": "Available in recommendations"
        }

    except Exception as e:
        # Never propagate: callers expect a dict either way.
        return {
            "error": str(e),
            "calculation": "Unable to calculate missed benefits"
        }
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def estimate_future_benefits(profile_data: dict, scheme_recommendations: str, years: int = 5) -> dict:
    """
    Estimates potential benefits over the next N years if user applies now

    Args:
        profile_data: User profile dictionary
        scheme_recommendations: Recommended schemes text
        years: Number of years to project (default: 5)

    Returns:
        Dictionary with future benefits projection
        (keys: "projection", "years_projected", "profile_age";
        on failure: "error" plus a fallback "projection" message)
    """
    try:
        llm = get_llm()

        profile_str = json.dumps(profile_data, indent=2)

        prompt = f"""
Based on the user's current profile and eligible schemes, estimate the total benefits
they can receive over the next {years} years if they apply immediately.

**USER PROFILE:**
{profile_str}

**ELIGIBLE SCHEMES:**
{scheme_recommendations}

Provide:
1. Year-wise projected benefits
2. Total estimated benefits over {years} years
3. Required actions to maximize benefits
4. Key deadlines to watch

Return structured calculation with conservative estimates.
"""

        messages = [
            SystemMessage(content="You are a financial projection analyst for government schemes."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        return {
            "projection": response.content,
            "years_projected": years,
            "profile_age": profile_data.get('age', 'N/A')
        }

    except Exception as e:
        # Swallow errors into the payload so the workflow keeps going.
        return {
            "error": str(e),
            "projection": "Unable to estimate future benefits"
        }
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
    # Manual smoke test for the missed-benefits calculator.
    demo_profile = {
        "age": 25,
        "income": "300000",
        "caste": "OBC",
        "state": "Maharashtra",
        "education": "Bachelor's in Engineering",
        "employment_status": "Unemployed",
    }

    demo_schemes = """
    1. PM Kisan Samman Nidhi: ₹6000 per year
    2. Post Matric Scholarship (OBC): ₹5000-10000 per year
    3. Skill Development Scheme: Free training worth ₹20000
    """

    outcome = calculate_missed_benefits(demo_profile, demo_schemes)
    print(json.dumps(outcome, indent=2))
|
agents/document_agent.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Processing Agent
|
| 3 |
+
Handles PDF and image text extraction
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import pytesseract
|
| 8 |
+
from PIL import Image
|
| 9 |
+
from pypdf import PdfReader
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def process_pdf(file_path: str) -> dict:
    """
    Extract text from a PDF file.

    Args:
        file_path: Path to PDF file

    Returns:
        Dictionary with extracted text and metadata; on failure an
        {"error": ..., "text": "", ...} payload instead of raising.
    """
    try:
        if not os.path.exists(file_path):
            return {"error": f"File not found: {file_path}", "text": ""}

        reader = PdfReader(file_path)
        text = ""

        for page_num, page in enumerate(reader.pages):
            # pypdf's extract_text() may return None for image-only pages;
            # coalesce to "" so the literal string "None" is never embedded.
            page_text = page.extract_text() or ""
            text += f"\n--- Page {page_num + 1} ---\n{page_text}"

        return {
            "file_path": file_path,
            "pages": len(reader.pages),
            "text": text,
            "success": True
        }

    except Exception as e:
        return {
            "error": str(e),
            "file_path": file_path,
            "text": "",
            "success": False
        }
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def process_image(file_path: str, language: str = 'eng+hin') -> dict:
    """
    Run OCR over an image file and return the recognized text.

    Args:
        file_path: Path to image file
        language: Tesseract language code (default: English + Hindi)

    Returns:
        Dictionary with extracted text and metadata
    """
    try:
        if not os.path.exists(file_path):
            return {"error": f"File not found: {file_path}", "text": ""}

        image = Image.open(file_path)
        recognized = pytesseract.image_to_string(image, lang=language)

        return {
            "file_path": file_path,
            "image_size": image.size,
            "text": recognized,
            "success": True,
        }
    except Exception as exc:
        return {
            "error": str(exc),
            "file_path": file_path,
            "text": "",
            "success": False,
        }
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def process_resume(file_path: str) -> dict:
    """
    Extract text from a resume (PDF or image) and tag simple content signals.

    Args:
        file_path: Path to resume file

    Returns:
        Extracted resume information
    """
    extension = os.path.splitext(file_path)[1].lower()

    if extension == '.pdf':
        outcome = process_pdf(file_path)
    elif extension in ['.jpg', '.jpeg', '.png', '.tiff', '.bmp']:
        outcome = process_image(file_path)
    else:
        return {
            "error": f"Unsupported file format: {extension}",
            "text": "",
            "success": False,
        }

    if outcome.get("success"):
        # Lightweight heuristics; NOTE(review): "contains_phone" only checks
        # for the presence of any digit, not an actual phone-number pattern.
        body = outcome["text"]
        outcome["document_type"] = "resume"
        outcome["contains_email"] = "@" in body
        outcome["contains_phone"] = any(ch.isdigit() for ch in body)

    return outcome
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def batch_process_documents(folder_path: str, file_type: str = "pdf") -> list:
    """
    Process every matching document inside a folder.

    Args:
        folder_path: Path to folder containing documents
        file_type: Type of files to process ("pdf" or "image")

    Returns:
        List of processing results for each document
    """
    if not os.path.exists(folder_path):
        return [{"error": f"Folder not found: {folder_path}"}]

    extension_map = {
        "pdf": [".pdf"],
        "image": [".jpg", ".jpeg", ".png", ".tiff", ".bmp"],
    }
    # Unknown file_type values fall back to PDF handling, matching the
    # dispatch below (anything other than "pdf" would use the image path
    # but only for its own extensions).
    accepted = extension_map.get(file_type, [".pdf"])

    outcomes = []
    for entry in os.listdir(folder_path):
        if os.path.splitext(entry)[1].lower() not in accepted:
            continue
        full_path = os.path.join(folder_path, entry)
        if file_type == "pdf":
            outcomes.append(process_pdf(full_path))
        else:
            outcomes.append(process_image(full_path))

    return outcomes
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
if __name__ == "__main__":
    # Manual smoke test: real file paths are required to exercise the
    # helpers, so just advertise the public API.
    print("Document Processing Agent")
    print("=" * 50)
    print("Available functions:")
    for entry in (
        "1. process_pdf(file_path)",
        "2. process_image(file_path)",
        "3. process_resume(file_path)",
        "4. batch_process_documents(folder_path, file_type)",
    ):
        print(entry)
|
agents/exam_agent.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Exam Recommendation Agent
|
| 3 |
+
Provides competitive exam recommendations based on student profile
|
| 4 |
+
Uses FAISS for local vector storage
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from langchain_groq import ChatGroq
|
| 9 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 10 |
+
from rag.exam_vectorstore import load_exam_vectorstore
|
| 11 |
+
from prompts.exam_prompt import EXAM_PROMPT
|
| 12 |
+
from tools.tavily_tool import government_focused_search
|
| 13 |
+
from config import GROQ_API_KEY
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_llm():
    """Return the Groq chat model configured for exam recommendations."""
    if GROQ_API_KEY:
        return ChatGroq(
            api_key=GROQ_API_KEY,
            model="llama-3.3-70b-versatile",
            temperature=0.3,
        )
    raise ValueError("GROQ_API_KEY not found in environment variables")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def run_exam_agent(profile_data: dict, use_web_search: bool = True, vectorstore=None) -> dict:
    """
    Recommends competitive exams based on student profile

    Args:
        profile_data: Structured user profile
        use_web_search: Whether to use Tavily for live search
        vectorstore: Pre-loaded FAISS vectorstore (optional, avoids repeated loading)

    Returns:
        Exam recommendations dictionary
        (keys: "recommendations", "sources_used", "web_search_used";
        on failure: "error" plus an empty "recommendations" list)
    """
    try:
        # Use provided vectorstore or try to load it
        context = ""
        sources_used = 0

        if vectorstore is not None:
            print("✅ Using pre-loaded vectorstore")
            try:
                # Create search query from profile
                search_query = f"""
                Student Profile:
                Education: {profile_data.get('education', 'N/A')}
                Age: {profile_data.get('age', 'N/A')}
                Interests: {profile_data.get('interests', 'N/A')}
                Skills: {profile_data.get('skills', 'N/A')}
                Occupation: {profile_data.get('occupation', 'N/A')}
                """

                # RAG retrieval
                docs = vectorstore.similarity_search(search_query, k=5)
                context = "\n\n".join([f"Document {i+1}:\n{d.page_content}" for i, d in enumerate(docs)])
                sources_used = len(docs)
                print(f"✓ Retrieved {sources_used} exam documents from vectorstore")
            except Exception as e:
                # Vectorstore failure degrades gracefully to web search only.
                print(f"⚠️ Error querying vectorstore: {str(e)}")
                context = "Vectorstore query failed. Using live web search."
        else:
            print("ℹ️ No vectorstore provided, using web search only")
            context = "No local exam database available. Using live web search."

        # Create profile string
        profile_str = json.dumps(profile_data, indent=2)

        # Web search (fallback or enhancement)
        web_context = ""
        if use_web_search:
            try:
                education = profile_data.get('education', 'graduate')
                interests = profile_data.get('interests', 'government jobs')
                web_query = f"competitive exams India {education} {interests} eligibility 2026"
                print(f"🔍 Searching web: {web_query}")
                web_results = government_focused_search(web_query)
                web_context = f"\n\nLive Web Search Results:\n{web_results}"
                print("✓ Web search completed")
            except Exception as e:
                # Web search is best-effort; failure is noted in the context.
                web_context = f"\n\nWeb search unavailable: {str(e)}"
                print(f"⚠ Web search failed: {str(e)}")

        # Combine contexts
        full_context = context + web_context

        # If no context at all, return helpful message
        if not full_context.strip():
            return {
                "recommendations": "Unable to retrieve exam information. Please ensure Tavily API key is configured or vectorstore is built.",
                "sources_used": 0,
                "web_search_used": use_web_search
            }

        # Generate recommendations
        llm = get_llm()

        prompt = EXAM_PROMPT.format(
            context=full_context,
            profile=profile_str
        )

        messages = [
            SystemMessage(content="You are an expert competitive exam advisor. Provide accurate, verified information only."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        return {
            "recommendations": response.content,
            "sources_used": sources_used,
            "web_search_used": use_web_search
        }

    except Exception as e:
        # NOTE(review): on error "recommendations" is a list, elsewhere a
        # string — callers must handle both; confirm downstream usage.
        return {
            "error": str(e),
            "recommendations": []
        }
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
if __name__ == "__main__":
    # Quick manual check with web search disabled (offline-friendly).
    demo_profile = {
        "education": "Bachelor's in Engineering",
        "age": 25,
        "interests": "Technical jobs, government sector",
        "skills": "Programming, problem solving",
        "occupation": "Student",
    }

    outcome = run_exam_agent(demo_profile, use_web_search=False)
    print(json.dumps(outcome, indent=2))
|
agents/profiling_agent.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
User Profiling Agent
|
| 3 |
+
Extracts structured user information for eligibility matching
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
from langchain_groq import ChatGroq
|
| 8 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 9 |
+
from prompts.profiling_prompt import PROFILING_PROMPT
|
| 10 |
+
from config import GROQ_API_KEY
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_llm():
    """Groq client for profiling; near-zero temperature keeps JSON output stable."""
    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY not found in environment variables")

    model_kwargs = dict(
        api_key=GROQ_API_KEY,
        model="llama-3.3-70b-versatile",
        temperature=0.1,  # Low temperature for structured extraction
    )
    return ChatGroq(**model_kwargs)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def extract_json_from_text(text: str) -> dict:
    """
    Pull a JSON object out of raw LLM output.

    Tries, in order: the whole string, a fenced markdown block, the widest
    brace-delimited span, then any smaller JSON-like fragment.

    Returns the parsed dict, or None when nothing parses.
    """
    import re

    # 1) The whole response may already be valid JSON.
    try:
        return json.loads(text.strip())
    except json.JSONDecodeError:
        pass

    # 2) JSON wrapped in a ``` / ```json fenced code block.
    fenced = re.findall(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced[0])
        except json.JSONDecodeError:
            pass

    # 3) Widest span: from the first '{' through the last '}'.
    first = text.find('{')
    last = text.rfind('}')
    if first != -1 and last != -1 and last > first:
        try:
            return json.loads(text[first:last + 1])
        except json.JSONDecodeError:
            pass

    # 4) Any JSON-like fragment with at most one nesting level.
    for candidate in re.findall(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text, re.DOTALL):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    return None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def run_profiling_agent(user_input: str) -> dict:
    """
    Extracts structured profile information from user input

    Args:
        user_input: Raw user input text

    Returns:
        Structured profile dictionary with lowercase/underscore keys;
        falls back to a dict carrying the raw input plus "raw_profile"/"note"
        when JSON parsing fails, or "error" when the LLM call itself fails
    """
    try:
        llm = get_llm()

        prompt = PROFILING_PROMPT.format(user_input=user_input)

        messages = [
            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        print(f"\n🤖 LLM Response (first 200 chars): {response.content[:200]}...")

        # Extract JSON from response
        profile_data = extract_json_from_text(response.content)

        if profile_data:
            # Normalize keys to lowercase with underscores so downstream
            # agents can rely on a uniform key format.
            normalized_profile = {}
            for key, value in profile_data.items():
                normalized_key = key.lower().replace(' ', '_').replace('-', '_')
                normalized_profile[normalized_key] = value

            print(f"✅ Profile extracted: {list(normalized_profile.keys())}")
            return normalized_profile
        else:
            # Fallback: Create basic profile from user input
            print("⚠️ Could not parse JSON, creating basic profile")
            return {
                "user_input": user_input,
                "raw_profile": response.content,
                "note": "Profile extraction incomplete. Using raw input."
            }

    except Exception as e:
        print(f"❌ Profiling error: {str(e)}")
        return {
            "error": str(e),
            "user_input": user_input
        }
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def validate_profile(profile_data: dict) -> bool:
    """
    Check that a profile carries the minimum fields needed for matching.

    Args:
        profile_data: Profile dictionary

    Returns:
        True if valid, False otherwise
    """
    # Each required field must be present and actually filled in
    # ("Not Provided" is the extractor's placeholder for missing data).
    return all(
        field in profile_data and profile_data[field] != "Not Provided"
        for field in ('age', 'state', 'education')
    )
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
if __name__ == "__main__":
    # Manual check of the extraction pipeline.
    sample_text = """
    I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
    My family income is around 3 lakh per year. I belong to the OBC category.
    I am currently unemployed and looking for government job opportunities.
    """

    extracted = run_profiling_agent(sample_text)
    print(json.dumps(extracted, indent=2))
|
agents/rag_agent.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Retrieval Agent
|
| 3 |
+
Dedicated agent for vector database queries
|
| 4 |
+
Uses FAISS for local vector storage
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from rag.scheme_vectorstore import load_scheme_vectorstore
|
| 9 |
+
from rag.exam_vectorstore import load_exam_vectorstore
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def run_rag_agent(query: str, database: str = "schemes", k: int = 5) -> dict:
    """
    Retrieve the k most similar documents for a query from a FAISS store.

    Args:
        query: Search query
        database: "schemes" or "exams"
        k: Number of documents to retrieve

    Returns:
        Retrieved documents dictionary
    """
    try:
        if database == "schemes":
            store = load_scheme_vectorstore()
        elif database == "exams":
            store = load_exam_vectorstore()
        else:
            return {
                "error": f"Invalid database: {database}. Use 'schemes' or 'exams'",
                "documents": []
            }

        # Similarity search against the chosen store.
        hits = store.similarity_search(query, k=k)

        documents = [
            {
                "id": idx + 1,
                "content": hit.page_content,
                "metadata": hit.metadata,
                "source": hit.metadata.get('source', 'Unknown'),
            }
            for idx, hit in enumerate(hits)
        ]

        return {
            "query": query,
            "database": database,
            "documents_found": len(documents),
            "documents": documents,
        }

    except FileNotFoundError:
        return {
            "error": f"Vectorstore not found for {database}. Please build it first.",
            "documents": []
        }
    except Exception as exc:
        return {
            "error": str(exc),
            "documents": []
        }
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def hybrid_rag_search(query: str, k: int = 3) -> dict:
    """
    Query the scheme and exam vectorstores with the same search string.

    Args:
        query: Search query
        k: Number of documents per database

    Returns:
        Combined results from both databases
    """
    return {
        "query": query,
        "scheme_results": run_rag_agent(query, database="schemes", k=k),
        "exam_results": run_rag_agent(query, database="exams", k=k),
    }
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
if __name__ == "__main__":
    # Manual check against the schemes vectorstore.
    demo = run_rag_agent("agricultural schemes for farmers", database="schemes", k=3)
    print(json.dumps(demo, indent=2))
|
agents/scheme_agent.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Scheme Recommendation Agent
|
| 3 |
+
Provides RAG-based government scheme recommendations
|
| 4 |
+
Uses FAISS for local vector storage
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from langchain_groq import ChatGroq
|
| 9 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 10 |
+
from rag.scheme_vectorstore import load_scheme_vectorstore
|
| 11 |
+
from prompts.scheme_prompt import SCHEME_PROMPT
|
| 12 |
+
from tools.tavily_tool import government_focused_search
|
| 13 |
+
from config import GROQ_API_KEY
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_llm():
    """Initialize Groq LLM"""
    # Fail fast with a clear message when the key is missing.
    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY not found in environment variables")

    llm = ChatGroq(
        api_key=GROQ_API_KEY,
        model="llama-3.3-70b-versatile",
        temperature=0.3,
    )
    return llm
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def run_scheme_agent(profile_data: dict, use_web_search: bool = True, vectorstore=None) -> dict:
    """
    Recommends government schemes based on user profile.

    Combines (optional) FAISS retrieval with (optional) Tavily web search,
    then asks the Groq LLM to produce recommendations from the merged context.

    Args:
        profile_data: Structured user profile (keys like 'income', 'caste',
            'state', 'age', 'gender', 'employment_status')
        use_web_search: Whether to use Tavily for live search
        vectorstore: Pre-loaded FAISS vectorstore (optional, avoids repeated loading)

    Returns:
        Scheme recommendations dictionary with keys 'recommendations',
        'sources_used', 'web_search_used' — or {'error': ..., 'recommendations': []}
        on unexpected failure.
    """
    try:
        context = ""
        sources_used = 0

        if vectorstore is not None:
            print("✅ Using pre-loaded vectorstore")
            try:
                # Create search query from profile
                search_query = f"""
                User Profile:
                Income: {profile_data.get('income', 'N/A')}
                Caste: {profile_data.get('caste', 'N/A')}
                State: {profile_data.get('state', 'N/A')}
                Age: {profile_data.get('age', 'N/A')}
                Gender: {profile_data.get('gender', 'N/A')}
                Employment: {profile_data.get('employment_status', 'N/A')}
                """

                # RAG retrieval
                docs = vectorstore.similarity_search(search_query, k=5)
                context = "\n\n".join([f"Document {i+1}:\n{d.page_content}" for i, d in enumerate(docs)])
                sources_used = len(docs)
                print(f"✓ Retrieved {sources_used} scheme documents from vectorstore")
            except Exception as e:
                # Retrieval failure is non-fatal; fall back to web search below.
                print(f"⚠️ Error querying vectorstore: {str(e)}")
                context = "Vectorstore query failed. Using live web search."
        else:
            print("ℹ️ No vectorstore provided, using web search only")
            context = "No local scheme database available. Using live web search."

        # Create profile string for the prompt
        profile_str = json.dumps(profile_data, indent=2)

        # Web search (fallback or enhancement)
        web_context = ""
        if use_web_search:
            try:
                state = profile_data.get('state', 'India')
                caste = profile_data.get('caste', '')
                # NOTE: the query intentionally uses only state and caste;
                # income is already part of the profile passed to the LLM.
                web_query = f"government schemes India {state} {caste} eligibility benefits 2026"
                print(f"🔍 Searching web: {web_query}")
                web_results = government_focused_search(web_query)
                web_context = f"\n\nLive Web Search Results:\n{web_results}"
                print("✓ Web search completed")
            except Exception as e:
                # Search failure is non-fatal; note it in the context instead.
                web_context = f"\n\nWeb search unavailable: {str(e)}"
                print(f"⚠ Web search failed: {str(e)}")

        # Combine contexts
        full_context = context + web_context

        # If no context at all, return helpful message
        if not full_context.strip():
            return {
                "recommendations": "Unable to retrieve scheme information. Please ensure Tavily API key is configured or vectorstore is built.",
                "sources_used": 0,
                "web_search_used": use_web_search
            }

        # Generate recommendations
        llm = get_llm()

        prompt = SCHEME_PROMPT.format(
            context=full_context,
            profile=profile_str
        )

        messages = [
            SystemMessage(content="You are an expert government scheme advisor. Provide accurate, verified information only."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        return {
            "recommendations": response.content,
            "sources_used": sources_used,
            "web_search_used": use_web_search
        }

    except Exception as e:
        # NOTE(review): on error 'recommendations' is a list while the success
        # path returns a string — callers should check for 'error' first.
        return {
            "error": str(e),
            "recommendations": []
        }
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
if __name__ == "__main__":
    # Manual smoke test with a representative profile (web search disabled).
    test_profile = {
        "income": "300000",
        "caste": "OBC",
        "state": "Maharashtra",
        "age": 25,
        "gender": "Male",
        "employment_status": "Unemployed",
        "education": "Bachelor's in Engineering"
    }
    output = run_scheme_agent(test_profile, use_web_search=False)
    print(json.dumps(output, indent=2))
|
agents/search_agent.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Web Search Agent
|
| 3 |
+
Uses Tavily to search government websites for real-time information
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from tools.tavily_tool import tavily_search, government_focused_search
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def run_search_agent(query: str, government_only: bool = True) -> dict:
    """
    Performs web search for government information

    Args:
        query: Search query
        government_only: If True, restricts to .gov.in domains

    Returns:
        Search results dictionary
    """
    try:
        # Pick the search backend based on the domain restriction flag.
        search_fn = government_focused_search if government_only else tavily_search
        payload = search_fn(query)
        return {
            "query": query,
            "results": payload,
            "government_only": government_only
        }
    except Exception as e:
        # Surface the failure in the payload rather than raising to callers.
        return {
            "query": query,
            "error": str(e),
            "results": []
        }
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def search_scheme_details(scheme_name: str) -> dict:
    """
    Search for specific scheme details

    Args:
        scheme_name: Name of the government scheme

    Returns:
        Scheme details from official sources
    """
    # Bias the query toward application/eligibility pages on official sites.
    lookup = f"{scheme_name} official website application process eligibility"
    return run_search_agent(lookup, government_only=True)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def search_exam_details(exam_name: str) -> dict:
    """
    Search for specific exam details

    Args:
        exam_name: Name of the competitive exam

    Returns:
        Exam details from official sources
    """
    # Bias the query toward the latest official notification and pattern.
    lookup = f"{exam_name} official notification eligibility exam pattern 2026"
    return run_search_agent(lookup, government_only=True)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
    # Manual smoke test against a well-known central scheme.
    demo = run_search_agent("pradhan mantri kisan samman nidhi yojana", government_only=True)
    print(demo)
|
app.py
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
JanSahayak Flask Web Application
|
| 3 |
+
Beautiful UI for Multi-Agent Government Intelligence System
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from flask import Flask, render_template, request, jsonify, session, send_file
|
| 7 |
+
import json
|
| 8 |
+
import os
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from graph.workflow import run_workflow
|
| 11 |
+
import uuid
|
| 12 |
+
import io
|
| 13 |
+
import re
|
| 14 |
+
from reportlab.lib.pagesizes import letter, A4
|
| 15 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 16 |
+
from reportlab.lib.units import inch
|
| 17 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
| 18 |
+
from reportlab.lib import colors
|
| 19 |
+
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
| 20 |
+
|
| 21 |
+
app = Flask(__name__)
# Random key per process: Flask sessions are invalidated on every restart.
app.secret_key = os.urandom(24)  # For session management

# Store active sessions — in-memory only, lost on restart; not shared
# across workers (assumes a single-process deployment — TODO confirm).
sessions = {}

# Global vectorstores (loaded on first use for faster startup)
SCHEME_VECTORSTORE = None
EXAM_VECTORSTORE = None
VECTORSTORES_INITIALIZED = False

# Check if running on a memory-constrained platform.
# Set SKIP_VECTORSTORES=true to disable FAISS loading and rely on web search.
SKIP_VECTORSTORES = os.environ.get('SKIP_VECTORSTORES', 'false').lower() == 'true'
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def initialize_vectorstores():
    """Load vectorstores lazily on first use to avoid blocking port binding.

    Mutates the module globals SCHEME_VECTORSTORE / EXAM_VECTORSTORE /
    VECTORSTORES_INITIALIZED. Idempotent: subsequent calls return
    immediately once VECTORSTORES_INITIALIZED is set. Either store may end
    up None (lightweight mode or load failure), in which case downstream
    agents fall back to web search.
    """
    global SCHEME_VECTORSTORE, EXAM_VECTORSTORE, VECTORSTORES_INITIALIZED

    if VECTORSTORES_INITIALIZED:
        return  # Already initialized

    # Skip vectorstore loading on memory-constrained platforms (use web search only)
    if SKIP_VECTORSTORES:
        print("\n" + "="*70)
        print("⚡ LIGHTWEIGHT MODE: Skipping vectorstore loading")
        print("="*70)
        print("✅ Using Tavily web search only (no embeddings model)")
        print("✅ Low memory usage (<200MB)")
        print("✅ Real-time, up-to-date information")
        print("="*70 + "\n")
        SCHEME_VECTORSTORE = None
        EXAM_VECTORSTORE = None
        VECTORSTORES_INITIALIZED = True
        return

    print("\n" + "="*70)
    print("📚 Initializing Vector Stores (lazy loading)")
    print("="*70)

    # Load scheme vectorstore.
    # Imports are deferred so a missing FAISS/embeddings stack doesn't
    # break app startup — failure just disables local retrieval.
    try:
        from rag.scheme_vectorstore import load_scheme_vectorstore
        SCHEME_VECTORSTORE = load_scheme_vectorstore()
        print("✅ Scheme vectorstore loaded successfully")
    except Exception as e:
        print(f"⚠️ Scheme vectorstore not available: {str(e)}")
        print("   Will use web search only for schemes")
        SCHEME_VECTORSTORE = None

    # Load exam vectorstore (independent of the scheme store above).
    try:
        from rag.exam_vectorstore import load_exam_vectorstore
        EXAM_VECTORSTORE = load_exam_vectorstore()
        print("✅ Exam vectorstore loaded successfully")
    except Exception as e:
        print(f"⚠️ Exam vectorstore not available: {str(e)}")
        print("   Will use web search only for exams")
        EXAM_VECTORSTORE = None

    VECTORSTORES_INITIALIZED = True
    print("="*70 + "\n")
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def format_markdown(text):
    """Convert markdown-style text to HTML for template rendering.

    Non-string or falsy inputs are returned unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    import re

    # Regex rewrites applied in order: headers before emphasis so '###'
    # is consumed before single-'*' handling; bold before italic.
    html = text
    substitutions = (
        (r'###\s+(.+?)(?=\n|$)', r'<h4>\1</h4>', 0),
        (r'##\s+(.+?)(?=\n|$)', r'<h3>\1</h3>', 0),
        (r'\*\*(.+?)\*\*', r'<strong>\1</strong>', 0),
        (r'\*(.+?)\*', r'<em>\1</em>', 0),
        (r'^[\-\*]\s+(.+)$', r'<li>\1</li>', re.MULTILINE),
        (r'(<li>.*?</li>)', r'<ul>\1</ul>', re.DOTALL),
    )
    for pattern, replacement, flags in substitutions:
        html = re.sub(pattern, replacement, html, flags=flags)

    # Each <li> got its own <ul> wrapper above; fuse adjacent lists.
    html = html.replace('</ul>\n<ul>', '\n')

    # Paragraph and line breaks.
    html = html.replace('\n\n', '</p><p>')
    html = html.replace('\n', '<br>')

    # Wrap bare text in a paragraph if it doesn't already begin with a tag.
    if not html.startswith('<'):
        html = f'<p>{html}</p>'

    return html
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Register Jinja filter so templates can render agent output as HTML,
# e.g. {{ some_text|format_markdown }}
app.jinja_env.filters['format_markdown'] = format_markdown
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
@app.route('/')
def index():
    """Landing page with the user-details input form."""
    return render_template('index.html')
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
@app.route('/about')
def about():
    """Static about page."""
    return render_template('about.html')
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
@app.route('/health')
def health():
    """Health check endpoint for monitoring"""
    from config import GROQ_API_KEY, TAVILY_API_KEY, HF_TOKEN

    # Report which API keys are present without exposing their values.
    key_status = {
        'groq': bool(GROQ_API_KEY),
        'tavily': bool(TAVILY_API_KEY),
        'hf_token': bool(HF_TOKEN)
    }
    return jsonify({
        'status': 'ok',
        'service': 'JanSahayak',
        'api_keys_configured': key_status
    })
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
@app.route('/analyze', methods=['POST'])
def analyze():
    """Process user input and run the multi-agent workflow.

    Expects a JSON body with 'user_input' (free text, required) and an
    optional 'structured_data' dict. Runs the workflow synchronously,
    records the result in the in-memory sessions dict and in an
    outputs/results_<timestamp>.json file, and returns the result inline.
    """
    try:
        # First check if API keys are configured
        from config import GROQ_API_KEY, TAVILY_API_KEY

        if not GROQ_API_KEY or GROQ_API_KEY == "":
            return jsonify({
                'success': False,
                'error': 'GROQ_API_KEY is not configured. Please set environment variables on Render.'
            }), 500

        if not TAVILY_API_KEY or TAVILY_API_KEY == "":
            return jsonify({
                'success': False,
                'error': 'TAVILY_API_KEY is not configured. Please set environment variables on Render.'
            }), 500

        # Initialize vectorstores lazily on first request
        initialize_vectorstores()

        # Get user input
        # NOTE(review): request.json is None if the client sends a non-JSON
        # body — the resulting AttributeError lands in the generic handler
        # below and returns a 500; confirm that's acceptable.
        user_input = request.json.get('user_input', '')
        structured_data = request.json.get('structured_data', None)

        if not user_input.strip():
            return jsonify({
                'success': False,
                'error': 'Please provide your details'
            }), 400

        # Generate session ID
        session_id = str(uuid.uuid4())

        # Store in session (including structured data if available)
        sessions[session_id] = {
            'status': 'processing',
            'input': user_input,
            'structured_data': structured_data,
            'started_at': datetime.now().isoformat()
        }

        # Extract user interests from structured data (default: both tracks)
        user_interests = structured_data.get('interests', ['schemes', 'exams']) if structured_data else ['schemes', 'exams']

        # Prepare structured profile if available — maps the form field names
        # ('employment', 'category') onto the workflow's profile keys.
        structured_profile = None
        if structured_data:
            structured_profile = {
                'name': structured_data.get('name', 'Not Provided'),
                'age': structured_data.get('age', 'Not Provided'),
                'gender': structured_data.get('gender', 'Not Provided'),
                'state': structured_data.get('state', 'Not Provided'),
                'education': structured_data.get('education', 'Not Provided'),
                'employment_status': structured_data.get('employment', 'Not Provided'),
                'income': structured_data.get('income', 'Not Provided'),
                'caste': structured_data.get('category', 'Not Provided'),
                'specialization': structured_data.get('specialization', 'Not Provided'),
                'career_interest': structured_data.get('career_interest', 'Not Provided'),
                'interests': structured_data.get('interests', [])
            }

        # Run workflow with interests, structured profile, and pre-loaded vectorstores
        result = run_workflow(
            user_input,
            user_interests,
            structured_profile,
            scheme_vectorstore=SCHEME_VECTORSTORE,
            exam_vectorstore=EXAM_VECTORSTORE
        )

        # Ensure user_profile key exists in result (older workflow versions
        # presumably returned 'profile' instead — TODO confirm)
        if 'user_profile' not in result and 'profile' in result:
            result['user_profile'] = result['profile']

        # Update session
        sessions[session_id]['status'] = 'completed'
        sessions[session_id]['result'] = result
        sessions[session_id]['completed_at'] = datetime.now().isoformat()

        # Save to file for the /history page
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"outputs/results_{timestamp}.json"
        os.makedirs('outputs', exist_ok=True)

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        return jsonify({
            'success': True,
            'session_id': session_id,
            'result': result,
            'filename': filename
        })

    except ImportError as e:
        # Missing dependency / misconfigured module path
        print(f"Import Error in /analyze: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Configuration error: {str(e)}. Please ensure all dependencies are installed.'
        }), 500
    except TimeoutError as e:
        print(f"Timeout Error in /analyze: {str(e)}")
        return jsonify({
            'success': False,
            'error': 'Request timed out. The analysis is taking longer than expected. Please try again.'
        }), 504
    except Exception as e:
        # Catch-all boundary: log with traceback, return a generic 500.
        print(f"Error in /analyze: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': f'An error occurred during analysis: {str(e)}'
        }), 500
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
@app.route('/result/<session_id>')
def result(session_id):
    """Render the results page for a completed analysis session."""
    record = sessions.get(session_id)

    # Unknown session id → 404 error page.
    if record is None:
        return render_template('error.html', error='Session not found'), 404

    # Session exists but the workflow hasn't finished yet.
    if record['status'] != 'completed':
        return render_template('error.html', error='Analysis still in progress'), 400

    return render_template(
        'results.html',
        session_id=session_id,
        session_data=record,
        result=record['result'],
    )
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
@app.route('/api/status/<session_id>')
def status(session_id):
    """Return the raw session record so clients can poll analysis progress."""
    try:
        return jsonify(sessions[session_id])
    except KeyError:
        return jsonify({'error': 'Session not found'}), 404
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
@app.route('/history')
def history():
    """View analysis history.

    Lists the 10 most recent outputs/results_*.json files; unreadable or
    corrupt files are skipped instead of breaking the whole page.
    """
    output_files = []

    if os.path.exists('outputs'):
        files = [f for f in os.listdir('outputs') if f.endswith('.json')]
        # Timestamped names sort lexicographically, so reverse = newest first.
        files.sort(reverse=True)

        for filename in files[:10]:  # Show last 10
            filepath = os.path.join('outputs', filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, json.JSONDecodeError) as e:
                # One bad file shouldn't take down the history page.
                print(f"Skipping unreadable history file {filename}: {e}")
                continue
            output_files.append({
                'filename': filename,
                'timestamp': filename.replace('results_', '').replace('.json', ''),
                'profile': data.get('user_profile', {}),
                'errors': data.get('errors', [])
            })

    return render_template('history.html', files=output_files)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
@app.route('/api/file/<filename>')
def get_file(filename):
    """Download a saved result file as JSON.

    Only bare *.json filenames inside outputs/ are served; anything
    containing path components is rejected to prevent directory traversal.
    """
    try:
        # Security: filename comes straight from the URL. Flask's default
        # converter blocks '/', but reject '..' and non-JSON names anyway.
        if os.path.basename(filename) != filename or not filename.endswith('.json'):
            return jsonify({'error': 'Invalid filename'}), 400

        filepath = os.path.join('outputs', filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return jsonify(data)
    except Exception as e:
        # Missing or unreadable file → 404 with the underlying message.
        return jsonify({'error': str(e)}), 404
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
@app.route('/download/pdf/<session_id>')
|
| 332 |
+
def download_pdf(session_id):
|
| 333 |
+
"""Generate and download PDF report"""
|
| 334 |
+
try:
|
| 335 |
+
if session_id not in sessions:
|
| 336 |
+
return jsonify({'error': 'Session not found'}), 404
|
| 337 |
+
|
| 338 |
+
session_data = sessions[session_id]
|
| 339 |
+
result = session_data.get('result', {})
|
| 340 |
+
|
| 341 |
+
# Create PDF in memory
|
| 342 |
+
buffer = io.BytesIO()
|
| 343 |
+
doc = SimpleDocTemplate(buffer, pagesize=letter,
|
| 344 |
+
rightMargin=72, leftMargin=72,
|
| 345 |
+
topMargin=72, bottomMargin=18)
|
| 346 |
+
|
| 347 |
+
# Container for PDF elements
|
| 348 |
+
elements = []
|
| 349 |
+
|
| 350 |
+
# Define styles
|
| 351 |
+
styles = getSampleStyleSheet()
|
| 352 |
+
title_style = ParagraphStyle(
|
| 353 |
+
'CustomTitle',
|
| 354 |
+
parent=styles['Heading1'],
|
| 355 |
+
fontSize=24,
|
| 356 |
+
textColor=colors.HexColor('#5B21B6'),
|
| 357 |
+
spaceAfter=30,
|
| 358 |
+
alignment=TA_CENTER
|
| 359 |
+
)
|
| 360 |
+
heading_style = ParagraphStyle(
|
| 361 |
+
'CustomHeading',
|
| 362 |
+
parent=styles['Heading2'],
|
| 363 |
+
fontSize=16,
|
| 364 |
+
textColor=colors.HexColor('#7C3AED'),
|
| 365 |
+
spaceAfter=12,
|
| 366 |
+
spaceBefore=12
|
| 367 |
+
)
|
| 368 |
+
normal_style = styles['BodyText']
|
| 369 |
+
normal_style.alignment = TA_JUSTIFY
|
| 370 |
+
|
| 371 |
+
# Get user name for personalization
|
| 372 |
+
profile = result.get('user_profile', {})
|
| 373 |
+
user_name = profile.get('name', 'Citizen')
|
| 374 |
+
if user_name and user_name != 'Not Provided':
|
| 375 |
+
user_name = user_name.strip()
|
| 376 |
+
else:
|
| 377 |
+
user_name = 'Citizen'
|
| 378 |
+
|
| 379 |
+
# Title with logo-like header
|
| 380 |
+
elements.append(Paragraph("🇮🇳 JanSahayak", title_style))
|
| 381 |
+
elements.append(Paragraph("Government Benefits Analysis Report", styles['Heading3']))
|
| 382 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 383 |
+
|
| 384 |
+
# Personalized greeting
|
| 385 |
+
greeting = ParagraphStyle('Greeting', parent=styles['Normal'], fontSize=14,
|
| 386 |
+
textColor=colors.HexColor('#374151'), spaceBefore=6, spaceAfter=12)
|
| 387 |
+
elements.append(Paragraph(f"<b>Prepared for: {user_name}</b>", greeting))
|
| 388 |
+
|
| 389 |
+
# Timestamp
|
| 390 |
+
timestamp = datetime.now().strftime("%B %d, %Y at %I:%M %p")
|
| 391 |
+
elements.append(Paragraph(f"<i>Generated: {timestamp}</i>", styles['Normal']))
|
| 392 |
+
|
| 393 |
+
# Separator line
|
| 394 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 395 |
+
elements.append(Table([['_'*100]], colWidths=[6.5*inch]))
|
| 396 |
+
elements.append(Spacer(1, 0.4*inch))
|
| 397 |
+
|
| 398 |
+
# User Profile Section
|
| 399 |
+
elements.append(Paragraph("Your Profile", heading_style))
|
| 400 |
+
profile = result.get('user_profile', {})
|
| 401 |
+
|
| 402 |
+
if profile:
|
| 403 |
+
profile_data = []
|
| 404 |
+
for key, value in profile.items():
|
| 405 |
+
if key not in ['raw_profile', 'user_input', 'error', 'note'] and value != 'Not Provided':
|
| 406 |
+
label = key.replace('_', ' ').title()
|
| 407 |
+
# Format interests list properly
|
| 408 |
+
if key == 'interests' and isinstance(value, list):
|
| 409 |
+
value = ', '.join([v.title() for v in value])
|
| 410 |
+
profile_data.append([Paragraph(f"<b>{label}:</b>", normal_style),
|
| 411 |
+
Paragraph(str(value), normal_style)])
|
| 412 |
+
|
| 413 |
+
if profile_data:
|
| 414 |
+
profile_table = Table(profile_data, colWidths=[2.2*inch, 4.3*inch])
|
| 415 |
+
profile_table.setStyle(TableStyle([
|
| 416 |
+
('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#EEF2FF')), # Label column
|
| 417 |
+
('BACKGROUND', (1, 0), (1, -1), colors.white), # Value column
|
| 418 |
+
('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#1F2937')),
|
| 419 |
+
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
|
| 420 |
+
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 421 |
+
('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'), # Bold labels
|
| 422 |
+
('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
|
| 423 |
+
('FONTSIZE', (0, 0), (-1, -1), 10),
|
| 424 |
+
('BOTTOMPADDING', (0, 0), (-1, -1), 10),
|
| 425 |
+
('TOPPADDING', (0, 0), (-1, -1), 10),
|
| 426 |
+
('LEFTPADDING', (0, 0), (-1, -1), 12),
|
| 427 |
+
('RIGHTPADDING', (0, 0), (-1, -1), 12),
|
| 428 |
+
('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#D1D5DB')),
|
| 429 |
+
('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.HexColor('#F9FAFB')]),
|
| 430 |
+
]))
|
| 431 |
+
elements.append(profile_table)
|
| 432 |
+
|
| 433 |
+
elements.append(Spacer(1, 0.4*inch))
|
| 434 |
+
|
| 435 |
+
# Helper function to clean and format text
|
| 436 |
+
def clean_text(text):
    """Normalize agent markdown output into plain text for PDF rendering.

    Returns None when the section was explicitly skipped ("Not requested
    by user"), so the caller can drop that section entirely, and a
    fallback message for empty or non-string input.
    """
    if not isinstance(text, str) or not text:
        return "No information available"
    # Skip if "Not requested by user"
    if "Not requested by user" in text:
        return None
    # Strip HTML tags first, then translate markdown constructs.
    cleaned = re.sub(r'<[^>]+>', '', text)
    # Markdown headers become plain lines surrounded by blank space.
    for header_pattern in (r'###\s+(.+)', r'##\s+(.+)'):
        cleaned = re.sub(header_pattern, r'\n\1\n', cleaned)
    # Drop bold markers, keep the enclosed text.
    cleaned = re.sub(r'\*\*(.+?)\*\*', r'\1', cleaned)
    # Convert markdown bullets ("* " / "- ") to a unicode bullet.
    cleaned = re.sub(r'^\*\s+', '\u2022 ', cleaned, flags=re.MULTILINE)
    cleaned = re.sub(r'^-\s+', '\u2022 ', cleaned, flags=re.MULTILINE)
    return cleaned.strip()
|
| 453 |
+
|
| 454 |
+
# Section style for better visual separation
|
| 455 |
+
section_box_style = ParagraphStyle(
|
| 456 |
+
'SectionBox',
|
| 457 |
+
parent=normal_style,
|
| 458 |
+
leftIndent=20,
|
| 459 |
+
rightIndent=20,
|
| 460 |
+
spaceBefore=6,
|
| 461 |
+
spaceAfter=6,
|
| 462 |
+
borderColor=colors.HexColor('#E5E7EB'),
|
| 463 |
+
borderWidth=1,
|
| 464 |
+
borderPadding=10,
|
| 465 |
+
backColor=colors.HexColor('#F9FAFB')
|
| 466 |
+
)
|
| 467 |
+
|
| 468 |
+
# Government Schemes Section
|
| 469 |
+
schemes_text = clean_text(result.get('scheme_recommendations', 'No recommendations available'))
|
| 470 |
+
if schemes_text:
|
| 471 |
+
elements.append(Paragraph("\ud83c\udfdb\ufe0f Government Schemes for You", heading_style))
|
| 472 |
+
elements.append(Spacer(1, 0.1*inch))
|
| 473 |
+
|
| 474 |
+
# Split into paragraphs and add with better formatting
|
| 475 |
+
paragraphs = [p.strip() for p in schemes_text.split('\n\n') if p.strip()]
|
| 476 |
+
for para in paragraphs:
|
| 477 |
+
if para:
|
| 478 |
+
elements.append(Paragraph(para, normal_style))
|
| 479 |
+
elements.append(Spacer(1, 0.15*inch))
|
| 480 |
+
|
| 481 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 482 |
+
|
| 483 |
+
# Competitive Exams Section
|
| 484 |
+
exams_text = clean_text(result.get('exam_recommendations', 'No recommendations available'))
|
| 485 |
+
if exams_text:
|
| 486 |
+
elements.append(Paragraph("\ud83c\udf93 Competitive Exams for You", heading_style))
|
| 487 |
+
elements.append(Spacer(1, 0.1*inch))
|
| 488 |
+
|
| 489 |
+
paragraphs = [p.strip() for p in exams_text.split('\n\n') if p.strip()]
|
| 490 |
+
for para in paragraphs:
|
| 491 |
+
if para:
|
| 492 |
+
elements.append(Paragraph(para, normal_style))
|
| 493 |
+
elements.append(Spacer(1, 0.15*inch))
|
| 494 |
+
|
| 495 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 496 |
+
|
| 497 |
+
# Missed Benefits Section
|
| 498 |
+
benefits_text = clean_text(result.get('missed_benefits_analysis', 'No analysis available'))
|
| 499 |
+
if benefits_text:
|
| 500 |
+
elements.append(Paragraph("\ud83d\udcca Missed Benefits Analysis", heading_style))
|
| 501 |
+
elements.append(Spacer(1, 0.1*inch))
|
| 502 |
+
|
| 503 |
+
paragraphs = [p.strip() for p in benefits_text.split('\n\n') if p.strip()]
|
| 504 |
+
for para in paragraphs:
|
| 505 |
+
if para:
|
| 506 |
+
elements.append(Paragraph(para, normal_style))
|
| 507 |
+
elements.append(Spacer(1, 0.15*inch))
|
| 508 |
+
|
| 509 |
+
# Errors (if any)
|
| 510 |
+
errors = result.get('errors', [])
|
| 511 |
+
if errors:
|
| 512 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 513 |
+
elements.append(Paragraph("Notices", heading_style))
|
| 514 |
+
for error in errors:
|
| 515 |
+
elements.append(Paragraph(f"• {error}", normal_style))
|
| 516 |
+
|
| 517 |
+
# Footer with disclaimer
|
| 518 |
+
elements.append(Spacer(1, 0.5*inch))
|
| 519 |
+
|
| 520 |
+
# Add separator before footer
|
| 521 |
+
elements.append(Table([['_'*100]], colWidths=[6.5*inch]))
|
| 522 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 523 |
+
|
| 524 |
+
footer_style = ParagraphStyle('Footer', parent=styles['Normal'],
|
| 525 |
+
fontSize=9, textColor=colors.HexColor('#6B7280'),
|
| 526 |
+
alignment=TA_CENTER)
|
| 527 |
+
elements.append(Paragraph(
|
| 528 |
+
"<i>This report is generated by JanSahayak AI system. "
|
| 529 |
+
"For official information and application procedures, "
|
| 530 |
+
"please visit the respective government ministry websites or contact local government offices.</i>",
|
| 531 |
+
footer_style
|
| 532 |
+
))
|
| 533 |
+
elements.append(Spacer(1, 0.1*inch))
|
| 534 |
+
elements.append(Paragraph(
|
| 535 |
+
"<i>Generated by JanSahayak - Your Government Benefits Assistant</i>",
|
| 536 |
+
footer_style
|
| 537 |
+
))
|
| 538 |
+
|
| 539 |
+
# Build PDF
|
| 540 |
+
doc.build(elements)
|
| 541 |
+
|
| 542 |
+
# Prepare response
|
| 543 |
+
buffer.seek(0)
|
| 544 |
+
|
| 545 |
+
# Create filename with user's name
|
| 546 |
+
safe_name = re.sub(r'[^a-zA-Z0-9\s]', '', user_name).replace(' ', '_')
|
| 547 |
+
timestamp_str = datetime.now().strftime("%Y%m%d")
|
| 548 |
+
filename = f'JanSahayak_{safe_name}_{timestamp_str}.pdf'
|
| 549 |
+
|
| 550 |
+
return send_file(
|
| 551 |
+
buffer,
|
| 552 |
+
as_attachment=True,
|
| 553 |
+
download_name=filename,
|
| 554 |
+
mimetype='application/pdf'
|
| 555 |
+
)
|
| 556 |
+
|
| 557 |
+
except Exception as e:
|
| 558 |
+
print(f"PDF Generation Error: {str(e)}")
|
| 559 |
+
return jsonify({'error': str(e)}), 500
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
if __name__ == '__main__':
|
| 563 |
+
# Get port from environment variable (for deployment platforms)
|
| 564 |
+
port = int(os.environ.get('PORT', 5000))
|
| 565 |
+
|
| 566 |
+
# Check if running in production
|
| 567 |
+
is_production = os.environ.get('FLASK_ENV') != 'development'
|
| 568 |
+
|
| 569 |
+
print("\n" + "="*70)
|
| 570 |
+
print("🙏 JANSAHAYAK - Starting Web Server")
|
| 571 |
+
print("="*70)
|
| 572 |
+
|
| 573 |
+
# Check API keys on startup
|
| 574 |
+
from config import GROQ_API_KEY, TAVILY_API_KEY, HF_TOKEN
|
| 575 |
+
|
| 576 |
+
if not GROQ_API_KEY or GROQ_API_KEY == "":
|
| 577 |
+
print("⚠️ WARNING: GROQ_API_KEY is not set!")
|
| 578 |
+
print(" The application will not work without this API key.")
|
| 579 |
+
else:
|
| 580 |
+
print("✅ GROQ_API_KEY is configured")
|
| 581 |
+
|
| 582 |
+
if not TAVILY_API_KEY or TAVILY_API_KEY == "":
|
| 583 |
+
print("⚠️ WARNING: TAVILY_API_KEY is not set!")
|
| 584 |
+
print(" The application will not work without this API key.")
|
| 585 |
+
else:
|
| 586 |
+
print("✅ TAVILY_API_KEY is configured")
|
| 587 |
+
|
| 588 |
+
if not HF_TOKEN or HF_TOKEN == "":
|
| 589 |
+
print("⚠️ WARNING: HF_TOKEN is not set (optional but recommended)")
|
| 590 |
+
else:
|
| 591 |
+
print("✅ HF_TOKEN is configured")
|
| 592 |
+
|
| 593 |
+
print(f"\n📱 Starting Flask server on port {port}...")
|
| 594 |
+
print(f"🌍 Environment: {'Production' if is_production else 'Development'}")
|
| 595 |
+
print("🔄 Vectorstores will be loaded on first request")
|
| 596 |
+
print("🛑 Press CTRL+C to stop the server\n")
|
| 597 |
+
|
| 598 |
+
# Start Flask FIRST to bind to port, then load vectorstores in background
|
| 599 |
+
app.run(debug=not is_production, host='0.0.0.0', port=port, threaded=True)
|
config.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Central configuration for JanSahayak: reads API keys from the
# process environment, with a local .env file loaded first so
# development machines need no manual exports.
import os
from dotenv import load_dotenv

# Populate os.environ from a .env file in the working directory, if present.
load_dotenv()

# Required: Groq LLM API key — the app warns at startup that it will not
# work without this.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Required: Tavily web-search API key.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
# Optional but recommended: Hugging Face token (see startup diagnostics).
HF_TOKEN = os.getenv("HF_TOKEN")
|
data/exams_pdfs/README.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Placeholder for competitive exam PDFs
|
| 2 |
+
# Add your competitive exam PDF files to this directory
|
| 3 |
+
|
| 4 |
+
# Examples of exams to add:
|
| 5 |
+
# - UPSC (Civil Services, NDA, CDS)
|
| 6 |
+
# - SSC (CGL, CHSL, MTS, JE)
|
| 7 |
+
# - Banking (IBPS, SBI PO/Clerk, RBI)
|
| 8 |
+
# - Railways (RRB NTPC, ALP, Group D)
|
| 9 |
+
# - State PSC exams
|
| 10 |
+
# - Defense exams (NDA, CDS, AFCAT)
|
| 11 |
+
# - Teaching exams (CTET, TET)
|
| 12 |
+
|
| 13 |
+
# Download official notifications and syllabi from exam conducting bodies
|
data/exams_pdfs/exam.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae2c52c533fe29a081d8fe477d079e1d2e610aa398be5a8324f63b583c5beacf
|
| 3 |
+
size 149005
|
data/schemes_pdfs/Government Welfare Schemes & Policies - Disha Experts.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad6e1fb3d26677250c6597ca9ed83f24000f8c062529f7188b693839f0c6ade9
|
| 3 |
+
size 2410388
|
data/schemes_pdfs/Government of India Welfare Schemes & Policies For Competitive Exams.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11f0608bcece884567ea3e98720c8e557d32d4fe203f3f1dde5356fcf39f7ee7
|
| 3 |
+
size 2387327
|
data/schemes_pdfs/README.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Placeholder for government scheme PDFs
|
| 2 |
+
# Add your government scheme PDF files to this directory
|
| 3 |
+
|
| 4 |
+
# Examples of schemes to add:
|
| 5 |
+
# - PM Kisan Samman Nidhi
|
| 6 |
+
# - Ayushman Bharat
|
| 7 |
+
# - PM Awas Yojana
|
| 8 |
+
# - Skill Development Schemes
|
| 9 |
+
# - Scholarships (SC/ST/OBC/Minority)
|
| 10 |
+
# - State-specific schemes
|
| 11 |
+
|
| 12 |
+
# Download official PDFs from government websites (.gov.in domains)
|
data/schemes_pdfs/all-indian-government-schemes-list-2026-716.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4f7683bac6e79e923ac9441191f073cdbb67c41fcf84d5b401b02ce51520648
|
| 3 |
+
size 511889
|
graph/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Graph Module Init
|
| 3 |
+
"""
|
graph/workflow.py
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangGraph Workflow
|
| 3 |
+
Orchestrates multi-agent system using LangGraph
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import TypedDict, Annotated
|
| 7 |
+
from langgraph.graph import StateGraph, END
|
| 8 |
+
import operator
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class AgentState(TypedDict):
    """
    State object that gets passed between agents.
    Contains all intermediate and final results.

    Each node returns a partial dict of these keys; LangGraph merges the
    updates into the shared state. `errors` is annotated with
    `operator.add` so error lists from parallel nodes are concatenated
    instead of overwritten.
    """
    # Input
    user_input: str            # raw free-text description from the user
    user_interests: list  # ['schemes', 'exams'] — which agents to run

    # Pre-loaded vectorstores
    scheme_vectorstore: object  # FAISS vectorstore or None
    exam_vectorstore: object  # FAISS vectorstore or None

    # Profiling Agent Output (structured fields extracted from user_input)
    profile: dict

    # Scheme Agent Output (markdown-ish recommendation text)
    scheme_recommendations: str

    # Exam Agent Output
    exam_recommendations: str

    # Benefit Agent Output
    missed_benefits: str

    # Final Output (compiled by output_node; see its docstring)
    final_output: dict

    # Error tracking — reducer concatenates contributions from all nodes
    errors: Annotated[list, operator.add]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def profiling_node(state: AgentState) -> dict:
    """
    Node: User Profiling Agent
    Extracts structured profile from user input.

    Skips the LLM call entirely when the form already supplied enough
    structured fields; otherwise merges form data over the LLM output.
    """
    from agents.profiling_agent import run_profiling_agent

    try:
        prior_profile = state.get("profile", {})

        # A field only counts as informative when it is neither metadata
        # nor a placeholder value.
        metadata_keys = ['raw_profile', 'user_input', 'error', 'note']
        placeholders = ['Not Provided', 'N/A', '', None]
        informative = [key for key in prior_profile
                       if key not in metadata_keys
                       and prior_profile[key] not in placeholders]

        if len(informative) >= 3:
            print("\n✅ Using pre-extracted profile data (skipping LLM profiling)")
            return {"profile": prior_profile}

        print("\n🔍 Running Profiling Agent...")
        extracted = run_profiling_agent(state.get("user_input", ""))

        # Form-supplied values win over LLM-extracted ones.
        if prior_profile:
            extracted = {**extracted, **prior_profile}

        # Only error (and maybe user_input) present => extraction failed.
        if "error" in extracted and len(extracted) <= 2:
            print("❌ Profile extraction failed, using fallback data")
            return {
                "profile": prior_profile if prior_profile else {},
                "errors": ["Profiling failed: " + extracted.get("error", "Unknown error")]
            }

        print("✅ Profile extracted successfully")
        return {"profile": extracted}

    except Exception as e:
        print(f"❌ Profiling Agent Error: {str(e)}")
        fallback = state.get("profile", {})
        return {
            "profile": fallback if fallback else {},
            "errors": [f"Profiling: {str(e)}"]
        }
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def scheme_node(state: AgentState) -> dict:
    """
    Node: Scheme Recommendation Agent
    Recommends government schemes based on profile.
    """
    from agents.scheme_agent import run_scheme_agent

    try:
        # Honour the user's interest selection: bail out early when
        # scheme recommendations were not requested.
        selected = state.get("user_interests", ["schemes", "exams"])
        if "schemes" not in selected:
            print("\n⏭️ Skipping Scheme Agent (not requested)")
            return {"scheme_recommendations": "Not requested by user"}

        print("\n🏛️ Running Scheme Recommendation Agent...")
        profile = state.get("profile", {})
        vectorstore = state.get("scheme_vectorstore", None)

        # Count fields that carry real data: ignore metadata keys and
        # placeholder values.
        metadata_keys = ['raw_profile', 'user_input', 'error', 'note']
        placeholders = ['Not Provided', 'N/A', '', None]
        useful_fields = [field for field in profile
                         if field not in metadata_keys
                         and profile[field] not in placeholders]

        # With a thin profile the agent leans on web search instead.
        if not profile or len(useful_fields) < 2:
            print(f"⚠️ Limited profile data ({len(useful_fields)} fields), will rely more on web search")
        else:
            print(f"✅ Profile has {len(useful_fields)} useful fields")

        result = run_scheme_agent(profile, use_web_search=True, vectorstore=vectorstore)
        print("✅ Scheme recommendations generated")
        return {"scheme_recommendations": result.get("recommendations", "")}

    except Exception as e:
        print(f"❌ Scheme Agent Error: {str(e)}")
        return {
            "scheme_recommendations": f"Error generating recommendations: {str(e)}",
            "errors": [f"Scheme: {str(e)}"]
        }
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def exam_node(state: AgentState) -> dict:
    """
    Node: Exam Recommendation Agent
    Recommends competitive exams based on profile.

    Reads:  user_interests, profile, exam_vectorstore
    Writes: exam_recommendations (plus errors on failure)
    """
    from agents.exam_agent import run_exam_agent

    try:
        # Check if user wants exam recommendations
        interests = state.get("user_interests", ["schemes", "exams"])
        if "exams" not in interests:
            print("\n⏭️ Skipping Exam Agent (not requested)")
            return {"exam_recommendations": "Not requested by user"}

        print("\n🎓 Running Exam Recommendation Agent...")
        profile = state.get("profile", {})
        exam_vectorstore = state.get("exam_vectorstore", None)

        # Count fields with real data, mirroring scheme_node/profiling_node:
        # ignore metadata keys AND placeholder values. (Previously this
        # counted 'Not Provided' entries as useful, so placeholder-only
        # profiles skipped the low-data warning.)
        useful_fields = [k for k in profile.keys()
                         if k not in ['raw_profile', 'user_input', 'error', 'note']
                         and profile[k] not in ['Not Provided', 'N/A', '', None]]

        if not profile or len(useful_fields) < 2:
            print("⚠️ Insufficient profile data, using web search only")
            # Still try with whatever we have

        result = run_exam_agent(profile, use_web_search=True, vectorstore=exam_vectorstore)
        print("✅ Exam recommendations generated")
        return {"exam_recommendations": result.get("recommendations", "")}

    except Exception as e:
        print(f"❌ Exam Agent Error: {str(e)}")
        return {
            "exam_recommendations": f"Error generating recommendations: {str(e)}",
            "errors": [f"Exam: {str(e)}"]
        }
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def benefit_node(state: AgentState) -> dict:
    """
    Node: Missed Benefits Calculator Agent
    Calculates potential missed benefits.
    """
    from agents.benefit_agent import calculate_missed_benefits

    try:
        print("\n💰 Running Benefit Calculator Agent...")
        profile = state.get("profile", {})
        schemes = state.get("scheme_recommendations", "")

        # Both inputs are mandatory: the calculator cross-references the
        # profile against the recommended schemes.
        if not profile or not schemes:
            print("⚠️ Insufficient data for benefit calculation")
            return {"missed_benefits": "Insufficient data"}

        outcome = calculate_missed_benefits(profile, schemes)
        print("✅ Benefit calculation completed")
        return {"missed_benefits": outcome.get("calculation", "")}

    except Exception as e:
        print(f"❌ Benefit Agent Error: {str(e)}")
        return {
            "missed_benefits": "",
            "errors": [f"Benefit: {str(e)}"]
        }
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def output_node(state: AgentState) -> dict:
    """
    Node: Final Output Compiler
    Compiles all agent outputs into the final response dict.
    """
    print("\n📊 Compiling Final Output...")

    # Output key -> (state key, fallback). Fallbacks keep the response
    # well-formed even if an upstream agent never wrote its key.
    field_map = {
        "user_profile": ("profile", {}),
        "scheme_recommendations": ("scheme_recommendations", ""),
        "exam_recommendations": ("exam_recommendations", ""),
        "missed_benefits_analysis": ("missed_benefits", ""),
        "errors": ("errors", []),
    }
    compiled = {out_key: state.get(src_key, default)
                for out_key, (src_key, default) in field_map.items()}

    print("✅ Final output ready")

    return {"final_output": compiled}
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def build_workflow():
    """
    Builds the LangGraph workflow.

    Graph shape:
        profiling -> (scheme, exam) -> benefit -> output -> END

    Profiling runs first, then fans out to the scheme and exam nodes;
    benefit has incoming edges from both, so it runs after both branches
    complete, and output compiles the final response.

    Returns:
        Compiled workflow graph
    """
    # Create workflow
    workflow = StateGraph(AgentState)

    # Add nodes (one per agent, plus the final compiler)
    workflow.add_node("profiling", profiling_node)
    workflow.add_node("scheme", scheme_node)
    workflow.add_node("exam", exam_node)
    workflow.add_node("benefit", benefit_node)
    workflow.add_node("output", output_node)

    # Set entry point
    workflow.set_entry_point("profiling")

    # Define edges (workflow flow)
    # Step 1: Profiling runs first, then fans out to scheme and exam
    workflow.add_edge("profiling", "scheme")
    workflow.add_edge("profiling", "exam")

    # Step 2: Both scheme and exam converge to benefit (runs after both complete)
    workflow.add_edge("scheme", "benefit")
    workflow.add_edge("exam", "benefit")

    # Step 3: Benefit goes to output
    workflow.add_edge("benefit", "output")

    # Set finish point
    workflow.add_edge("output", END)

    # Compile workflow
    return workflow.compile()
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def run_workflow(user_input: str, user_interests: list = None, structured_profile: dict = None,
                 scheme_vectorstore=None, exam_vectorstore=None) -> dict:
    """
    Runs the complete multi-agent workflow.

    Args:
        user_input: Raw user input text
        user_interests: List of interests ['schemes', 'exams']
        structured_profile: Pre-extracted profile data from form (optional)
        scheme_vectorstore: Pre-loaded scheme vectorstore (optional)
        exam_vectorstore: Pre-loaded exam vectorstore (optional)

    Returns:
        Final compiled output dictionary
    """
    banner = "=" * 60
    print(banner)
    print("🚀 Starting JanSahayak Multi-Agent System")
    print(banner)

    if user_interests:
        print(f"🎯 User Interests: {', '.join(user_interests)}")

    if structured_profile:
        print("📋 Using structured profile data from form")

    if scheme_vectorstore:
        print("📚 Using pre-loaded scheme vectorstore")
    if exam_vectorstore:
        print("📚 Using pre-loaded exam vectorstore")

    # Every state key the graph reads must be seeded here; nodes only
    # ever add or overwrite keys.
    seed_state = {
        "user_input": user_input,
        "user_interests": user_interests or ["schemes", "exams"],
        "profile": structured_profile if structured_profile else {},
        "scheme_vectorstore": scheme_vectorstore,
        "exam_vectorstore": exam_vectorstore,
        "errors": [],
    }

    # Build and execute the graph.
    graph = build_workflow()
    outcome = graph.invoke(seed_state)

    print("\n" + banner)
    print("✅ Workflow Completed")
    print(banner)

    return outcome.get("final_output", {})
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
if __name__ == "__main__":
    # Test workflow — manual smoke test with a hard-coded sample profile;
    # requires API keys since it exercises the real agents.
    test_input = """
    I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
    My family income is around 3 lakh per year. I belong to the OBC category.
    I am currently unemployed and looking for government job opportunities.
    I am interested in technical positions and government jobs.
    """

    result = run_workflow(test_input)

    print("\n📄 Final Result:")
    print("="*60)
    import json
    # ensure_ascii=False keeps any non-ASCII scheme names readable.
    print(json.dumps(result, indent=2, ensure_ascii=False))
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/58d4a9a45664eb9e12de9549c548c09b6134c17f.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock
ADDED
|
File without changes
|
hf_cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock
ADDED
|
File without changes
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/adapter_config.json
ADDED
|
File without changes
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json
ADDED
|
File without changes
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja
ADDED
|
File without changes
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
|
| 3 |
+
size 90868376
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: en
|
| 3 |
+
license: apache-2.0
|
| 4 |
+
library_name: sentence-transformers
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- feature-extraction
|
| 8 |
+
- sentence-similarity
|
| 9 |
+
- transformers
|
| 10 |
+
datasets:
|
| 11 |
+
- s2orc
|
| 12 |
+
- flax-sentence-embeddings/stackexchange_xml
|
| 13 |
+
- ms_marco
|
| 14 |
+
- gooaq
|
| 15 |
+
- yahoo_answers_topics
|
| 16 |
+
- code_search_net
|
| 17 |
+
- search_qa
|
| 18 |
+
- eli5
|
| 19 |
+
- snli
|
| 20 |
+
- multi_nli
|
| 21 |
+
- wikihow
|
| 22 |
+
- natural_questions
|
| 23 |
+
- trivia_qa
|
| 24 |
+
- embedding-data/sentence-compression
|
| 25 |
+
- embedding-data/flickr30k-captions
|
| 26 |
+
- embedding-data/altlex
|
| 27 |
+
- embedding-data/simple-wiki
|
| 28 |
+
- embedding-data/QQP
|
| 29 |
+
- embedding-data/SPECTER
|
| 30 |
+
- embedding-data/PAQ_pairs
|
| 31 |
+
- embedding-data/WikiAnswers
|
| 32 |
+
pipeline_tag: sentence-similarity
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# all-MiniLM-L6-v2
|
| 37 |
+
This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
|
| 38 |
+
|
| 39 |
+
## Usage (Sentence-Transformers)
|
| 40 |
+
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
|
| 41 |
+
|
| 42 |
+
```
|
| 43 |
+
pip install -U sentence-transformers
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Then you can use the model like this:
|
| 47 |
+
```python
|
| 48 |
+
from sentence_transformers import SentenceTransformer
|
| 49 |
+
sentences = ["This is an example sentence", "Each sentence is converted"]
|
| 50 |
+
|
| 51 |
+
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 52 |
+
embeddings = model.encode(sentences)
|
| 53 |
+
print(embeddings)
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
## Usage (HuggingFace Transformers)
|
| 57 |
+
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
from transformers import AutoTokenizer, AutoModel
|
| 61 |
+
import torch
|
| 62 |
+
import torch.nn.functional as F
|
| 63 |
+
|
| 64 |
+
#Mean Pooling - Take attention mask into account for correct averaging
|
| 65 |
+
def mean_pooling(model_output, attention_mask):
|
| 66 |
+
token_embeddings = model_output[0] #First element of model_output contains all token embeddings
|
| 67 |
+
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
|
| 68 |
+
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# Sentences we want sentence embeddings for
|
| 72 |
+
sentences = ['This is an example sentence', 'Each sentence is converted']
|
| 73 |
+
|
| 74 |
+
# Load model from HuggingFace Hub
|
| 75 |
+
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
|
| 76 |
+
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
|
| 77 |
+
|
| 78 |
+
# Tokenize sentences
|
| 79 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
|
| 80 |
+
|
| 81 |
+
# Compute token embeddings
|
| 82 |
+
with torch.no_grad():
|
| 83 |
+
model_output = model(**encoded_input)
|
| 84 |
+
|
| 85 |
+
# Perform pooling
|
| 86 |
+
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
|
| 87 |
+
|
| 88 |
+
# Normalize embeddings
|
| 89 |
+
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
|
| 90 |
+
|
| 91 |
+
print("Sentence embeddings:")
|
| 92 |
+
print(sentence_embeddings)
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
------
|
| 96 |
+
|
| 97 |
+
## Background
|
| 98 |
+
|
| 99 |
+
The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
|
| 100 |
+
contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned in on a
|
| 101 |
+
1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences, was actually paired with it in our dataset.
|
| 102 |
+
|
| 103 |
+
We developed this model during the
|
| 104 |
+
[Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
|
| 105 |
+
organized by Hugging Face. We developed this model as part of the project:
|
| 106 |
+
[Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Googles Flax, JAX, and Cloud team member about efficient deep learning frameworks.
|
| 107 |
+
|
| 108 |
+
## Intended uses
|
| 109 |
+
|
| 110 |
+
Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
|
| 111 |
+
the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
|
| 112 |
+
|
| 113 |
+
By default, input text longer than 256 word pieces is truncated.
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
## Training procedure
|
| 117 |
+
|
| 118 |
+
### Pre-training
|
| 119 |
+
|
| 120 |
+
We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
|
| 121 |
+
|
| 122 |
+
### Fine-tuning
|
| 123 |
+
|
| 124 |
+
We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity from each possible sentence pairs from the batch.
|
| 125 |
+
We then apply the cross entropy loss by comparing with true pairs.
|
| 126 |
+
|
| 127 |
+
#### Hyper parameters
|
| 128 |
+
|
| 129 |
+
We trained our model on a TPU v3-8. We train the model during 100k steps using a batch size of 1024 (128 per TPU core).
|
| 130 |
+
We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
|
| 131 |
+
a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
|
| 132 |
+
|
| 133 |
+
#### Training data
|
| 134 |
+
|
| 135 |
+
We use the concatenation from multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion sentences.
|
| 136 |
+
We sampled each dataset given a weighted probability which configuration is detailed in the `data_config.json` file.
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
| Dataset | Paper | Number of training tuples |
|
| 140 |
+
|--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
|
| 141 |
+
| [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
|
| 142 |
+
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
|
| 143 |
+
| [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
|
| 144 |
+
| [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
|
| 145 |
+
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
|
| 146 |
+
| [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
|
| 147 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
|
| 148 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
|
| 149 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
|
| 150 |
+
| [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
|
| 151 |
+
| [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
|
| 152 |
+
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
|
| 153 |
+
| [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
|
| 154 |
+
| [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
|
| 155 |
+
| [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
|
| 156 |
+
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
|
| 157 |
+
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
|
| 158 |
+
| [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
|
| 159 |
+
| [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
|
| 160 |
+
| [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
|
| 161 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
|
| 162 |
+
| AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
|
| 163 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
|
| 164 |
+
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
|
| 165 |
+
| [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
|
| 166 |
+
| [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
|
| 167 |
+
| [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
|
| 168 |
+
| [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
|
| 169 |
+
| [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
|
| 170 |
+
| [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
|
| 171 |
+
| [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
|
| 172 |
+
| [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
|
| 173 |
+
| **Total** | | **1,170,060,424** |
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "nreimers/MiniLM-L6-H384-uncased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 384,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 1536,
|
| 13 |
+
"layer_norm_eps": 1e-12,
|
| 14 |
+
"max_position_embeddings": 512,
|
| 15 |
+
"model_type": "bert",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 6,
|
| 18 |
+
"pad_token_id": 0,
|
| 19 |
+
"position_embedding_type": "absolute",
|
| 20 |
+
"transformers_version": "4.8.2",
|
| 21 |
+
"type_vocab_size": 2,
|
| 22 |
+
"use_cache": true,
|
| 23 |
+
"vocab_size": 30522
|
| 24 |
+
}
|
hf_cache/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|