Kushagra commited on
Commit
6e94f25
·
1 Parent(s): c1aa0bf

Hugging-Face Deployment

Browse files
.dockerignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.pyo
4
+ .cache/
5
+ venv/
6
+ .env
7
+ *.faiss
8
+ *.pkl
9
+ *.json
10
+ testing/
11
+ redis/
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+ cache/
6
+ database/
7
+ # C extensions
8
+ *.so
9
+ .idea/
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ testing/
15
+ develop-eggs/
16
+ dist/
17
+ redis/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ redis/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+ V2
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py.cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # UV
104
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ #uv.lock
108
+
109
+ # poetry
110
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
111
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
112
+ # commonly ignored for libraries.
113
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
114
+ #poetry.lock
115
+ #poetry.toml
116
+
117
+ # pdm
118
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
119
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
120
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
121
+ #pdm.lock
122
+ #pdm.toml
123
+ .pdm-python
124
+ .pdm-build/
125
+
126
+ # pixi
127
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
128
+ #pixi.lock
129
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
130
+ # in the .venv directory. It is recommended not to include this directory in version control.
131
+ .pixi
132
+
133
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
134
+ __pypackages__/
135
+
136
+ # Celery stuff
137
+ celerybeat-schedule
138
+ celerybeat.pid
139
+
140
+ # SageMath parsed files
141
+ *.sage.py
142
+
143
+ # Environments
144
+ .env
145
+ .envrc
146
+ .venv
147
+ env/
148
+ venv/
149
+ ENV/
150
+ env.bak/
151
+ venv.bak/
152
+
153
+ # Spyder project settings
154
+ .spyderproject
155
+ .spyproject
156
+
157
+ # Rope project settings
158
+ .ropeproject
159
+
160
+ # mkdocs documentation
161
+ /site
162
+
163
+ # mypy
164
+ .mypy_cache/
165
+ .dmypy.json
166
+ dmypy.json
167
+
168
+ # Pyre type checker
169
+ .pyre/
170
+
171
+ # pytype static type analyzer
172
+ .pytype/
173
+
174
+ # Cython debug symbols
175
+ cython_debug/
176
+
177
+ # PyCharm
178
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
179
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
180
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
181
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
182
+ #.idea/
183
+
184
+ # Abstra
185
+ # Abstra is an AI-powered process automation framework.
186
+ # Ignore directories containing user credentials, local state, and settings.
187
+ # Learn more at https://abstra.io/docs
188
+ .abstra/
189
+
190
+ # Visual Studio Code
191
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
192
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
193
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
194
+ # you could uncomment the following to ignore the entire vscode folder
195
+ # .vscode/
196
+
197
+ # Ruff stuff:
198
+ .ruff_cache/
199
+
200
+ # PyPI configuration file
201
+ .pypirc
202
+
203
+ # Cursor
204
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
205
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
206
+ # refer to https://docs.cursor.com/context/ignore-files
207
+ .cursorignore
208
+ .cursorindexingignore
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
Dockerfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---------- Builder Stage ----------
# Compiles wheels and pre-downloads the embedding model; nothing from this
# stage ships except site-packages, console scripts, and the HF model cache.
FROM python:3.11-bookworm AS builder

WORKDIR /app

# Install system build dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    pkg-config \
    libhdf5-dev \
    libopenblas-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Install Python packages
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir -r requirements.txt

# Set cache locations (builder-local; renamed to /app/cache in production)
ENV HF_HOME=/app/hf_cache/huggingface
ENV HF_DATASETS_CACHE=/app/hf_cache/datasets

# Pre-download model so the production container starts without network access
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"

# ---------- Production Stage ----------
FROM python:3.11-bookworm AS production

# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Install runtime dependencies (curl is required by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    libopenblas0 \
    libhdf5-103 \
    curl \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy Python dependencies and scripts from the builder
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy Hugging Face model cache and fix permissions.
# NOTE: the builder wrote to /app/hf_cache; it is renamed to /app/cache here,
# matching the HF_HOME value exported by the ENV block further down.
COPY --from=builder /app/hf_cache /app/cache
RUN chown -R appuser:appuser /app/cache

# Copy application code
COPY . .

# Create necessary directories and fix permissions
RUN mkdir -p \
    /app/database/faiss_index \
    /app/cache/datasets \
    /home/appuser/.cache/huggingface \
    /app/vector_store \
    /app/temp_vector_store && \
    chown -R appuser:appuser /app /home/appuser

# Switch to non-root user
USER appuser

# Expose app port (7860 is the Hugging Face Spaces convention)
EXPOSE 7860

# Health check against the FastAPI /health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD curl -f http://localhost:7860/health || exit 1

# Environment variables
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/app/cache/huggingface \
    HF_DATASETS_CACHE=/app/cache/datasets

# Launch FastAPI app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,3 +1,4 @@
 
1
  ---
2
  title: PDF Summarization
3
  emoji: 🐢
@@ -9,3 +10,299 @@ short_description: PDF-Summarization
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  ---
3
  title: PDF Summarization
4
  emoji: 🐢
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ ## 📦 Project Info
15
+
16
+ - **Project Name:** Hackrx
17
+ - **Python Version:** 3.11 (matches the Docker base image)
18
+
19
+ # 🚀 HackRx PDF RAG API
20
+
21
+ A production-ready FastAPI service that processes PDF documents and answers questions using Retrieval-Augmented Generation (RAG) with Google's Gemini AI.
22
+
23
+ ## ✨ Features
24
+
25
+ - 🔍 **PDF Processing**: Automatic extraction and chunking of PDF documents
26
+ - 🧠 **AI-Powered Q&A**: Answers questions using Google's Gemini Pro 2.5
27
+ - 📊 **Vector Search**: Fast similarity search using FAISS with sentence embeddings
28
+ - 🔐 **Authentication**: Secure Bearer token authentication
29
+ - ⚡ **Fast Response**: Sub-30 second response times
30
+ - 🌐 **Public API**: HTTPS-ready for production deployment
31
+
32
+ ## 📋 API Endpoints
33
+
34
+ ### POST /api/v1/hackrx/run
35
+ Process a PDF document and answer questions.
36
+
37
+ **Authentication**: Bearer Token Required
38
+
39
+ **Request Format**:
40
+ ```json
41
+ {
42
+ "documents": "https://example.com/document.pdf",
43
+ "questions": [
44
+ "What is the grace period for premium payment?",
45
+ "What is the waiting period for pre-existing diseases?"
46
+ ]
47
+ }
48
+ ```
49
+
50
+ **Response Format**:
51
+ ```json
52
+ {
53
+ "answers": [
54
+ "A grace period of thirty days is provided...",
55
+ "There is a waiting period of thirty-six months..."
56
+ ]
57
+ }
58
+ ```
59
+
60
+ ### GET /health
61
+ Health check endpoint for monitoring.
62
+
63
+ ## 🛠️ Installation
64
+
65
+ ### Prerequisites
66
+ - Python 3.8+
67
+ - pip package manager
68
+ - 4GB+ RAM recommended
69
+
70
+ ### 1. Clone and Setup
71
+ ```bash
72
+ git clone <your-repo-url>
73
+ cd hackrx-pdf-rag-api
74
+ ```
75
+
76
+ ### 2. Create Virtual Environment
77
+ ```bash
78
+ # Windows
79
+ python -m venv venv
80
+ venv\\Scripts\\activate
81
+
82
+ # Linux/Mac
83
+ python3 -m venv venv
84
+ source venv/bin/activate
85
+ ```
86
+
87
+ ### 3. Install Dependencies
88
+ ```bash
89
+ pip install -r requirements.txt
90
+ ```
91
+
92
+ ### 4. Environment Configuration
93
+ Create a `.env` file:
94
+ ```bash
95
+ # API Configuration
96
+ API_KEY=hackrx-2024-secure-key
97
+
98
+
99
+ # Server Configuration
100
+ HOST=0.0.0.0
101
+ PORT=8000
102
+ DEBUG=False
103
+ ```
104
+
105
+ ### 5. Run the Application
106
+
107
+ #### Local Development
108
+ ```bash
109
+ # Run with auto-reload
110
+ python fastapi_app.py
111
+
112
+ # Or with uvicorn
113
+ uvicorn fastapi_app:app --reload --host 0.0.0.0 --port 8000
114
+ ```
115
+
116
+ #### Production with HTTPS
117
+ ```bash
118
+ # With SSL certificates
119
+ uvicorn fastapi_app:app --host 0.0.0.0 --port 443 \
120
+ --ssl-keyfile=path/to/key.pem \
121
+ --ssl-certfile=path/to/cert.pem
122
+ ```
123
+
124
+ ## 🚀 Deployment Options
125
+
126
+ ### Option 1: Render (Recommended)
127
+ 1. Create account at [render.com](https://render.com)
128
+ 2. Connect GitHub repository
129
+ 3. Use `render.yaml` configuration
130
+ 4. Deploy automatically
131
+
132
+ ### Option 2: Railway
133
+ ```bash
134
+ # Install Railway CLI
135
+ npm install -g @railway/cli
136
+
137
+ # Deploy
138
+ railway login
139
+ railway up
140
+ ```
141
+
142
+ ### Option 3: Heroku
143
+ ```bash
144
+ # Install Heroku CLI
145
+ heroku create hackrx-pdf-rag-api
146
+ git push heroku main
147
+ ```
148
+
149
+ ### Option 4: Docker
150
+ ```bash
151
+ # Build image
152
+ docker build -t hackrx-pdf-rag .
153
+
154
+ # Run container
155
+ docker run -p 7860:7860 hackrx-pdf-rag
156
+ ```
157
+
158
+ ## 📖 Usage Examples
159
+
160
+ ### 1. Testing with curl
161
+ ```bash
162
+ curl -X POST "http://localhost:8000/api/v1/hackrx/run" \\
163
+ -H "Authorization: Bearer hackrx-2024-secure-key" \\
164
+ -H "Content-Type: application/json" \\
165
+ -d '{
166
+ "documents": "https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D",
167
+ "questions": [
168
+ "What is the grace period for premium payment?",
169
+ "What is the waiting period for pre-existing diseases?"
170
+ ]
171
+ }'
172
+ ```
173
+
174
+ ### 2. Python Client
175
+ ```python
176
+ import requests
177
+
178
+ API_URL = "https://your-domain.com/api/v1/hackrx/run"
179
+ API_KEY = "hackrx-2024-secure-key"
180
+
181
+ response = requests.post(
182
+ API_URL,
183
+ headers={
184
+ "Authorization": f"Bearer {API_KEY}",
185
+ "Content-Type": "application/json"
186
+ },
187
+ json={
188
+ "documents": "https://example.com/document.pdf",
189
+ "questions": [
190
+ "What is the grace period for premium payment?",
191
+ "What is the waiting period for pre-existing diseases?",
192
+ "Does this policy cover maternity expenses?",
193
+ "What is the waiting period for cataract surgery?",
194
+ "Are the medical expenses for an organ donor covered?",
195
+ "What is the No Claim Discount (NCD) offered?",
196
+ "Is there a benefit for preventive health check-ups?",
197
+ "How does the policy define a 'Hospital'?",
198
+ "What is the extent of coverage for AYUSH treatments?",
199
+ "Are there any sub-limits on room rent and ICU charges?"
200
+ ]
201
+ }
202
+ )
203
+
204
+ print(response.json())
205
+ ```
206
+
207
+ ### 3. JavaScript/Node.js
208
+ ```javascript
209
+ const axios = require('axios');
210
+
211
+ async function processDocument() {
212
+ const response = await axios.post('https://your-domain.com/hackrx/run', {
213
+ documents: 'https://example.com/document.pdf',
214
+ questions: [
215
+ 'What is the grace period for premium payment?',
216
+ 'What is the waiting period for pre-existing diseases?'
217
+ ]
218
+ }, {
219
+ headers: {
220
+ 'Authorization': 'Bearer hackrx-2024-secure-key',
221
+ 'Content-Type': 'application/json'
222
+ }
223
+ });
224
+
225
+ console.log(response.data);
226
+ }
227
+
228
+ processDocument();
229
+ ```
230
+
231
+ ## 🔧 Configuration
232
+
233
+ ### Environment Variables
234
+ | Variable | Description | Default |
235
+ |----------|-------------|---------|
236
+ | `API_KEY` | Bearer token for authentication | `hackrx-2024-secure-key` |
237
+ | `GEMINI_API_KEY` | Google Gemini API key | Provided |
238
+ | `HOST` | Server host | `0.0.0.0` |
239
+ | `PORT` | Server port | `8000` |
240
+ | `DEBUG` | Debug mode | `False` |
241
+
242
+ ### Performance Tuning
243
+ - **Memory**: Ensure 4GB+ RAM for large PDFs
244
+ - **Timeout**: Set to 30 seconds for optimal performance
245
+ - **Concurrency**: Supports multiple simultaneous requests
246
+
247
+ ## 🧪 Testing
248
+
249
+ ### Run Tests
250
+ ```bash
251
+ # Install test dependencies
252
+ pip install pytest pytest-asyncio httpx
253
+
254
+ # Run tests
255
+ pytest test_api.py -v
256
+ ```
257
+
258
+ ### Load Testing
259
+ ```bash
260
+ # Install locust
261
+ pip install locust
262
+
263
+ # Run load test
264
+ locust -f load_test.py --host=http://localhost:8000
265
+ ```
266
+
267
+ ## 📊 Monitoring
268
+
269
+ ### Health Check
270
+ ```bash
271
+ curl http://localhost:8000/health
272
+ ```
273
+
274
+ ### Logging
275
+ - Logs are written to stdout
276
+ - Use `docker logs` for containerized deployments
277
+ - Integrate with monitoring tools like Datadog or New Relic
278
+
279
+ ## 🔍 Troubleshooting
280
+
281
+ ### Common Issues
282
+
283
+ 1. **PDF Download Fails**
284
+ - Check URL accessibility
285
+ - Verify file format (PDF only)
286
+ - Ensure HTTPS URLs for production
287
+
288
+ 2. **Slow Response Times**
289
+ - Increase server resources
290
+ - Optimize PDF size (< 10MB recommended)
291
+ - Check network connectivity
292
+
293
+ 3. **Authentication Errors**
294
+ - Verify Bearer token format
295
+ - Check API key in environment variables
296
+
297
+ 4. **Memory Issues**
298
+ - Monitor RAM usage during processing
299
+ - Consider PDF size limits
300
+ - Use streaming for large files
301
+
302
+ ### Debug Mode
303
+ ```bash
304
+ # Enable debug logging
305
+ DEBUG=True python fastapi_app.py
306
+ ```
307
+
308
+ ## 🏗️ Architecture
app/config/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "MODEL_NAME": "llama-3.3-70b-versatile",
3
+ "GROQ_KEY": "",
4
+ "VALID_API_KEY": "2931609bd36ec1a45cb577b3b831dc711c76ae157b3c6250c564284c93b062ff",
5
+
6
+ "LLM_CONFIG": {
7
+ "TEMPERATURE": 0.7,
8
+ "MAX_TOKENS": 300,
9
+ "FREQUENCY_PENALTY": 0,
10
+ "PRESENCE_PENALTY": 0
11
+ }
12
+ }
app/core/__init__.py ADDED
File without changes
app/core/models.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import logging
4
+ import time
5
+ import asyncio
6
+
7
+ from fastapi import status
8
+ from langchain_groq import ChatGroq
9
+ from langchain.schema import Document
10
+ from langchain.chains import RetrievalQA
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain_huggingface import HuggingFaceEmbeddings
13
+ from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
14
+ from langchain_community.document_loaders import PyPDFLoader
15
+ from app.core.template import prompt_template_description
16
+
17
# Shared sentence-embedding model used for all FAISS indexing and querying;
# loaded once at import time (downloaded/cached via HF_HOME in Docker).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2")
19
+
20
# Async PDF loader
async def pdf_loader(url: str):
    """Asynchronously load the PDF at *url*, returning its pages as a list."""
    loader = PyPDFLoader(url)
    # alazy_load yields one Document per page; collect them all.
    return [page async for page in loader.alazy_load()]
28
+
29
# Main function to create/load vectorstore
async def load_and_create_vector_store(url: str):
    """
    Load a PDF document from a URL and either reuse or build a FAISS vectorstore.

    The on-disk index directory is derived from a hash of *url*, so different
    documents no longer collide on a single shared index (previously every
    document reused one fixed path, returning stale results for new URLs).

    Args:
        url: HTTP(S) location of the PDF to index.

    Returns:
        A retriever object over the FAISS index.

    Raises:
        ValueError: if the PDF yields no pages.
    """
    import hashlib  # local: only needed here to derive the per-document path

    # Per-URL cache directory keyed by a stable hash of the document URL.
    doc_key = hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]
    vectorstore_path = f"./database/faiss_index/{doc_key}"

    if os.path.exists(f"{vectorstore_path}/index.faiss"):
        logging.info("Vector store already exists, loading it.")
        vectorstore = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
    else:
        logging.info("Vector store not found. Creating new one from document.")
        pages = await pdf_loader(url)
        if not pages:
            raise ValueError("No pages loaded from the document.")

        full_text = "\n\n".join(page.page_content for page in pages)
        documents = [Document(page_content=full_text, metadata={"source": url})]
        # Large chunks with generous overlap keep related clauses together,
        # improving retrieval quality on policy-style documents.
        text_splitter = CharacterTextSplitter(
            separator="\n\n",
            chunk_size=2500,
            chunk_overlap=300,
            length_function=len,
        )
        split_docs = text_splitter.split_documents(documents)
        logging.info(f"Document split into {len(split_docs)} chunks")

        vectorstore = FAISS.from_documents(split_docs, embeddings)
        vectorstore.save_local(vectorstore_path)

    # NOTE(review): "score_threshold" is ignored by the default similarity
    # search; pass search_type="similarity_score_threshold" if the threshold
    # is actually meant to filter results — confirm intent before changing.
    return vectorstore.as_retriever(
        search_kwargs={"k": 2, "score_threshold": 0.5}
    )
64
+
65
async def llm_setup(config, url):
    """
    Set up the RetrievalQA chain used for question answering.

    Args:
        config: Configuration dict. MODEL_NAME and GROQ_KEY are top-level
            keys; TEMPERATURE / MAX_TOKENS / MAX_RETRIES live in the nested
            LLM_CONFIG section (see app/config/config.json).
        url: URL of the document to process.

    Returns:
        object: The configured RetrievalQA chain.
    """
    # The numeric settings live under config["LLM_CONFIG"]; the original read
    # them from the top level, so the hard-coded defaults always applied.
    llm_config = config.get("LLM_CONFIG", {})
    llm = ChatGroq(
        model=config.get("MODEL_NAME"),
        # Pass real numbers — the original wrapped every value in an f-string,
        # sending "0.7"/"300" (str) where floats/ints are expected.
        temperature=float(llm_config.get("TEMPERATURE", 0)),
        max_tokens=int(llm_config.get("MAX_TOKENS", 300)),
        max_retries=int(llm_config.get("MAX_RETRIES", 3)),
        api_key=config.get("GROQ_KEY"),
    )
    # Never write credentials to logs (the original logged the raw API key).
    logging.info(f"LLM initialized with model: {config.get('MODEL_NAME')}")

    # Plain-text answer template (no structured JSON output needed here).
    prompt_template = prompt_template_description()

    retriever = await load_and_create_vector_store(url=url)

    # "stuff" chain: all retrieved chunks are stuffed into a single prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template}
    )
    return qa_chain
99
+
100
async def llm_response_generator(config, url, questions):
    """
    Generate answers from the LLM within a ~30 second budget.

    Args:
        config: Configuration dictionary with LLM settings.
        url: URL of the document to process.
        questions: List of questions to answer.

    Returns:
        Tuple of (response dict with an "answers" list, HTTP status code).
        Note: if the time budget runs out, remaining questions are skipped,
        so "answers" may be shorter than *questions*.
    """
    # Leave a margin under the caller's 30 s deadline.
    time_budget_seconds = 28
    try:
        # monotonic clock: immune to wall-clock adjustments (time.time() is not)
        start = time.monotonic()
        qa_chain = await llm_setup(config, url)

        answers = []
        for question in questions:
            if time.monotonic() - start > time_budget_seconds:
                logging.warning("Time limit reached, skipping remaining questions.")
                break

            try:
                answers.append(await qa_chain.arun(question))
            except Exception as e:
                # Best-effort per question: record a placeholder and continue.
                logging.error(f"Error answering: {question} | {e}")
                answers.append("Error processing question.")

        return {"answers": answers}, status.HTTP_200_OK

    except Exception as e:
        logging.error(f"Error in llm_response_generator: {e}")
        return {"answers": []}, status.HTTP_500_INTERNAL_SERVER_ERROR
app/core/template.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import PromptTemplate
2
+
3
+
4
def prompt_template_description():
    """
    Build the QA prompt used by the retrieval chain.

    Returns a PromptTemplate expecting ``context`` and ``question`` variables.
    """
    body = """You are a specialized AI assistant designed to provide precise, factual answers based strictly on the context of the provided document. These documents may include insurance policies, legal contracts, HR manuals, compliance guidelines, technical manuals, brochures, academic materials, or other large, unstructured texts.

CRITICAL RULES:
- Answer exactly what is asked with the most important details only
- Include specific numbers, time periods, names, or key conditions when relevant
- Keep answers to 1-2 sentences maximum
- Use clear, professional language
- Focus on the core information requested
- If information is not in the context, respond with: "Information not available in the provided document."

IMPORTANT: Respond with ONLY the answer text. Do NOT wrap your response in JSON format. Do not mention page numbers or sources. Provide a focused answer with only the essential details from the document that directly answer the question.

Context:
{context}

Question:
{question}

Answer:"""
    # Explicit constructor (equivalent to from_template, which infers the
    # same two input variables from the braces above).
    return PromptTemplate(template=body, input_variables=["context", "question"])
app/schema/__init__.py ADDED
File without changes
app/schema/schema.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel, HttpUrl
3
+
4
+
5
class QuestionRequest(BaseModel):
    """Request body for the /hackrx/run endpoint."""

    # URL of the PDF document to process (validated as a URL by pydantic).
    documents: HttpUrl
    # Natural-language questions to answer against the document.
    questions: List[str]
8
+
9
class AnswerResponse(BaseModel):
    """Response body: one answer string per processed question."""

    # May be shorter than the request's question list if the time budget ran out.
    answers: List[str]
app/utils/__init__.py ADDED
File without changes
app/utils/util.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import re
3
+ import json
4
+ import logging
5
+ from fastapi import FastAPI, HTTPException, Depends, Security
6
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
7
+
8
+ security = HTTPBearer()
9
+
10
def load_config():
    """
    Load configuration from ./app/config/config.json (relative to the CWD).

    Returns:
        dict: the parsed configuration.

    Raises:
        FileNotFoundError: if the config file is missing.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open('./app/config/config.json', 'r') as file:
        config_data = json.load(file)
    # Do NOT print/log the config contents: it carries API keys
    # (the original print(config_data) leaked them to stdout).
    logging.info("Configuration loaded successfully.")
    return config_data
19
+
20
+
21
def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)):
    """
    Verify the API key supplied as a Bearer token.

    Args:
        credentials: parsed Authorization header (injected by FastAPI).

    Returns:
        str: the verified token.

    Raises:
        HTTPException: 401 when the token does not match VALID_API_KEY.
    """
    import hmac  # local: constant-time comparison is only needed here

    config = load_config()
    expected = config.get("VALID_API_KEY") or ""
    # compare_digest runs in constant time, unlike `!=`, which leaks timing
    # information about how many leading characters matched.
    if not hmac.compare_digest(credentials.credentials, expected):
        raise HTTPException(
            status_code=401,
            detail="Invalid API Key"
        )

    logging.info("API Key verified successfully.")
    return credentials.credentials
34
+
35
+
36
def sanitize_filename(file_link: str) -> str:
    """
    Sanitize a file link into a safe filename.

    Every character outside [a-zA-Z0-9_-] is replaced with an underscore.
    """
    unsafe = re.compile(r'[^a-zA-Z0-9_-]')
    return unsafe.sub('_', file_link)
main.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import uvicorn
4
+ import json
5
+ import hashlib
6
+
7
+ # Import our existing pipeline components
8
+ from contextlib import asynccontextmanager
9
+ from fastapi import FastAPI, HTTPException, Depends, status
10
+ from app.utils.util import verify_api_key
11
+ from app.core.models import llm_response_generator
12
+ from app.schema.schema import QuestionRequest, AnswerResponse
13
+
14
# Structured log format including the source line number for easier tracing.
logging.basicConfig(format='%(asctime)s - %(levelname)s - Line: %(lineno)d - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO)


# Silence the huggingface/tokenizers fork-parallelism warning in workers.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
20
+
21
+ # Load config.json at startup
22
+ @asynccontextmanager
23
+ async def lifespan(app: FastAPI):
24
+ import json
25
+ config_path = os.path.join(os.path.dirname(__file__), "app", "config", "config.json")
26
+ with open(config_path, "r") as f:
27
+ app.state.config = json.load(f)
28
+ logging.info("Config loaded successfully.")
29
+ yield
30
+ logging.info("Shutting down FastAPI app...")
31
+
32
+
33
# FastAPI app; the lifespan hook loads config.json into app.state at startup.
app = FastAPI(title="HackRx PDF RAG API", version="1.0.0", lifespan=lifespan)
35
+
36
@app.post("/api/v1/hackrx/run", response_model=AnswerResponse)
async def process_questions(request: QuestionRequest, api_key: str = Depends(verify_api_key)):
    """
    Answer a batch of questions about the PDF at request.documents.

    Responses are memoised on disk keyed by (URL, questions), so an identical
    repeat request is served from cache without touching the LLM.

    Raises:
        HTTPException: propagated with its original status/detail on known
            failures; 500 for anything unexpected.
    """
    try:
        url = str(request.documents)
        config = app.state.config
        questions = request.questions

        logging.info(f"Received {len(questions)} questions for processing. Documents URL: {url}")

        # File-based response cache ("redis" is just a directory name here,
        # not an actual Redis instance).
        cache_dir = "redis"
        os.makedirs(cache_dir, exist_ok=True)
        # Deterministic key over the URL plus the exact question list.
        cache_key = hashlib.sha256((url + json.dumps(questions, sort_keys=True)).encode()).hexdigest()
        cache_file = os.path.join(cache_dir, f"{cache_key}.json")

        # Serve from cache when the identical request was seen before.
        if os.path.exists(cache_file):
            logging.info(f"Cache hit for key: {cache_key}")
            with open(cache_file, "r") as f:
                return json.load(f)

        # Otherwise, call the LLM and cache the successful response.
        response, status_code = await llm_response_generator(config=config, url=url, questions=questions)
        if status_code != status.HTTP_200_OK:
            raise HTTPException(status_code=status_code, detail="Error processing questions")

        with open(cache_file, "w") as f:
            json.dump(response, f)
        logging.info(f"Cache saved for key: {cache_key}")
        return response

    except HTTPException:
        # Re-raise deliberate HTTP errors unchanged; the original's generic
        # `except Exception` swallowed them into an anonymous 500.
        raise
    except Exception as e:
        logging.error(f"Error processing questions: {e}")
        raise HTTPException(status_code=500, detail="Internal Server Error")
73
+
74
@app.get("/health")
async def health_check():
    """Health check endpoint to verify the API is running."""
    payload = {
        "status": "Healthy",
        "message": "API is running smoothly.",
        "code": status.HTTP_200_OK,
    }
    return payload
80
+
81
if __name__ == "__main__":
    # Local development entry point. Production uses the Dockerfile CMD
    # (uvicorn on port 7860) instead of this block.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,  # auto-reload on code changes (development only)
        workers=1     # reload mode requires a single worker
    )
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ pypdf
3
+ langchain_community
4
+ uvicorn
5
+ langchain_groq
6
+ sentence-transformers
7
+ langchain-huggingface
8
+ faiss-cpu
9
+ langchain-text-splitters