aravsaxena884 committed
Commit 3951d64 · 1 Parent(s): 62c0d9a
.env.example ADDED
@@ -0,0 +1,8 @@
+ # Environment Variables Template
+ # Copy this file to .env and fill in your actual API keys
+
+ PINECONE_API_KEY=your_pinecone_api_key_here
+ GROQ_API_KEY=your_groq_api_key_here
+ LANGSMITH_API_KEY=your_langsmith_api_key_here
+ LANGSMITH_TRACING=true
+ LANGSMITH_PROJECT=BajaRX
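For reference, a minimal sketch of reading these keys at startup with python-dotenv (already listed in requirements.txt); rag.py parses the file by hand instead, so this is only an illustration:

```python
# Minimal sketch - load .env and read the keys defined in .env.example.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory, if present

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY", "")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING", "false")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "BajaRX")
```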
.gitignore ADDED
@@ -0,0 +1,132 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ *.manifest
30
+ *.spec
31
+
32
+ # Installer logs
33
+ pip-log.txt
34
+ pip-delete-this-directory.txt
35
+
36
+ # Unit test / coverage reports
37
+ htmlcov/
38
+ .tox/
39
+ .nox/
40
+ .coverage
41
+ .coverage.*
42
+ .cache
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Django stuff:
54
+ *.log
55
+ local_settings.py
56
+ db.sqlite3
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ profile_default/
76
+ ipython_config.py
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # celery beat schedule file
82
+ celerybeat-schedule
83
+
84
+ # SageMath parsed files
85
+ *.sage.py
86
+
87
+ # Environments
88
+ .env
89
+ .venv
90
+ env/
91
+ venv/
92
+ ENV/
93
+ env.bak/
94
+ venv.bak/
95
+
96
+ # Spyder project settings
97
+ .spyderproject
98
+ .spyproject
99
+
100
+ # Rope project settings
101
+ .ropeproject
102
+
103
+ # mkdocs documentation
104
+ /site
105
+
106
+ # mypy
107
+ .mypy_cache/
108
+ .dmypy.json
109
+ dmypy.json
110
+
111
+ # Pyre type checker
112
+ .pyre/
113
+
114
+ # Project specific
115
+ rag_system.db
116
+ backup/
117
+ *.pdf
118
+ *.docx
119
+ *.doc
120
+ *.eml
121
+ *.msg
122
+
123
+ # IDE
124
+ .vscode/
125
+ .idea/
126
+ *.swp
127
+ *.swo
128
+ *~
129
+
130
+ # OS
131
+ .DS_Store
132
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,46 @@
+ FROM python:3.11-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONPATH=/app
+ ENV PYTHONUNBUFFERED=1
+ ENV PORT=8000
+ # Set Hugging Face and Sentence Transformers cache directories
+ ENV HF_HOME=/app/.cache/huggingface
+ ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
+ ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/sentence_transformers
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     g++ \
+     libmagic1 \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Create cache directories with proper permissions
+ RUN mkdir -p /app/.cache/huggingface /app/.cache/sentence_transformers && chmod -R 777 /app/.cache
+
+ # Copy requirements first for better caching
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code and model files
+ COPY . .
+
+ # Ensure model directory has proper permissions
+ RUN chmod -R 777 /app/models
+
+ # Expose port
+ EXPOSE $PORT
+
+ # Health check
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:$PORT/health || exit 1
+
+ # Run the application
+ CMD ["sh", "-c", "uvicorn rag:app --host 0.0.0.0 --port $PORT"]
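The image copies the bundled model from ./models and points the Hugging Face caches at /app/.cache. If you prefer to download the model at build time instead of committing its weights, a hedged sketch of an optional warm-up step (warm_cache.py is a hypothetical helper, not part of this commit):

```python
# warm_cache.py - optional build-time step (hypothetical, not part of this commit).
# Downloads the embedding model into the cache directories configured by the ENV lines above,
# so the container does not fetch it on first request.
import os
from sentence_transformers import SentenceTransformer

os.environ.setdefault("HF_HOME", "/app/.cache/huggingface")
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/app/.cache/sentence_transformers")

SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # populates the cache and exits
```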
Procfile ADDED
@@ -0,0 +1 @@
+ web: uvicorn rag:app --host 0.0.0.0 --port $PORT
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  title: HackRx
- emoji: 💻
+ emoji: πŸƒ
  colorFrom: indigo
  colorTo: purple
  sdk: docker
deploy.bat ADDED
@@ -0,0 +1,41 @@
+ @echo off
+ echo 🚀 Deploying Ultra-Fast RAG System to Railway...
+
+ REM Check if Railway CLI is installed
+ railway --version >nul 2>&1
+ if %errorlevel% neq 0 (
+     echo ❌ Railway CLI not found. Please install from: https://railway.app/cli
+     echo Or run: npm install -g @railway/cli
+     pause
+     exit /b 1
+ )
+
+ REM Login to Railway (if not already logged in)
+ echo πŸ” Checking Railway authentication...
+ railway whoami
+ if %errorlevel% neq 0 (
+     railway login
+ )
+
+ REM Initialize project (if not already initialized)
+ if not exist "railway.toml" (
+     echo 📦 Initializing Railway project...
+     railway init
+ )
+
+ REM Set environment variables reminder
+ echo 🔧 Environment Variables Setup Required:
+ echo Please set these in Railway dashboard after deployment:
+ echo - PINECONE_API_KEY=your_pinecone_api_key
+ echo - GROQ_API_KEY=your_groq_api_key
+ echo - LANGSMITH_API_KEY=your_langsmith_api_key
+ echo.
+
+ REM Deploy
+ echo 🚀 Deploying to Railway...
+ railway up
+
+ echo ✅ Deployment complete!
+ echo 🌐 Your API will be available at the Railway-provided URL
+ echo 📊 Test with: GET https://your-app.railway.app/health
+ pause
deploy.sh ADDED
@@ -0,0 +1,35 @@
+ #!/bin/bash
+
+ # Deploy to Railway Script
+ echo "🚀 Deploying Ultra-Fast RAG System to Railway..."
+
+ # Check if Railway CLI is installed
+ if ! command -v railway &> /dev/null; then
+     echo "❌ Railway CLI not found. Installing..."
+     curl -fsSL https://railway.app/install.sh | sh
+ fi
+
+ # Login to Railway (if not already logged in)
+ echo "πŸ” Checking Railway authentication..."
+ railway whoami || railway login
+
+ # Initialize project (if not already initialized)
+ if [ ! -f "railway.toml" ]; then
+     echo "📦 Initializing Railway project..."
+     railway init
+ fi
+
+ # Set environment variables
+ echo "🔧 Setting environment variables..."
+ echo "Please set these environment variables in Railway dashboard:"
+ echo "PINECONE_API_KEY=your_pinecone_api_key"
+ echo "GROQ_API_KEY=your_groq_api_key"
+ echo "LANGSMITH_API_KEY=your_langsmith_api_key"
+
+ # Deploy
+ echo "🚀 Deploying to Railway..."
+ railway up
+
+ echo "✅ Deployment complete!"
+ echo "🌐 Your API will be available at the Railway-provided URL"
+ echo "📊 Test with: GET https://your-app.railway.app/health"
models/all-MiniLM-L6-v2/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
models/all-MiniLM-L6-v2/README.md ADDED
@@ -0,0 +1,173 @@
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ library_name: sentence-transformers
5
+ tags:
6
+ - sentence-transformers
7
+ - feature-extraction
8
+ - sentence-similarity
9
+ - transformers
10
+ datasets:
11
+ - s2orc
12
+ - flax-sentence-embeddings/stackexchange_xml
13
+ - ms_marco
14
+ - gooaq
15
+ - yahoo_answers_topics
16
+ - code_search_net
17
+ - search_qa
18
+ - eli5
19
+ - snli
20
+ - multi_nli
21
+ - wikihow
22
+ - natural_questions
23
+ - trivia_qa
24
+ - embedding-data/sentence-compression
25
+ - embedding-data/flickr30k-captions
26
+ - embedding-data/altlex
27
+ - embedding-data/simple-wiki
28
+ - embedding-data/QQP
29
+ - embedding-data/SPECTER
30
+ - embedding-data/PAQ_pairs
31
+ - embedding-data/WikiAnswers
32
+ pipeline_tag: sentence-similarity
33
+ ---
34
+
35
+
36
+ # all-MiniLM-L6-v2
37
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
38
+
39
+ ## Usage (Sentence-Transformers)
40
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
41
+
42
+ ```
43
+ pip install -U sentence-transformers
44
+ ```
45
+
46
+ Then you can use the model like this:
47
+ ```python
48
+ from sentence_transformers import SentenceTransformer
49
+ sentences = ["This is an example sentence", "Each sentence is converted"]
50
+
51
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
52
+ embeddings = model.encode(sentences)
53
+ print(embeddings)
54
+ ```
55
+
56
+ ## Usage (HuggingFace Transformers)
57
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
58
+
59
+ ```python
60
+ from transformers import AutoTokenizer, AutoModel
61
+ import torch
62
+ import torch.nn.functional as F
63
+
64
+ #Mean Pooling - Take attention mask into account for correct averaging
65
+ def mean_pooling(model_output, attention_mask):
66
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
67
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
68
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
69
+
70
+
71
+ # Sentences we want sentence embeddings for
72
+ sentences = ['This is an example sentence', 'Each sentence is converted']
73
+
74
+ # Load model from HuggingFace Hub
75
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
76
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
77
+
78
+ # Tokenize sentences
79
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
80
+
81
+ # Compute token embeddings
82
+ with torch.no_grad():
83
+ model_output = model(**encoded_input)
84
+
85
+ # Perform pooling
86
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
87
+
88
+ # Normalize embeddings
89
+ sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
90
+
91
+ print("Sentence embeddings:")
92
+ print(sentence_embeddings)
93
+ ```
94
+
95
+ ------
96
+
97
+ ## Background
98
+
99
+ The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
100
+ contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned it on a
101
+ 1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences, was actually paired with it in our dataset.
102
+
103
+ We developed this model during the
104
+ [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
105
+ organized by Hugging Face. We developed this model as part of the project:
106
+ [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as guidance from Google's Flax, JAX, and Cloud team members on efficient deep learning frameworks.
107
+
108
+ ## Intended uses
109
+
110
+ Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
111
+ the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
112
+
113
+ By default, input text longer than 256 word pieces is truncated.
114
+
115
+
116
+ ## Training procedure
117
+
118
+ ### Pre-training
119
+
120
+ We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
121
+
122
+ ### Fine-tuning
123
+
124
+ We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity for each possible sentence pair in the batch.
125
+ We then apply the cross entropy loss by comparing with true pairs.
126
+
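A rough sketch of that objective with in-batch negatives (the similarity scale factor is an assumption, not taken from the training script):

```python
import torch
import torch.nn.functional as F

def in_batch_contrastive_loss(emb_a: torch.Tensor, emb_b: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    """emb_a[i] and emb_b[i] come from the i-th true sentence pair in the batch."""
    emb_a = F.normalize(emb_a, p=2, dim=1)
    emb_b = F.normalize(emb_b, p=2, dim=1)
    scores = emb_a @ emb_b.T * scale                              # cosine similarity of every (i, j) pair
    labels = torch.arange(scores.size(0), device=scores.device)   # the true pair sits on the diagonal
    return F.cross_entropy(scores, labels)                        # other in-batch sentences act as negatives
```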
127
+ #### Hyper parameters
128
+
129
+ We trained our model on a TPU v3-8. We train the model during 100k steps using a batch size of 1024 (128 per TPU core).
130
+ We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
131
+ a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
132
+
133
+ #### Training data
134
+
135
+ We use the concatenation from multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion sentences.
136
+ We sampled each dataset given a weighted probability which configuration is detailed in the `data_config.json` file.
137
+
138
+
139
+ | Dataset | Paper | Number of training tuples |
140
+ |--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
141
+ | [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
142
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
143
+ | [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
144
+ | [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
145
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
146
+ | [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
147
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
148
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
149
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
150
+ | [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
151
+ | [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
152
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
153
+ | [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
154
+ | [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
155
+ | [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
156
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
157
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
158
+ | [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
159
+ | [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
160
+ | [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
161
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
162
+ | AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
163
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
164
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
165
+ | [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
166
+ | [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
167
+ | [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
168
+ | [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
169
+ | [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
170
+ | [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
171
+ | [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
172
+ | [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
173
+ | **Total** | | **1,170,060,424** |
models/all-MiniLM-L6-v2/config.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.50.1",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
models/all-MiniLM-L6-v2/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.0.1",
4
+ "transformers": "4.50.1",
5
+ "pytorch": "2.6.0+cu118"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
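`similarity_fn_name` is `cosine`; a minimal sketch of the equivalent computation, loading the model folder bundled in this repository:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("models/all-MiniLM-L6-v2")  # local path used elsewhere in this repo
emb = model.encode(["grace period for premium payment",
                    "time allowed to pay the premium after the due date"])
print(util.cos_sim(emb[0], emb[1]))  # cosine similarity, matching similarity_fn_name above
```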
models/all-MiniLM-L6-v2/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
3
+ size 90864192
models/all-MiniLM-L6-v2/modules.json ADDED
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
models/all-MiniLM-L6-v2/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
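`max_seq_length` is 256; inputs longer than that are truncated, which is one reason rag.py chunks documents before embedding. A quick check, assuming the model directory above is on disk:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("models/all-MiniLM-L6-v2")
print(model.max_seq_length)  # 256 - longer inputs are truncated to this many word pieces
```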
models/all-MiniLM-L6-v2/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/all-MiniLM-L6-v2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/all-MiniLM-L6-v2/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
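For completeness, a small sketch of loading this tokenizer configuration from the local folder and tokenizing a query (illustrative only):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("models/all-MiniLM-L6-v2")  # resolves to a BertTokenizer per the config
enc = tok("Does this policy cover maternity expenses?", truncation=True, return_tensors="pt")
print(tok.convert_ids_to_tokens(enc["input_ids"][0]))  # [CLS] ... [SEP], lower-cased WordPiece tokens
```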
models/all-MiniLM-L6-v2/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
rag.py ADDED
@@ -0,0 +1,816 @@
1
+ import os
2
+ import json
3
+ import asyncio
4
+ import logging
5
+ import io
6
+ import traceback
7
+ from datetime import datetime, timezone
8
+ from typing import List, Dict, Tuple, Optional
9
+ from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
+ from fastapi.responses import JSONResponse
10
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from pydantic import BaseModel, Field, validator
13
+ import requests
14
+ import pdfplumber
15
+ from sentence_transformers import SentenceTransformer
16
+ from pinecone import Pinecone, ServerlessSpec
17
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
18
+ from langchain.schema import Document
19
+ from langchain_groq import ChatGroq
20
+
21
+ from langchain_core.messages import SystemMessage, HumanMessage
22
+ import time
23
+ import hashlib
24
+ from urllib.parse import urlparse
25
+ import magic
26
+ import docx2txt
27
+
28
+ model_path = "/app/models/all-MiniLM-L6-v2"
29
+ # Set cache directories to writable path inside the container
30
+ os.environ["HF_HOME"] = "/app/.cache/huggingface"
31
+ os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface/transformers"
32
+ os.environ["TOKENIZERS_CACHE"] = "/app/.cache/huggingface/tokenizers"
33
+
34
+ # Optional: create the folders if not exist (may help avoid errors)
35
+ os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)
36
+ os.makedirs(os.environ["TOKENIZERS_CACHE"], exist_ok=True)
37
+ # Configure logging
38
+ logging.basicConfig(
39
+ level=logging.INFO,
40
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
41
+ )
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # Load environment variables
45
+ from pathlib import Path
46
+ env_file = Path(".env")
47
+ if env_file.exists():
48
+ with open(env_file, 'r') as f:
49
+ for line in f:
50
+ line = line.strip()
51
+ if '=' in line and not line.startswith('#'):
52
+ key, value = line.split('=', 1)
53
+ os.environ[key] = value
54
+
55
+ # Configuration
56
+ class Config:
57
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
58
+ PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
59
+ BEARER_TOKEN = "dbbdb701cfc45d4041e22a03edbfc65753fe9d7b4b9ba1df4884e864f3bb934d"
60
+ EMBEDDING_MODEL = "all-MiniLM-L6-v2"
61
+ MAX_CHUNK_SIZE = 1536 # works well with models like MiniLM and e5
62
+ CHUNK_OVERLAP = 200
63
+ SIMILARITY_THRESHOLD = 0.2
64
+ TOP_K = 11
65
+ PINECONE_INDEX_NAME = "insurance-documents"
66
+ PINECONE_REGION = "us-east-1"
67
+ MAX_DOCUMENT_SIZE = 50 * 1024 * 1024 # 50MB
68
+ REQUEST_TIMEOUT = 60
69
+ MAX_RETRIES = 3
70
+
71
+ config = Config()
72
+
73
+ # Validate configuration
74
+ if not config.GROQ_API_KEY:
75
+ logger.error("GROQ_API_KEY not found in environment variables")
76
+ if not config.PINECONE_API_KEY:
77
+ logger.error("PINECONE_API_KEY not found in environment variables")
78
+
79
+ # Initialize LLM and embeddings with error handling
80
+ try:
81
+ llm = ChatGroq(
82
+ api_key=config.GROQ_API_KEY,
83
+ model="llama3-70b-8192",
84
+ temperature=0.3, # Slightly higher temperature for more complete responses
85
+ max_tokens=2048, # Explicitly set max tokens
86
+ max_retries=config.MAX_RETRIES
87
+ )
88
+ embedding_model = SentenceTransformer(model_path)
89
+ logger.info("LLM and embedding model initialized successfully")
90
+ except Exception as e:
91
+ logger.error(f"Failed to initialize models: {str(e)}")
92
+ raise
93
+
94
+ security = HTTPBearer()
95
+
96
+ # Pydantic Models with validation
97
+ class QueryRequest(BaseModel):
98
+ documents: str = Field(..., description="Comma-separated URLs to document blobs", min_length=1)
99
+ questions: List[str] = Field(..., description="List of questions to answer", min_items=1, max_items=50)
100
+
101
+ @validator('questions')
102
+ def validate_questions(cls, v):
103
+ if not all(question.strip() for question in v):
104
+ raise ValueError("All questions must be non-empty strings")
105
+ return [question.strip() for question in v]
106
+
107
+ @validator('documents')
108
+ def validate_documents(cls, v):
109
+ urls = [url.strip() for url in v.split(',') if url.strip()]
110
+ if not urls:
111
+ raise ValueError("At least one valid document URL must be provided")
112
+ for url in urls:
113
+ parsed = urlparse(url)
114
+ if not parsed.scheme or not parsed.netloc:
115
+ raise ValueError(f"Invalid URL format: {url}")
116
+ return v
117
+
118
+ class QueryResponse(BaseModel):
119
+ answers: List[str] = Field(..., description="List of answers")
120
+ processing_time: float = Field(..., description="Total processing time in seconds")
121
+ documents_processed: int = Field(..., description="Number of documents processed")
122
+ chunks_retrieved: int = Field(..., description="Total chunks retrieved for all questions")
123
+
124
+ # Enhanced Document Processor
125
+ class DocumentProcessor:
126
+ def __init__(self):
127
+ self.text_splitter = RecursiveCharacterTextSplitter(
128
+ chunk_size=config.MAX_CHUNK_SIZE,
129
+ chunk_overlap=config.CHUNK_OVERLAP,
130
+ separators=["\n\n", "\n", ". ", " ", ""]
131
+ )
132
+ self.document_cache = {}
133
+ self.supported_types = {
134
+ 'application/pdf': self._extract_pdf_text,
135
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._extract_docx_text,
136
+ 'text/plain': self._extract_text_content,
137
+ 'text/html': self._extract_text_content
138
+ }
139
+
140
+ def _get_document_hash(self, url: str) -> str:
141
+ """Generate a hash for the document URL for caching"""
142
+ return hashlib.md5(url.encode()).hexdigest()
143
+
144
+ def download_document(self, url: str) -> Tuple[bytes, str]:
145
+ """Download document and return content with MIME type"""
146
+ try:
147
+ # Validate URL format more strictly
148
+ parsed = urlparse(url)
149
+ if not parsed.scheme or not parsed.netloc:
150
+ raise HTTPException(
151
+ status_code=400,
152
+ detail=f"Invalid URL format: {url}"
153
+ )
154
+
155
+ # Check if domain is reachable (basic validation)
156
+ import socket
157
+ try:
158
+ socket.gethostbyname(parsed.netloc.split(':')[0])
159
+ except socket.gaierror:
160
+ raise HTTPException(
161
+ status_code=400,
162
+ detail=f"Domain not reachable: {parsed.netloc}"
163
+ )
164
+
165
+ headers = {
166
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
167
+ }
168
+
169
+ with requests.get(
170
+ url,
171
+ timeout=config.REQUEST_TIMEOUT,
172
+ headers=headers,
173
+ stream=True
174
+ ) as response:
175
+ response.raise_for_status()
176
+
177
+ # Check content length
178
+ content_length = response.headers.get('content-length')
179
+ if content_length and int(content_length) > config.MAX_DOCUMENT_SIZE:
180
+ raise HTTPException(
181
+ status_code=413,
182
+ detail=f"Document too large. Max size: {config.MAX_DOCUMENT_SIZE} bytes"
183
+ )
184
+
185
+ content = response.content
186
+
187
+ # Detect MIME type
188
+ try:
189
+ mime_type = magic.from_buffer(content[:1024], mime=True)
190
+ except:
191
+ # Fallback to content-type header or URL extension
192
+ mime_type = response.headers.get('content-type', '').split(';')[0]
193
+ if not mime_type:
194
+ if url.lower().endswith('.pdf'):
195
+ mime_type = 'application/pdf'
196
+ elif url.lower().endswith('.docx'):
197
+ mime_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
198
+ else:
199
+ mime_type = 'text/plain'
200
+
201
+ return content, mime_type
202
+
203
+ except requests.RequestException as e:
204
+ logger.error(f"Failed to download {url}: {str(e)}")
205
+ raise HTTPException(
206
+ status_code=400,
207
+ detail=f"Failed to download document: {str(e)}"
208
+ )
209
+ except HTTPException:
210
+ raise # Re-raise HTTP exceptions
211
+ except Exception as e:
212
+ logger.error(f"Unexpected error downloading {url}: {str(e)}")
213
+ raise HTTPException(
214
+ status_code=500,
215
+ detail=f"Unexpected error downloading document: {str(e)}"
216
+ )
217
+
218
+ def _extract_pdf_text(self, content: bytes) -> str:
219
+ """Extract text from PDF content"""
220
+ try:
221
+ # Convert bytes to file-like object
222
+ pdf_file = io.BytesIO(content)
223
+
224
+ with pdfplumber.open(pdf_file) as pdf:
225
+ text_parts = []
226
+
227
+ for page_num, page in enumerate(pdf.pages):
228
+ try:
229
+ page_text = page.extract_text()
230
+ if page_text:
231
+ text_parts.append(f"\n--- Page {page_num + 1} ---\n{page_text.strip()}")
232
+ except Exception as e:
233
+ logger.warning(f"Failed to extract text from page {page_num + 1}: {str(e)}")
234
+ continue
235
+
236
+ full_text = "\n".join(text_parts)
237
+
238
+ if not full_text.strip():
239
+ # Try alternative extraction methods
240
+ logger.info("Standard extraction failed, trying alternative methods")
241
+ # You could add OCR here if needed (like pytesseract)
242
+ return "No readable text content found in PDF"
243
+
244
+ return full_text.strip()
245
+
246
+ except Exception as e:
247
+ logger.error(f"PDF extraction failed: {str(e)}")
248
+ logger.error(traceback.format_exc())
249
+ raise HTTPException(
250
+ status_code=400,
251
+ detail=f"Failed to extract PDF text: {str(e)}"
252
+ )
253
+
254
+ def _extract_docx_text(self, content: bytes) -> str:
255
+ """Extract text from DOCX content"""
256
+ try:
257
+ docx_file = io.BytesIO(content)
258
+ text = docx2txt.process(docx_file)
259
+ return text.strip() if text else "No text content found in document"
260
+ except Exception as e:
261
+ logger.error(f"DOCX extraction failed: {str(e)}")
262
+ raise HTTPException(
263
+ status_code=400,
264
+ detail=f"Failed to extract DOCX text: {str(e)}"
265
+ )
266
+
267
+ def _extract_text_content(self, content: bytes) -> str:
268
+ """Extract text from plain text or HTML content"""
269
+ try:
270
+ # Try different encodings
271
+ encodings = ['utf-8', 'utf-16', 'latin-1', 'cp1252']
272
+
273
+ for encoding in encodings:
274
+ try:
275
+ text = content.decode(encoding, errors='ignore')
276
+ if text.strip():
277
+ return text.strip()
278
+ except:
279
+ continue
280
+
281
+ return "Unable to decode text content"
282
+ except Exception as e:
283
+ logger.error(f"Text extraction failed: {str(e)}")
284
+ return "Failed to extract text content"
285
+
286
+ def process_document(self, url: str) -> List[Document]:
287
+ """Process a document and return chunks"""
288
+ doc_hash = self._get_document_hash(url)
289
+
290
+ if doc_hash in self.document_cache:
291
+ logger.info(f"Using cached document for {url}")
292
+ return self.document_cache[doc_hash]
293
+
294
+ try:
295
+ content, mime_type = self.download_document(url)
296
+ logger.info(f"Downloaded document {url} with MIME type: {mime_type}")
297
+
298
+ # Extract text based on MIME type
299
+ if mime_type in self.supported_types:
300
+ text = self.supported_types[mime_type](content)
301
+ else:
302
+ logger.warning(f"Unsupported MIME type {mime_type}, treating as plain text")
303
+ text = self._extract_text_content(content)
304
+
305
+ if not text or len(text.strip()) < 10:
306
+ raise HTTPException(
307
+ status_code=400,
308
+ detail="Document appears to be empty or contains insufficient text content"
309
+ )
310
+
311
+ # Split text into chunks
312
+ chunks = self.text_splitter.split_text(text)
313
+
314
+ # Filter out very short chunks
315
+ meaningful_chunks = [chunk for chunk in chunks if len(chunk.strip()) > 20]
316
+
317
+ if not meaningful_chunks:
318
+ raise HTTPException(
319
+ status_code=400,
320
+ detail="No meaningful text chunks could be extracted from the document"
321
+ )
322
+
323
+ documents = [
324
+ Document(
325
+ page_content=chunk,
326
+ metadata={
327
+ "source": url,
328
+ "chunk_id": i,
329
+ "mime_type": mime_type,
330
+ "doc_hash": doc_hash
331
+ }
332
+ )
333
+ for i, chunk in enumerate(meaningful_chunks)
334
+ ]
335
+
336
+ self.document_cache[doc_hash] = documents
337
+ logger.info(f"Processed {len(documents)} chunks for {url}")
338
+ return documents
339
+
340
+ except HTTPException:
341
+ raise
342
+ except Exception as e:
343
+ logger.error(f"Unexpected error processing document {url}: {str(e)}")
344
+ logger.error(traceback.format_exc())
345
+ raise HTTPException(
346
+ status_code=500,
347
+ detail=f"Unexpected error processing document: {str(e)}"
348
+ )
349
+
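To see what the splitter configured above produces, a standalone sketch with the same settings (the sample text is made up):

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1536,
    chunk_overlap=200,
    separators=["\n\n", "\n", ". ", " ", ""],
)
sample = "Section 4.2 Grace Period. " + ("The insured may renew the policy within thirty days of the due date. " * 60)
chunks = splitter.split_text(sample)
print(len(chunks), [len(c) for c in chunks])  # chunks stay near 1536 characters, adjacent chunks overlap ~200
```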
350
+ # Enhanced Pinecone Vector Store
351
+ class PineconeVectorStore:
352
+ def __init__(self, api_key: str, index_name: str):
353
+ try:
354
+ self.pc = Pinecone(api_key=api_key)
355
+ self.index_name = index_name
356
+ self.dimension = 384
357
+
358
+ # Check if index exists, create if not
359
+ existing_indexes = [index.name for index in self.pc.list_indexes()]
360
+
361
+ if index_name not in existing_indexes:
362
+ logger.info(f"Creating new Pinecone index: {index_name}")
363
+ self.pc.create_index(
364
+ name=index_name,
365
+ dimension=self.dimension,
366
+ metric="cosine",
367
+ spec=ServerlessSpec(cloud="aws", region=config.PINECONE_REGION)
368
+ )
369
+ # Wait for index to be ready
370
+ time.sleep(10)
371
+
372
+ self.index = self.pc.Index(index_name)
373
+ self.processed_docs = set()
374
+ logger.info(f"Pinecone vector store initialized successfully")
375
+
376
+ except Exception as e:
377
+ logger.error(f"Failed to initialize Pinecone: {str(e)}")
378
+ raise
379
+
380
+ def document_exists(self, doc_hash: str) -> bool:
381
+ """Check if document is already indexed"""
382
+ return doc_hash in self.processed_docs
383
+
384
+ async def add_documents(self, documents: List[Document], batch_size: int = 100):
385
+ """Add documents to the vector store in batches"""
386
+ try:
387
+ doc_hash = documents[0].metadata.get('doc_hash')
388
+
389
+ if self.document_exists(doc_hash):
390
+ logger.info(f"Document {doc_hash} already indexed")
391
+ return
392
+
393
+ # Process in batches to avoid memory issues
394
+ for i in range(0, len(documents), batch_size):
395
+ batch = documents[i:i + batch_size]
396
+ vectors = []
397
+
398
+ for doc in batch:
399
+ try:
400
+ embedding = embedding_model.encode(doc.page_content).tolist()
401
+ vector = {
402
+ "id": f"{doc_hash}_{doc.metadata['chunk_id']}",
403
+ "values": embedding,
404
+ "metadata": {
405
+ "text": doc.page_content[:1000], # Limit metadata size
406
+ "source": doc.metadata['source'],
407
+ "chunk_id": doc.metadata['chunk_id'],
408
+ "doc_hash": doc_hash
409
+ }
410
+ }
411
+ vectors.append(vector)
412
+ except Exception as e:
413
+ logger.error(f"Failed to create embedding for chunk {doc.metadata['chunk_id']}: {str(e)}")
414
+ continue
415
+
416
+ if vectors:
417
+ self.index.upsert(vectors=vectors)
418
+ logger.info(f"Upserted batch of {len(vectors)} vectors")
419
+
420
+ self.processed_docs.add(doc_hash)
421
+ logger.info(f"Successfully indexed {len(documents)} chunks")
422
+
423
+ except Exception as e:
424
+ logger.error(f"Failed to add documents to vector store: {str(e)}")
425
+ raise
426
+
427
+ async def similarity_search(self, query: str, top_k: int = config.TOP_K) -> List[Tuple[Document, float]]:
428
+ """Perform similarity search"""
429
+ try:
430
+ query_embedding = embedding_model.encode(query).tolist()
431
+
432
+ results = self.index.query(
433
+ vector=query_embedding,
434
+ top_k=top_k,
435
+ include_metadata=True
436
+ )
437
+
438
+ documents_with_scores = []
439
+ for match in results.matches:
440
+ if match.score >= config.SIMILARITY_THRESHOLD:
441
+ doc = Document(
442
+ page_content=match.metadata.get("text", ""),
443
+ metadata=match.metadata
444
+ )
445
+ documents_with_scores.append((doc, float(match.score)))
446
+
447
+ logger.info(f"Retrieved {len(documents_with_scores)} relevant chunks for query")
448
+ return documents_with_scores
449
+
450
+ except Exception as e:
451
+ logger.error(f"Similarity search failed: {str(e)}")
452
+ return []
453
+
454
+ async def delete_documents(self, doc_hashes: List[str]):
455
+ """Delete documents from the vector store"""
456
+ try:
457
+ for doc_hash in doc_hashes:
458
+ # Delete all vectors for this document
459
+ delete_response = self.index.delete(filter={"doc_hash": {"$eq": doc_hash}})
460
+ logger.info(f"Deleted vectors for document {doc_hash}")
461
+ self.processed_docs.discard(doc_hash)
462
+ except Exception as e:
463
+ logger.error(f"Failed to delete documents: {str(e)}")
464
+
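One caveat on `delete_documents` above: Pinecone serverless indexes (the spec used in `__init__`) generally do not accept metadata-filter deletes, so that call can fail at cleanup time. A hedged alternative sketch that deletes by the ID prefix this class already writes (`{doc_hash}_{chunk_id}`), assuming a pinecone client recent enough to expose `Index.list`:

```python
# Hedged sketch - delete one document's vectors by ID prefix instead of a metadata filter.
def delete_document_by_prefix(index, doc_hash: str) -> None:
    for id_batch in index.list(prefix=f"{doc_hash}_"):  # yields batches of matching vector IDs
        if id_batch:
            index.delete(ids=list(id_batch))
```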
465
+ # Enhanced Insurance Query Processor
466
+ class InsuranceQueryEnhancer:
467
+ def __init__(self):
468
+ self.insurance_terms = {
469
+ 'premium': ['payment', 'installment', 'fee', 'cost'],
470
+ 'coverage': ['benefit', 'protection', 'indemnity', 'compensation'],
471
+ 'waiting period': ['qualification period', 'cooling period'],
472
+ 'grace period': ['extension period', 'buffer period'],
473
+ 'maternity': ['pregnancy', 'childbirth', 'delivery'],
474
+ 'pre-existing': ['prior condition', 'existing condition'],
475
+ 'deductible': ['excess', 'co-payment'],
476
+ 'exclusion': ['limitation', 'restriction'],
477
+ 'claim': ['settlement', 'reimbursement'],
478
+ 'policy': ['contract', 'agreement', 'plan']
479
+ }
480
+
481
+ def expand_query(self, query: str) -> str:
482
+ """Expand query with insurance-specific synonyms"""
483
+ query_lower = query.lower()
484
+ expanded_terms = [query]
485
+
486
+ for main_term, synonyms in self.insurance_terms.items():
487
+ if main_term in query_lower:
488
+ for synonym in synonyms:
489
+ expanded_terms.append(query.lower().replace(main_term, synonym))
490
+
491
+ return ' '.join(expanded_terms)
492
+
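A quick illustration of what `expand_query` returns (it assumes the class defined just above):

```python
enhancer = InsuranceQueryEnhancer()
print(enhancer.expand_query("What is the grace period for premium payment?"))
# The original question followed by lower-cased copies with "grace period" and "premium"
# swapped for each synonym, all joined into one string before similarity_search.
```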
493
+ # FastAPI App
494
+ app = FastAPI(
495
+ title="Robust RAG System for Insurance Documents",
496
+ description="Advanced RAG system with comprehensive error handling and document processing",
497
+ version="3.0.0"
498
+ )
499
+
500
+ app.add_middleware(
501
+ CORSMiddleware,
502
+ allow_origins=["*"],
503
+ allow_credentials=True,
504
+ allow_methods=["*"],
505
+ allow_headers=["*"]
506
+ )
507
+
508
+ # Initialize components
509
+ processor = DocumentProcessor()
510
+ vector_store = PineconeVectorStore(config.PINECONE_API_KEY, config.PINECONE_INDEX_NAME)
511
+ query_enhancer = InsuranceQueryEnhancer()
512
+
513
+ # Authentication
514
+ async def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
515
+ if credentials.credentials != config.BEARER_TOKEN:
516
+ raise HTTPException(status_code=401, detail="Invalid authentication token")
517
+ return credentials.credentials
518
+
519
+ # API Endpoints
520
+ @app.post("/hackrx/run", response_model=QueryResponse)
521
+ async def query_retrieval(
522
+ request: QueryRequest,
523
+ background_tasks: BackgroundTasks,
524
+ token: str = Depends(verify_token)
525
+ ):
526
+ start_time = time.time()
527
+ total_chunks_retrieved = 0
528
+ processed_docs = 0
529
+
530
+ try:
531
+ doc_urls = [url.strip() for url in request.documents.split(',') if url.strip()]
532
+ logger.info(f"Processing {len(doc_urls)} documents and {len(request.questions)} questions")
533
+
534
+ # Process documents with better error handling
535
+ doc_hashes = []
536
+ failed_docs = []
537
+
538
+ for url in doc_urls:
539
+ try:
540
+ doc_hash = processor._get_document_hash(url)
541
+
542
+ if not vector_store.document_exists(doc_hash):
543
+ logger.info(f"Processing new document: {url}")
544
+ documents = processor.process_document(url)
545
+ await vector_store.add_documents(documents)
546
+ processed_docs += 1
547
+ else:
548
+ logger.info(f"Document already processed: {url}")
549
+ processed_docs += 1
550
+
551
+ doc_hashes.append(doc_hash)
552
+
553
+ except HTTPException as e:
554
+ logger.error(f"HTTP error processing document {url}: {e.detail}")
555
+ failed_docs.append(f"{url}: {e.detail}")
556
+ continue
557
+ except Exception as e:
558
+ logger.error(f"Unexpected error processing document {url}: {str(e)}")
559
+ failed_docs.append(f"{url}: {str(e)}")
560
+ continue
561
+
562
+ # If no documents were successfully processed, return error
563
+ if processed_docs == 0:
564
+ error_msg = "No documents could be processed successfully."
565
+ if failed_docs:
566
+ error_msg += f" Errors: {'; '.join(failed_docs[:3])}" # Show first 3 errors
567
+ raise HTTPException(
568
+ status_code=400,
569
+ detail=error_msg
570
+ )
571
+
572
+ # Process questions
573
+ async def process_question(question: str) -> str:
574
+ nonlocal total_chunks_retrieved
575
+ try:
576
+ expanded_query = query_enhancer.expand_query(question)
577
+ retrieved_docs = await vector_store.similarity_search(expanded_query)
578
+
579
+ if not retrieved_docs:
580
+ logger.warning(f"No relevant information found for question: {question}")
581
+ return "No relevant information found in the documents for this question."
582
+
583
+ total_chunks_retrieved += len(retrieved_docs)
584
+
585
+ # Build context from retrieved documents
586
+ context_parts = []
587
+ for i, (doc, score) in enumerate(retrieved_docs):
588
+ context_parts.append(
589
+ f"[Chunk {i+1} - Relevance: {score:.3f}]\n{doc.page_content}"
590
+ )
591
+
592
+ context = "\n\n".join(context_parts)
593
+
594
+ # Enhanced system prompt
595
+ system_prompt = """You are an expert insurance policy analyst with comprehensive knowledge of insurance regulations, particularly Indian insurance policies.
596
+
597
+ Your expertise includes:
598
+ - Policy terms, conditions, and exclusions
599
+ - Premium calculations and payment structures
600
+ - Claim procedures and settlement processes
601
+ - Waiting periods, grace periods, coverage limits, and deductibles
602
+ - Pre-existing disease clauses, maternity benefits, and specialized treatments
603
+ - Regulatory compliance and policy terminology
604
+
605
+ Instructions for answering:
606
+ 1. Provide precise, factual answers based exclusively on the document context provided
607
+ 2. Include specific amounts, percentages, time periods, and conditions when mentioned
608
+ 3. Clearly state any conditions, limitations, or exclusions that apply
609
+ 4. Use proper insurance terminology and maintain professional language
610
+ 5. If information is not available in the context, explicitly state this
611
+ 6. When referencing policy sections or clauses, mention them if available
612
+ 7. Provide comprehensive answers that address all aspects of the question
613
+
614
+ Format your response clearly and professionally."""
615
+
616
+ messages = [
617
+ SystemMessage(content=system_prompt),
618
+ HumanMessage(content=f"""Based on the following document context, please answer the question comprehensively:
619
+
620
+ CONTEXT:
621
+ {context}
622
+
623
+ QUESTION: {question}
624
+
625
+ Please provide a detailed, accurate answer based solely on the information in the context above.""")
626
+ ]
627
+
628
+ response = await llm.ainvoke(messages)
629
+ return response.content
630
+
631
+ except Exception as e:
632
+ logger.error(f"Error processing question '{question}': {str(e)}")
633
+ return f"An error occurred while processing this question: {str(e)}"
634
+
635
+ # Process all questions concurrently
636
+ logger.info("Processing questions concurrently...")
637
+ answers = await asyncio.gather(*[process_question(q) for q in request.questions])
638
+
639
+ processing_time = time.time() - start_time
640
+ logger.info(f"Completed processing in {processing_time:.2f} seconds")
641
+
642
+ # Schedule cleanup in background
643
+ background_tasks.add_task(vector_store.delete_documents, doc_hashes)
644
+
645
+ response_data = QueryResponse(
646
+ answers=answers,
647
+ processing_time=processing_time,
648
+ documents_processed=processed_docs,
649
+ chunks_retrieved=total_chunks_retrieved
650
+ )
651
+
652
+ # Add warning if some documents failed
653
+ if failed_docs:
654
+ logger.warning(f"Some documents failed to process: {failed_docs}")
655
+
656
+ return response_data
657
+
658
+ except HTTPException:
659
+ raise # Re-raise HTTP exceptions
660
+ except Exception as e:
661
+ logger.error(f"Error in query retrieval: {str(e)}")
662
+ logger.error(traceback.format_exc())
663
+ raise HTTPException(
664
+ status_code=500,
665
+ detail=f"Internal server error: {str(e)}"
666
+ )
667
+
668
+ # 3. Add a dedicated document validation endpoint
669
+ @app.post("/validate-documents")
670
+ async def validate_documents(
671
+ documents: str,
672
+ token: str = Depends(verify_token)
673
+ ):
674
+ """Validate document URLs without processing them"""
675
+ try:
676
+ doc_urls = [url.strip() for url in documents.split(',') if url.strip()]
677
+ results = []
678
+
679
+ for url in doc_urls:
680
+ try:
681
+ # Basic URL validation
682
+ parsed = urlparse(url)
683
+ if not parsed.scheme or not parsed.netloc:
684
+ results.append({
685
+ "url": url,
686
+ "valid": False,
687
+ "error": "Invalid URL format"
688
+ })
689
+ continue
690
+
691
+ # Test connectivity
692
+ response = requests.head(url, timeout=10, allow_redirects=True)
693
+
694
+ results.append({
695
+ "url": url,
696
+ "valid": response.status_code < 400,
697
+ "status_code": response.status_code,
698
+ "content_type": response.headers.get('content-type', 'unknown'),
699
+ "content_length": response.headers.get('content-length', 'unknown')
700
+ })
701
+
702
+ except Exception as e:
703
+ results.append({
704
+ "url": url,
705
+ "valid": False,
706
+ "error": str(e)
707
+ })
708
+
709
+ return {
710
+ "validation_results": results,
711
+ "valid_count": sum(1 for r in results if r.get('valid', False)),
712
+ "total_count": len(results)
713
+ }
714
+
715
+ except Exception as e:
716
+ raise HTTPException(
717
+ status_code=500,
718
+ detail=f"Validation error: {str(e)}"
719
+ )
720
+
721
+ @app.get("/health")
722
+ async def health_check():
723
+ """Health check endpoint"""
724
+ try:
725
+ # Test basic functionality
726
+ test_embedding = embedding_model.encode("test")
727
+
728
+ return {
729
+ "status": "healthy",
730
+ "timestamp": datetime.now(timezone.utc).isoformat(),
731
+ "version": "3.0.0",
732
+ "components": {
733
+ "embedding_model": "operational",
734
+ "vector_store": "operational",
735
+ "llm": "operational"
736
+ }
737
+ }
738
+ except Exception as e:
739
+ logger.error(f"Health check failed: {str(e)}")
740
+ raise HTTPException(status_code=503, detail="Service unhealthy")
741
+
742
+ @app.get("/metrics")
743
+ async def get_metrics(token: str = Depends(verify_token)):
744
+ """Get system metrics"""
745
+ return {
746
+ "status": "operational",
747
+ "configuration": {
748
+ "pinecone_index": config.PINECONE_INDEX_NAME,
749
+ "embedding_model": config.EMBEDDING_MODEL,
750
+ "max_chunk_size": config.MAX_CHUNK_SIZE,
751
+ "similarity_threshold": config.SIMILARITY_THRESHOLD,
752
+ "top_k": config.TOP_K
753
+ },
754
+ "version": "3.0.0",
755
+ "features": [
756
+ "multi_format_document_processing",
757
+ "pinecone_vector_database",
758
+ "parallel_question_processing",
759
+ "insurance_domain_optimization",
760
+ "robust_error_handling",
761
+ "document_caching",
762
+ "batch_processing"
763
+ ]
764
+ }
765
+
766
+ @app.post("/webhook")
767
+ async def hackathon_webhook(request: dict):
768
+ """Webhook endpoint for hackathon"""
769
+ logger.info(f"Webhook received: {request}")
770
+ return {
771
+ "status": "success",
772
+ "timestamp": datetime.now(timezone.utc).isoformat(),
773
+ "system_health": await health_check(),
774
+ "api_endpoints": {
775
+ "main_submission": "/hackrx/run",
776
+ "health_check": "/health",
777
+ "metrics": "/metrics"
778
+ }
779
+ }
780
+
781
+ @app.get("/")
782
+ def read_root():
783
+ return {"message": "RAG backend is up and running!"}
784
+
785
+ # Error handlers
786
+ @app.exception_handler(Exception)
787
+ async def global_exception_handler(request, exc):
788
+ logger.error(f"Global exception handler: {str(exc)}")
789
+ logger.error(traceback.format_exc())
790
+ # Exception handlers must return a Response; returning an HTTPException would not produce a usable reply
+ return JSONResponse(
+ status_code=500,
+ content={"detail": "An unexpected error occurred. Please try again later."}
+ )
794
+
795
+ # Startup event
796
+ @app.on_event("startup")
797
+ async def startup_event():
798
+ logger.info("RAG System starting up...")
799
+ logger.info(f"Configuration loaded: Index={config.PINECONE_INDEX_NAME}, Model={config.EMBEDDING_MODEL}")
800
+
801
+ # Shutdown event
802
+ @app.on_event("shutdown")
803
+ async def shutdown_event():
804
+ logger.info("RAG System shutting down...")
805
+
806
+ # Run the app
807
+ if __name__ == "__main__":
808
+ import uvicorn
809
+ port = int(os.environ.get("PORT", 8000))
810
+ uvicorn.run(
811
+ app,
812
+ host="0.0.0.0",
813
+ port=port,
814
+ log_level="info",
815
+ access_log=True
816
+ )
railway.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "build": {
+     "builder": "DOCKERFILE"
+   },
+   "deploy": {
+     "startCommand": "uvicorn rag:app --host 0.0.0.0 --port $PORT",
+     "healthcheckPath": "/health",
+     "healthcheckTimeout": 300,
+     "restartPolicyType": "ON_FAILURE",
+     "restartPolicyMaxRetries": 3
+   }
+ }
req.txt ADDED
@@ -0,0 +1,18 @@
+ fastapi
+ python-magic
+ langchain
+ langchain-groq
+ pydantic
+ pinecone
+ python-docx
+ python-dotenv
+ requests
+ sentence-transformers
+ pdfplumber
+ uvicorn
+ docx2txt
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ fastapi
+ langchain
+ langchain-groq
+ pydantic
+ pinecone  # official Pinecone client (package renamed from pinecone-client)
+ python-docx
+ python-dotenv
+ python-magic  # needs the libmagic system library (installed as libmagic1 in the Dockerfile); on Windows use python-magic-bin instead
+ requests
+ sentence-transformers
+ pdfplumber
+ uvicorn
+ docx2txt
test.py ADDED
@@ -0,0 +1,231 @@
1
+ import requests
2
+ import json
3
+ import time
4
+ import asyncio
5
+ from typing import Dict, List
6
+ import logging
7
+ from concurrent.futures import ThreadPoolExecutor
8
+
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+ class RAGSystemTester:
14
+ def __init__(self, base_url: str = "http://localhost:8000", bearer_token: str = None):
15
+ self.base_url = base_url
16
+ self.bearer_token = bearer_token or "dbbdb701cfc45d4041e22a03edbfc65753fe9d7b4b9ba1df4884e864f3bb934d"
17
+ self.headers = {
18
+ "Authorization": f"Bearer {self.bearer_token}",
19
+ "Content-Type": "application/json"
20
+ }
21
+ self.executor = ThreadPoolExecutor(max_workers=3)
22
+
23
+ def test_health_check(self) -> bool:
24
+ """Test health check endpoint"""
25
+ try:
26
+ response = requests.get(f"{self.base_url}/health", timeout=10)
27
+ if response.status_code == 200:
28
+ data = response.json()
29
+ print(f"βœ… Health check passed: {data}")
30
+ return True
31
+ else:
32
+ print(f"❌ Health check failed: Status {response.status_code}")
33
+ logger.error(f"Health check failed with status: {response.status_code}")
34
+ return False
35
+ except Exception as e:
36
+ print(f"❌ Health check error: {str(e)}")
37
+ logger.error(f"Health check error: {str(e)}")
38
+ return False
39
+
40
+ def test_metrics_endpoint(self) -> bool:
41
+ """Test metrics endpoint"""
42
+ try:
43
+ response = requests.get(f"{self.base_url}/metrics", headers=self.headers, timeout=10)
44
+ if response.status_code == 200:
45
+ data = response.json()
46
+ print(f"βœ… Metrics endpoint passed: {json.dumps(data, indent=2)}")
47
+ return True
48
+ else:
49
+ print(f"❌ Metrics endpoint failed: Status {response.status_code}")
50
+ logger.error(f"Metrics endpoint failed with status: {response.status_code}")
51
+ return False
52
+ except Exception as e:
53
+ print(f"❌ Metrics endpoint error: {str(e)}")
54
+ logger.error(f"Metrics endpoint error: {str(e)}")
55
+ return False
56
+
57
+ def test_sample_query(self) -> bool:
58
+ """Test with the provided sample data"""
59
+ sample_data = {
60
+ "documents": "https://hackrx.blob.core.windows.net/assets/Arogya%20Sanjeevani%20Policy%20-%20CIN%20-%20U10200WB1906GOI001713%201.pdf?sv=2023-01-03&st=2025-07-21T08%3A29%3A02Z&se=2025-09-22T08%3A29%3A00Z&sr=b&sp=r&sig=nzrz1K9Iurt%2BBXom%2FB%2BMPTFMFP3PRnIvEsipAX10Ig4%3D",
61
+ "questions": [
62
+ "What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?",
63
+ "What is the waiting period for pre-existing diseases (PED) to be covered?",
64
+ "Does this policy cover maternity expenses, and what are the conditions?",
65
+ "What is the waiting period for cataract surgery?",
66
+ "Are the medical expenses for an organ donor covered under this policy?",
67
+ "What is the No Claim Discount (NCD) offered in this policy?",
68
+ "Is there a benefit for preventive health check-ups?",
69
+ "How does the policy define a 'Hospital'?",
70
+ "What is the extent of coverage for AYUSH treatments?",
71
+ "Are there any sub-limits on room rent and ICU charges for Plan A?"
72
+ ]
73
+ }
74
+
75
+ try:
76
+ print("πŸ”„ Testing sample query...")
77
+ start_time = time.time()
78
+
79
+ response = requests.post(
80
+ f"{self.base_url}/hackrx/run",
81
+ headers=self.headers,
82
+ json=sample_data,
83
+ timeout=120
84
+ )
85
+
86
+ end_time = time.time()
87
+ latency = end_time - start_time
88
+
89
+ if response.status_code == 200:
90
+ data = response.json()
91
+ answers = data.get("answers", [])
92
+
93
+ print(f"βœ… Sample query successful (Latency: {latency:.2f}s)")
94
+ print(f"πŸ“Š Received {len(answers)} answers")
95
+
96
+ # Print all answers for validation
97
+ for i, (question, answer) in enumerate(zip(sample_data['questions'], answers)):
98
+ print(f"Q{i+1}: {question}")
99
+ print(f"A{i+1}: {answer[:200]}..." if len(answer) > 200 else f"A{i+1}: {answer}")
100
+ print("-" * 50)
101
+
102
+ # Validate that we received answers for all questions
103
+ if len(answers) == len(sample_data['questions']):
104
+ print("βœ… All questions answered")
105
+ return True
106
+ else:
107
+ print(f"❌ Incomplete response: Expected {len(sample_data['questions'])} answers, got {len(answers)}")
108
+ logger.warning(f"Incomplete response: Expected {len(sample_data['questions'])} answers, got {len(answers)}")
109
+ return False
110
+ else:
111
+ print(f"❌ Sample query failed: Status {response.status_code}")
112
+ print(f"Response: {response.text}")
113
+ logger.error(f"Sample query failed: Status {response.status_code}, Response: {response.text}")
114
+ return False
115
+ except Exception as e:
116
+ print(f"❌ Sample query error: {str(e)}")
117
+ logger.error(f"Sample query error: {str(e)}")
118
+ return False
119
+
120
+ async def test_concurrent_queries(self, num_requests: int = 3) -> bool:
121
+ """Test system under concurrent load"""
122
+ def make_request():  # plain (blocking) function; it is dispatched to the thread pool below
123
+ try:
124
+ response = requests.post(
125
+ f"{self.base_url}/hackrx/run",
126
+ headers=self.headers,
127
+ json={
128
+ "documents": "https://hackrx.blob.core.windows.net/assets/Arogya%20Sanjeevani%20Policy%20-%20CIN%20-%20U10200WB1906GOI001713%201.pdf?sv=2023-01-03&st=2025-07-21T08%3A29%3A02Z&se=2025-09-22T08%3A29%3A00Z&sr=b&sp=r&sig=nzrz1K9Iurt%2BBXom%2FB%2BMPTFMFP3PRnIvEsipAX10Ig4%3D",
129
+ "questions": ["What is the grace period for premium payment?"]
130
+ },
131
+ timeout=60
132
+ )
133
+ return response.status_code == 200
134
+ except Exception as e:
135
+ logger.error(f"Concurrent query error: {str(e)}")
136
+ return False
137
+
138
+ print(f"πŸ”„ Testing {num_requests} concurrent queries...")
139
+ tasks = [make_request() for _ in range(num_requests)]
140
+ results = await asyncio.gather(*tasks, return_exceptions=True)
141
+
142
+ success_count = sum(1 for result in results if result is True)
143
+ print(f"βœ… Concurrent test completed: {success_count}/{num_requests} successful")
144
+
145
+ return success_count == num_requests
146
+
147
+ def test_invalid_token(self) -> bool:
148
+ """Test authentication with invalid token"""
149
+ try:
150
+ invalid_headers = {
151
+ "Authorization": "Bearer invalid_token",
152
+ "Content-Type": "application/json"
153
+ }
154
+ response = requests.post(
155
+ f"{self.base_url}/hackrx/run",
156
+ headers=invalid_headers,
157
+ json={
158
+ "documents": "https://hackrx.blob.core.windows.net/assets/Arogya%20Sanjeevani%20Policy%20-%20CIN%20-%20U10200WB1906GOI001713%201.pdf?sv=2023-01-03&st=2025-07-21T08%3A29%3A02Z&se=2025-09-22T08%3A29%3A00Z&sr=b&sp=r&sig=nzrz1K9Iurt%2BBXom%2FB%2BMPTFMFP3PRnIvEsipAX10Ig4%3D",
159
+ "questions": ["Test question"]
160
+ },
161
+ timeout=10
162
+ )
163
+ if response.status_code == 401:
164
+ print("βœ… Invalid token test passed: Correctly rejected")
165
+ return True
166
+ else:
167
+ print(f"❌ Invalid token test failed: Expected 401, got {response.status_code}")
168
+ logger.warning(f"Invalid token test failed: Expected 401, got {response.status_code}")
169
+ return False
170
+ except Exception as e:
171
+ print(f"❌ Invalid token test error: {str(e)}")
172
+ logger.error(f"Invalid token test error: {str(e)}")
173
+ return False
174
+
175
+ def test_invalid_url(self) -> bool:
176
+ """Test with invalid document URL"""
177
+ try:
178
+ response = requests.post(
179
+ f"{self.base_url}/hackrx/run",
180
+ headers=self.headers,
181
+ json={
182
+ "documents": "https://invalid-url-that-does-not-exist.com/fake.pdf", # Actually invalid URL
183
+ "questions": ["Test question"]
184
+ },
185
+ timeout=30
186
+ )
187
+ # Accept either 400 or 500 as valid error responses for invalid URLs
188
+ if response.status_code in [400, 500]:
189
+ print("βœ… Invalid URL test passed: Correctly handled")
190
+ return True
191
+ else:
192
+ print(f"❌ Invalid URL test failed: Expected 400/500, got {response.status_code}")
193
+ logger.warning(f"Invalid URL test failed: Expected 400/500, got {response.status_code}")
194
+ return False
195
+ except Exception as e:
196
+ print(f"❌ Invalid URL test error: {str(e)}")
197
+ logger.error(f"Invalid URL test error: {str(e)}")
198
+ return False
199
+
200
+ async def run_all_tests(self):
201
+ """Run all test cases"""
202
+ print("πŸš€ Starting RAG System Tests")
203
+ print("=" * 50)
204
+
205
+ results = {
206
+ "health_check": self.test_health_check(),
207
+ "metrics_endpoint": self.test_metrics_endpoint(),
208
+ "sample_query": self.test_sample_query(),
209
+ "concurrent_queries": await self.test_concurrent_queries(),
210
+ "invalid_token": self.test_invalid_token(),
211
+ "invalid_url": self.test_invalid_url()
212
+ }
213
+
214
+ print("\nπŸ“Š Test Summary")
215
+ print("=" * 50)
216
+ passed = sum(1 for result in results.values() if result)
+ total = len(results)
+
+ # use a distinct loop variable so the overall count computed above is not overwritten
+ for test_name, test_passed in results.items():
+ status = "βœ… PASSED" if test_passed else "❌ FAILED"
+ print(f"{test_name}: {status}")
+
+ print(f"\n🎯 Overall: {passed}/{total} tests passed")
+ return passed == total
225
+
226
+ def main():
227
+ tester = RAGSystemTester()
228
+ asyncio.run(tester.run_all_tests())
229
+
230
+ if __name__ == "__main__":
231
+ main()
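The tester defaults to http://localhost:8000; to exercise a deployed instance instead, pass the base URL explicitly (the Railway URL below is a placeholder):

    import asyncio

    tester = RAGSystemTester(base_url="https://your-app.railway.app")
    asyncio.run(tester.run_all_tests())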
test_api.py ADDED
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the Ultra-Fast RAG System API
4
+ Usage: python test_api.py <base_url>
5
+ Example: python test_api.py https://your-app.railway.app
6
+ """
7
+
8
+ import requests
9
+ import json
10
+ import time
11
+ import sys
12
+
13
+ def test_api(base_url):
14
+ """Test the deployed API endpoints"""
15
+
16
+ print(f"πŸ§ͺ Testing API at: {base_url}")
17
+
18
+ # Test 1: Health Check
19
+ print("\n1️⃣ Testing Health Check...")
20
+ try:
21
+ response = requests.get(f"{base_url}/health", timeout=10)
22
+ if response.status_code == 200:
23
+ print("βœ… Health check passed!")
24
+ print(f" Response: {response.json()}")
25
+ else:
26
+ print(f"❌ Health check failed: {response.status_code}")
27
+ return False
28
+ except Exception as e:
29
+ print(f"❌ Health check error: {e}")
30
+ return False
31
+
32
+ # Test 2: Sample RAG Query
33
+ print("\n2️⃣ Testing RAG Endpoint...")
34
+
35
+ headers = {
36
+ "Content-Type": "application/json",
37
+ "Accept": "application/json",
38
+ "Authorization": "Bearer dbbdb701cfc45d4041e22a03edbfc65753fe9d7b4b9ba1df4884e864f3bb934d"
39
+ }
40
+
41
+ test_payload = {
42
+ "documents": "https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D",
43
+ "questions": [
44
+ "What is the grace period for premium payment?",
45
+ "What is the waiting period for pre-existing diseases?"
46
+ ]
47
+ }
48
+
49
+ try:
50
+ print(" Sending request... (this may take 15-30 seconds)")
51
+ start_time = time.time()
52
+
53
+ response = requests.post(
54
+ f"{base_url}/hackrx/run",
55
+ json=test_payload,
56
+ headers=headers,
57
+ timeout=120
58
+ )
59
+
60
+ end_time = time.time()
61
+ response_time = end_time - start_time
62
+
63
+ if response.status_code == 200:
64
+ result = response.json()
65
+ print(f"βœ… RAG query successful! ({response_time:.2f} seconds)")
66
+ print(f" Questions: {len(test_payload['questions'])}")
67
+ print(f" Answers: {len(result['answers'])}")
68
+ print("\n Sample answers:")
69
+ for i, answer in enumerate(result['answers'][:2]):
70
+ print(f" Q{i+1}: {answer[:100]}...")
71
+ else:
72
+ print(f"❌ RAG query failed: {response.status_code}")
73
+ print(f" Response: {response.text}")
74
+ return False
75
+
76
+ except Exception as e:
77
+ print(f"❌ RAG query error: {e}")
78
+ return False
79
+
80
+ # Test 3: Metrics (optional)
81
+ print("\n3️⃣ Testing Metrics Endpoint...")
82
+ try:
83
+ response = requests.get(
84
+ f"{base_url}/metrics",
85
+ headers=headers,
86
+ timeout=10
87
+ )
88
+ if response.status_code == 200:
89
+ print("βœ… Metrics endpoint working!")
90
+ metrics = response.json()
91
+ print(f" Total queries: {metrics.get('total_queries_24h', 0)}")
92
+ else:
93
+ print(f"⚠️ Metrics endpoint issue: {response.status_code}")
94
+ except Exception as e:
95
+ print(f"⚠️ Metrics endpoint error: {e}")
96
+
97
+ print(f"\nπŸŽ‰ API testing complete! System is ready for hackathon use.")
98
+ return True
99
+
100
+ def main():
101
+ if len(sys.argv) != 2:
102
+ print("Usage: python test_api.py <base_url>")
103
+ print("Example: python test_api.py https://your-app.railway.app")
104
+ sys.exit(1)
105
+
106
+ base_url = sys.argv[1].rstrip('/')
107
+
108
+ print("πŸš€ Ultra-Fast RAG System API Tester")
109
+ print("=" * 50)
110
+
111
+ success = test_api(base_url)
112
+
113
+ if success:
114
+ print("\nβœ… All tests passed! Your API is ready for the hackathon! πŸ†")
115
+ print(f"\nπŸ“‹ API Usage Summary:")
116
+ print(f" Endpoint: POST {base_url}/hackrx/run")
117
+ print(f" Auth: Bearer dbbdb701cfc45d4041e22a03edbfc65753fe9d7b4b9ba1df4884e864f3bb934d")
118
+ print(f" Health: GET {base_url}/health")
119
+ else:
120
+ print("\n❌ Some tests failed. Please check your deployment.")
121
+ sys.exit(1)
122
+
123
+ if __name__ == "__main__":
124
+ main()