Spaces:
Sleeping
Sleeping
yuvrajsingh6 committed on
Commit ·
9c4c212
0
Parent(s):
deploy: v2 production baked index (zero latency)
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +56 -0
- .env.example +6 -0
- .github/workflows/deploy_to_hf.yml +20 -0
- .github/workflows/docker-build.yml +34 -0
- .gitignore +23 -0
- AWS_APP_RUNNER_SETUP.md +48 -0
- AWS_DEPLOYMENT.md +152 -0
- DEPLOYMENT.md +178 -0
- Dockerfile +27 -0
- Makefile +40 -0
- Procfile +1 -0
- README.md +170 -0
- app.py +173 -0
- configs/default.yaml +22 -0
- data/raw/finphrase_000.txt +3 -0
- data/raw/finphrase_001.txt +3 -0
- data/raw/finphrase_002.txt +3 -0
- data/raw/finphrase_003.txt +3 -0
- data/raw/finphrase_004.txt +3 -0
- data/raw/finphrase_005.txt +3 -0
- data/raw/finphrase_006.txt +3 -0
- data/raw/finphrase_007.txt +3 -0
- data/raw/finphrase_008.txt +3 -0
- data/raw/finphrase_009.txt +3 -0
- data/raw/finphrase_010.txt +3 -0
- data/raw/finphrase_011.txt +3 -0
- data/raw/finphrase_012.txt +3 -0
- data/raw/finphrase_013.txt +3 -0
- data/raw/finphrase_014.txt +3 -0
- data/raw/finphrase_015.txt +3 -0
- data/raw/finphrase_016.txt +3 -0
- data/raw/finphrase_017.txt +3 -0
- data/raw/finphrase_018.txt +3 -0
- data/raw/finphrase_019.txt +3 -0
- data/raw/finphrase_020.txt +3 -0
- data/raw/finphrase_021.txt +3 -0
- data/raw/finphrase_022.txt +3 -0
- data/raw/finphrase_023.txt +3 -0
- data/raw/finphrase_024.txt +3 -0
- data/raw/finphrase_025.txt +3 -0
- data/raw/finphrase_026.txt +3 -0
- data/raw/finphrase_027.txt +3 -0
- data/raw/finphrase_028.txt +3 -0
- data/raw/finphrase_029.txt +3 -0
- data/raw/finphrase_030.txt +3 -0
- data/raw/finphrase_031.txt +3 -0
- data/raw/finphrase_032.txt +3 -0
- data/raw/finphrase_033.txt +3 -0
- data/raw/finphrase_034.txt +3 -0
- data/raw/finphrase_035.txt +3 -0
.dockerignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.Python
|
| 7 |
+
|
| 8 |
+
# Virtual Environment
|
| 9 |
+
venv/
|
| 10 |
+
env/
|
| 11 |
+
ENV/
|
| 12 |
+
.venv
|
| 13 |
+
|
| 14 |
+
# Environment Variables
|
| 15 |
+
.env
|
| 16 |
+
.env.local
|
| 17 |
+
|
| 18 |
+
# Data directories
|
| 19 |
+
data/raw/
|
| 20 |
+
# data/index/ <-- We want to include this for the showcase
|
| 21 |
+
data/db/
|
| 22 |
+
data/processed/
|
| 23 |
+
|
| 24 |
+
# IDE
|
| 25 |
+
.vscode/
|
| 26 |
+
.idea/
|
| 27 |
+
*.swp
|
| 28 |
+
*.swo
|
| 29 |
+
*~
|
| 30 |
+
|
| 31 |
+
# OS
|
| 32 |
+
.DS_Store
|
| 33 |
+
Thumbs.db
|
| 34 |
+
|
| 35 |
+
# Git
|
| 36 |
+
.git/
|
| 37 |
+
.gitignore
|
| 38 |
+
|
| 39 |
+
# Testing
|
| 40 |
+
.pytest_cache/
|
| 41 |
+
.coverage
|
| 42 |
+
htmlcov/
|
| 43 |
+
|
| 44 |
+
# Documentation (not needed in container)
|
| 45 |
+
docs/
|
| 46 |
+
*.md
|
| 47 |
+
!README.md
|
| 48 |
+
|
| 49 |
+
# Notebooks
|
| 50 |
+
notebooks/
|
| 51 |
+
*.ipynb
|
| 52 |
+
|
| 53 |
+
# Build artifacts
|
| 54 |
+
dist/
|
| 55 |
+
build/
|
| 56 |
+
*.egg-info/
|
.env.example
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=sk-xxx
|
| 2 |
+
VLLM_API_URL=http://localhost:8000/v1
|
| 3 |
+
# VLLM_MODEL=mistralai/Mistral-7B-Instruct-v0.2
|
| 4 |
+
LOG_LEVEL=INFO
|
| 5 |
+
DATA_DIR=data
|
| 6 |
+
INDEX_DIR=data/index
|
.github/workflows/deploy_to_hf.yml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face Hub
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
|
| 7 |
+
jobs:
|
| 8 |
+
sync-to-hub:
|
| 9 |
+
runs-on: ubuntu-latest
|
| 10 |
+
steps:
|
| 11 |
+
- uses: actions/checkout@v3
|
| 12 |
+
with:
|
| 13 |
+
fetch-depth: 0
|
| 14 |
+
lfs: true
|
| 15 |
+
- name: Push to hub
|
| 16 |
+
env:
|
| 17 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 18 |
+
run: |
|
| 19 |
+
git remote add hf https://yuvis:$HF_TOKEN@huggingface.co/spaces/yuvis/Enterprise-RAG-System
|
| 20 |
+
git push -f hf main
|
.github/workflows/docker-build.yml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Build and Push Docker Image
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [ "main", "master" ]
|
| 6 |
+
workflow_dispatch:
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
build-and-push:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
steps:
|
| 12 |
+
- name: Checkout repository
|
| 13 |
+
uses: actions/checkout@v4
|
| 14 |
+
|
| 15 |
+
- name: Set up QEMU
|
| 16 |
+
uses: docker/setup-qemu-action@v3
|
| 17 |
+
|
| 18 |
+
- name: Set up Docker Buildx
|
| 19 |
+
uses: docker/setup-buildx-action@v3
|
| 20 |
+
|
| 21 |
+
- name: Login to Docker Hub
|
| 22 |
+
uses: docker/login-action@v3
|
| 23 |
+
with:
|
| 24 |
+
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
| 25 |
+
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
| 26 |
+
|
| 27 |
+
- name: Build and push
|
| 28 |
+
uses: docker/build-push-action@v5
|
| 29 |
+
with:
|
| 30 |
+
context: .
|
| 31 |
+
push: true
|
| 32 |
+
tags: ${{ secrets.DOCKERHUB_USERNAME }}/enterprise-rag:latest
|
| 33 |
+
cache-from: type=gha
|
| 34 |
+
cache-to: type=gha,mode=max
|
.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
|
| 5 |
+
# Virtual Env
|
| 6 |
+
venv/
|
| 7 |
+
env/
|
| 8 |
+
|
| 9 |
+
# Environment Variables
|
| 10 |
+
.env
|
| 11 |
+
|
| 12 |
+
# System
|
| 13 |
+
.DS_Store
|
| 14 |
+
|
| 15 |
+
# Data (Generated/Downloaded)
|
| 16 |
+
data/index/
|
| 17 |
+
data/raw/
|
| 18 |
+
# data/db/
|
| 19 |
+
# data/processed/
|
| 20 |
+
|
| 21 |
+
# IDE
|
| 22 |
+
.vscode/
|
| 23 |
+
.idea/
|
AWS_APP_RUNNER_SETUP.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AWS App Runner Deployment Guide
|
| 2 |
+
|
| 3 |
+
Follow these steps to deploy your Enterprise RAG System to AWS App Runner for a recruiter-ready showcase.
|
| 4 |
+
|
| 5 |
+
## 1. Local Verification
|
| 6 |
+
First, build and run your image locally to ensure the index is properly packaged:
|
| 7 |
+
```bash
|
| 8 |
+
docker build -t enterprise-rag .
|
| 9 |
+
docker run -p 8501:8501 -e GROQ_API_KEY=your_key_here enterprise-rag
|
| 10 |
+
```
|
| 11 |
+
Visit `http://localhost:8501` to verify.
|
| 12 |
+
|
| 13 |
+
## 2. Push to AWS ECR
|
| 14 |
+
You need to push your image to the Amazon Elastic Container Registry.
|
| 15 |
+
|
| 16 |
+
1. **Create Repository**:
|
| 17 |
+
```bash
|
| 18 |
+
aws ecr create-repository --repository-name enterprise-rag --region your-region
|
| 19 |
+
```
|
| 20 |
+
2. **Login to ECR**:
|
| 21 |
+
```bash
|
| 22 |
+
aws ecr get-login-password --region your-region | docker login --username AWS --password-stdin <your-account-id>.dkr.ecr.<your-region>.amazonaws.com
|
| 23 |
+
```
|
| 24 |
+
3. **Tag & Push**:
|
| 25 |
+
```bash
|
| 26 |
+
docker tag enterprise-rag:latest <your-account-id>.dkr.ecr.<your-region>.amazonaws.com/enterprise-rag:latest
|
| 27 |
+
docker push <your-account-id>.dkr.ecr.<your-region>.amazonaws.com/enterprise-rag:latest
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
## 3. Create App Runner Service
|
| 31 |
+
1. Go to **AWS Console** → **App Runner**.
|
| 32 |
+
2. Click **Create service**.
|
| 33 |
+
3. **Source**:
|
| 34 |
+
- Repository type: **Container registry**.
|
| 35 |
+
- Provider: **Amazon ECR**.
|
| 36 |
+
- Container image: Select your `enterprise-rag` image.
|
| 37 |
+
- Deployment settings: **Manual** (or Automatic if you want CI/CD).
|
| 38 |
+
4. **Configuration**:
|
| 39 |
+
- Service name: `enterprise-rag-showcase`.
|
| 40 |
+
- Virtual CPU & Memory: **1 vCPU & 2 GB** (Minimum recommended).
|
| 41 |
+
- **Environment variables**:
|
| 42 |
+
- `GROQ_API_KEY`: Paste your key here.
|
| 43 |
+
5. **Connectivity**:
|
| 44 |
+
- Port: **8501**.
|
| 45 |
+
6. **Review & Create**.
|
| 46 |
+
|
| 47 |
+
## 4. Final Result
|
| 48 |
+
Once deployed, AWS will provide a public URL like `https://xxxxxx.us-east-1.awsapprunner.com`. This is the URL you can share with recruiters!
|
AWS_DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AWS Deployment Guide - Enterprise RAG System
|
| 2 |
+
|
| 3 |
+
## Prerequisites
|
| 4 |
+
- AWS Account
|
| 5 |
+
- AWS CLI installed and configured
|
| 6 |
+
- Docker installed locally
|
| 7 |
+
|
| 8 |
+
## Deployment Options
|
| 9 |
+
|
| 10 |
+
### Option 1: AWS EC2 (Recommended for Full Control)
|
| 11 |
+
|
| 12 |
+
#### Step 1: Launch EC2 Instance
|
| 13 |
+
1. Go to AWS Console → EC2
|
| 14 |
+
2. Click **"Launch Instance"**
|
| 15 |
+
3. Choose:
|
| 16 |
+
- **AMI**: Ubuntu 22.04 LTS
|
| 17 |
+
- **Instance Type**: t3.medium (4GB RAM minimum)
|
| 18 |
+
- **Storage**: 20GB
|
| 19 |
+
4. Configure Security Group:
|
| 20 |
+
- Allow SSH (port 22) from your IP
|
| 21 |
+
- Allow HTTP (port 8501) from anywhere
|
| 22 |
+
- Allow HTTP (port 8000) from anywhere
|
| 23 |
+
|
| 24 |
+
#### Step 2: Connect to Instance
|
| 25 |
+
```bash
|
| 26 |
+
ssh -i your-key.pem ubuntu@your-ec2-public-ip
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
#### Step 3: Install Docker
|
| 30 |
+
```bash
|
| 31 |
+
# Update system
|
| 32 |
+
sudo apt update && sudo apt upgrade -y
|
| 33 |
+
|
| 34 |
+
# Install Docker
|
| 35 |
+
curl -fsSL https://get.docker.com -o get-docker.sh
|
| 36 |
+
sudo sh get-docker.sh
|
| 37 |
+
sudo usermod -aG docker ubuntu
|
| 38 |
+
|
| 39 |
+
# Install Docker Compose
|
| 40 |
+
sudo apt install docker-compose-plugin
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
#### Step 4: Clone Repository
|
| 44 |
+
```bash
|
| 45 |
+
git clone https://github.com/YuvrajSinghBhadoria2/Enterprise-RAG-System.git
|
| 46 |
+
cd Enterprise-RAG-System
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
#### Step 5: Configure Environment
|
| 50 |
+
```bash
|
| 51 |
+
# Create .env file
|
| 52 |
+
cat > .env << EOF
|
| 53 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 54 |
+
EOF
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
#### Step 6: Build and Run
|
| 58 |
+
```bash
|
| 59 |
+
# Using Docker Compose
|
| 60 |
+
docker compose -f docker/docker-compose.yml up -d --build
|
| 61 |
+
|
| 62 |
+
# Generate data (one-time)
|
| 63 |
+
docker compose -f docker/docker-compose.yml exec api python3 tools/generate-dataset.py
|
| 64 |
+
docker compose -f docker/docker-compose.yml exec api python3 src/ingestion/ingest.py
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
#### Step 7: Access Application
|
| 68 |
+
- **UI**: `http://your-ec2-public-ip:8501`
|
| 69 |
+
- **API**: `http://your-ec2-public-ip:8000/docs`
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
### Option 2: AWS ECS (Fargate) - Serverless
|
| 74 |
+
|
| 75 |
+
#### Step 1: Push Docker Image to ECR
|
| 76 |
+
```bash
|
| 77 |
+
# Create ECR repository
|
| 78 |
+
aws ecr create-repository --repository-name enterprise-rag
|
| 79 |
+
|
| 80 |
+
# Login to ECR
|
| 81 |
+
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com
|
| 82 |
+
|
| 83 |
+
# Build and push
|
| 84 |
+
docker build -f docker/Dockerfile.api -t enterprise-rag .
|
| 85 |
+
docker tag enterprise-rag:latest YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/enterprise-rag:latest
|
| 86 |
+
docker push YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/enterprise-rag:latest
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
#### Step 2: Create ECS Task Definition
|
| 90 |
+
1. Go to ECS Console
|
| 91 |
+
2. Create new Task Definition (Fargate)
|
| 92 |
+
3. Add container:
|
| 93 |
+
- Image: Your ECR image URI
|
| 94 |
+
- Memory: 4GB
|
| 95 |
+
- Port: 8501
|
| 96 |
+
4. Add environment variable: `GROQ_API_KEY`
|
| 97 |
+
|
| 98 |
+
#### Step 3: Create ECS Service
|
| 99 |
+
1. Create ECS Cluster
|
| 100 |
+
2. Create Service from Task Definition
|
| 101 |
+
3. Configure Load Balancer (optional)
|
| 102 |
+
|
| 103 |
+
---
|
| 104 |
+
|
| 105 |
+
### Option 3: AWS Lightsail (Simplest)
|
| 106 |
+
|
| 107 |
+
#### Step 1: Create Lightsail Instance
|
| 108 |
+
1. Go to Lightsail Console
|
| 109 |
+
2. Create Instance:
|
| 110 |
+
- Platform: Linux/Unix
|
| 111 |
+
- Blueprint: Ubuntu 22.04
|
| 112 |
+
- Plan: $10/month (2GB RAM)
|
| 113 |
+
|
| 114 |
+
#### Step 2: Deploy
|
| 115 |
+
Same as EC2 steps 2-7 above
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
## Cost Estimates
|
| 120 |
+
|
| 121 |
+
| Service | Cost/Month | Best For |
|
| 122 |
+
|---------|-----------|----------|
|
| 123 |
+
| EC2 t3.medium | ~$30 | Full control, testing |
|
| 124 |
+
| ECS Fargate | ~$40 | Production, auto-scaling |
|
| 125 |
+
| Lightsail | $10-20 | Simple deployment |
|
| 126 |
+
|
| 127 |
+
## Recommended: EC2 t3.medium
|
| 128 |
+
|
| 129 |
+
For your use case, I recommend **EC2 t3.medium** because:
|
| 130 |
+
- ✅ Full control
|
| 131 |
+
- ✅ Easy to manage
|
| 132 |
+
- ✅ Cost-effective
|
| 133 |
+
- ✅ Can run Docker Compose easily
|
| 134 |
+
|
| 135 |
+
## Maintenance
|
| 136 |
+
|
| 137 |
+
**Update code:**
|
| 138 |
+
```bash
|
| 139 |
+
cd Enterprise-RAG-System
|
| 140 |
+
git pull
|
| 141 |
+
docker compose -f docker/docker-compose.yml up -d --build
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
**View logs:**
|
| 145 |
+
```bash
|
| 146 |
+
docker compose -f docker/docker-compose.yml logs -f
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
**Restart services:**
|
| 150 |
+
```bash
|
| 151 |
+
docker compose -f docker/docker-compose.yml restart
|
| 152 |
+
```
|
DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deployment Guide
|
| 2 |
+
|
| 3 |
+
This guide ensures a smooth deployment of the Enterprise RAG system to any cloud VPS (Virtual Private Server) such as AWS EC2, DigitalOcean Droplet, Google Compute Engine, or Azure VM.
|
| 4 |
+
|
| 5 |
+
## 🚀 Prerequisites
|
| 6 |
+
|
| 7 |
+
* **Server**: A Linux server (Ubuntu 22.04 LTS recommended).
|
| 8 |
+
* **Specs**: Minimum 4GB RAM (8GB recommended for embeddings/FAISS), 2 vCPUs.
|
| 9 |
+
* **Software**: Docker and Docker Compose installed.
|
| 10 |
+
|
| 11 |
+
## ☁️ Recommended Providers
|
| 12 |
+
|
| 13 |
+
**Railway.app** (Easiest PaaS):
|
| 14 |
+
* Perfect for quick demos.
|
| 15 |
+
* Supports our `Dockerfile` setup out of the box.
|
| 16 |
+
|
| 17 |
+
## 🚂 Railway.app Deployment (Complete Setup)
|
| 18 |
+
|
| 19 |
+
Railway requires deploying **two separate services** from the same repository.
|
| 20 |
+
|
| 21 |
+
### Prerequisites
|
| 22 |
+
* GitHub repository: `https://github.com/YuvrajSinghBhadoria2/Enterprise-RAG-System.git`
|
| 23 |
+
* Railway account: [railway.app](https://railway.app)
|
| 24 |
+
* Groq API Key
|
| 25 |
+
|
| 26 |
+
### Step 1: Deploy API Service
|
| 27 |
+
|
| 28 |
+
1. **Create New Project** in Railway
|
| 29 |
+
2. **Deploy from GitHub** → Select your repository
|
| 30 |
+
3. Railway will auto-detect the `railway.toml` and use the Dockerfile
|
| 31 |
+
4. **Add Environment Variables**:
|
| 32 |
+
```
|
| 33 |
+
GROQ_API_KEY=gsk_your_key_here
|
| 34 |
+
PORT=8000
|
| 35 |
+
```
|
| 36 |
+
5. **Deploy** - Railway will build using `docker/Dockerfile.api`
|
| 37 |
+
6. **Get API URL** - Copy the public URL (e.g., `https://enterprise-rag-production.up.railway.app`)
|
| 38 |
+
|
| 39 |
+
### Step 2: Deploy UI Service
|
| 40 |
+
|
| 41 |
+
1. In the **same Railway project**, click **+ New Service**
|
| 42 |
+
2. **Deploy from GitHub** → Select the **same repository**
|
| 43 |
+
3. **Configure Build**:
|
| 44 |
+
- Go to **Settings** → **Build**
|
| 45 |
+
- Set **Dockerfile Path**: `docker/Dockerfile.streamlit`
|
| 46 |
+
4. **Add Environment Variable**:
|
| 47 |
+
```
|
| 48 |
+
API_URL=https://your-api-url-from-step1.up.railway.app/api/v1/chat
|
| 49 |
+
```
|
| 50 |
+
*(Replace with your actual API URL from Step 1)*
|
| 51 |
+
5. **Deploy**
|
| 52 |
+
|
| 53 |
+
### Step 3: Generate Data (Critical!)
|
| 54 |
+
|
| 55 |
+
Railway containers are ephemeral, so you need to generate data on startup:
|
| 56 |
+
|
| 57 |
+
**Option A: One-time manual generation** (for testing):
|
| 58 |
+
```bash
|
| 59 |
+
# In Railway API service shell
|
| 60 |
+
python3 tools/generate-dataset.py
|
| 61 |
+
python3 src/ingestion/ingest.py
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
**Option B: Auto-generate on startup** (recommended):
|
| 65 |
+
Update the `railway.toml` start command to include data generation.
|
| 66 |
+
|
| 67 |
+
### Step 4: Access Your Application
|
| 68 |
+
|
| 69 |
+
- **UI**: `https://your-ui-service.up.railway.app`
|
| 70 |
+
- **API Docs**: `https://your-api-service.up.railway.app/docs`
|
| 71 |
+
|
| 72 |
+
### Troubleshooting
|
| 73 |
+
|
| 74 |
+
**Build Timeout?**
|
| 75 |
+
- Ensure `.dockerignore` excludes `venv/` and `data/`
|
| 76 |
+
- Check that `railway.toml` points to the correct Dockerfile
|
| 77 |
+
|
| 78 |
+
**UI Can't Connect to API?**
|
| 79 |
+
- Verify `API_URL` environment variable in UI service
|
| 80 |
+
- Ensure API service is deployed and running
|
| 81 |
+
- Check API URL includes `/api/v1/chat` endpoint
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## 📦 Step-by-Step Deployment (VPS)
|
| 86 |
+
|
| 87 |
+
### 1. Provision & Access Server
|
| 88 |
+
SSH into your server:
|
| 89 |
+
```bash
|
| 90 |
+
ssh user@your-server-ip
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
### 2. Install Docker (If not installed)
|
| 94 |
+
```bash
|
| 95 |
+
# Update packages
|
| 96 |
+
sudo apt update && sudo apt upgrade -y
|
| 97 |
+
|
| 98 |
+
# Install Docker
|
| 99 |
+
curl -fsSL https://get.docker.com -o get-docker.sh
|
| 100 |
+
sudo sh get-docker.sh
|
| 101 |
+
|
| 102 |
+
# Install Docker Compose Plugin
|
| 103 |
+
sudo apt install docker-compose-plugin
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### 3. Clone the Repository
|
| 107 |
+
```bash
|
| 108 |
+
git clone https://github.com/your-repo/enterprise-rag.git
|
| 109 |
+
cd enterprise-rag
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
### 4. Configure Environment
|
| 113 |
+
Create the production `.env` file:
|
| 114 |
+
```bash
|
| 115 |
+
cp .env.example .env
|
| 116 |
+
nano .env
|
| 117 |
+
```
|
| 118 |
+
*Paste your `GROQ_API_KEY` or `OPENAI_API_KEY` into the file.*
|
| 119 |
+
|
| 120 |
+
### 5. Build and Start Services
|
| 121 |
+
This command will build the images and start the API and UI in the background with auto-restart enabled.
|
| 122 |
+
```bash
|
| 123 |
+
# Using the Makefile shortcut
|
| 124 |
+
make up
|
| 125 |
+
|
| 126 |
+
# OR manually using docker compose
|
| 127 |
+
docker compose -f docker/docker-compose.yml up -d --build
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
### 6. Generate Data (Critical Step)
|
| 131 |
+
Fresh deployments start empty. You must ingest the datasets to make the search work.
|
| 132 |
+
```bash
|
| 133 |
+
# Run ingestion inside the running API container
|
| 134 |
+
docker compose -f docker/docker-compose.yml exec api python3 tools/generate-dataset.py
|
| 135 |
+
docker compose -f docker/docker-compose.yml exec api python3 src/ingestion/ingest.py
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
### 7. Access the Application
|
| 139 |
+
* **UI**: `http://your-server-ip:8501`
|
| 140 |
+
* **API**: `http://your-server-ip:8000/docs`
|
| 141 |
+
|
| 142 |
+
---
|
| 143 |
+
|
| 144 |
+
## 🔒 Production Hardening
|
| 145 |
+
|
| 146 |
+
### 1. Firewall (UFW)
|
| 147 |
+
Only open necessary ports.
|
| 148 |
+
```bash
|
| 149 |
+
sudo ufw allow 22/tcp
|
| 150 |
+
sudo ufw allow 8501/tcp # Streamlit
|
| 151 |
+
sudo ufw allow 8000/tcp # API
|
| 152 |
+
sudo ufw enable
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
### 2. Reverse Proxy (Nginx + SSL)
|
| 156 |
+
For HTTPS, use Nginx as a reverse proxy.
|
| 157 |
+
```nginx
|
| 158 |
+
server {
|
| 159 |
+
listen 80;
|
| 160 |
+
server_name rag.yourdomain.com;
|
| 161 |
+
|
| 162 |
+
location / {
|
| 163 |
+
proxy_pass http://localhost:8501;
|
| 164 |
+
proxy_set_header Host $host;
|
| 165 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 166 |
+
}
|
| 167 |
+
}
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## 🛠️ Maintenance
|
| 171 |
+
|
| 172 |
+
* **View Logs**: `make logs`
|
| 173 |
+
* **Restart Services**: `make down && make up`
|
| 174 |
+
* **Update Code**:
|
| 175 |
+
```bash
|
| 176 |
+
git pull
|
| 177 |
+
make up
|
| 178 |
+
```
|
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
+
# Copy requirements and install Python dependencies
|
| 11 |
+
COPY requirements.txt .
|
| 12 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
+
|
| 14 |
+
# Copy application code
|
| 15 |
+
COPY . .
|
| 16 |
+
|
| 17 |
+
# Download index during build (Bake into image)
|
| 18 |
+
RUN python tools/download_index.py
|
| 19 |
+
|
| 20 |
+
# Set Python path
|
| 21 |
+
ENV PYTHONPATH=/app
|
| 22 |
+
|
| 23 |
+
# Expose Streamlit port
|
| 24 |
+
EXPOSE 8501
|
| 25 |
+
|
| 26 |
+
# Run Streamlit
|
| 27 |
+
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
Makefile
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.PHONY: build up down logs ingest eval run-local
|
| 2 |
+
|
| 3 |
+
# Docker commands
|
| 4 |
+
build:
|
| 5 |
+
docker-compose -f docker/docker-compose.yml --env-file .env build
|
| 6 |
+
|
| 7 |
+
up:
|
| 8 |
+
docker-compose -f docker/docker-compose.yml --env-file .env up -d --build
|
| 9 |
+
|
| 10 |
+
down:
|
| 11 |
+
docker-compose -f docker/docker-compose.yml --env-file .env down
|
| 12 |
+
|
| 13 |
+
logs:
|
| 14 |
+
docker-compose -f docker/docker-compose.yml --env-file .env logs -f
|
| 15 |
+
|
| 16 |
+
api-shell:
|
| 17 |
+
docker-compose -f docker/docker-compose.yml --env-file .env exec api /bin/bash
|
| 18 |
+
|
| 19 |
+
# Run evaluation inside Docker
|
| 20 |
+
eval:
|
| 21 |
+
docker-compose -f docker/docker-compose.yml --env-file .env exec api python3 tools/run_eval.py
|
| 22 |
+
|
| 23 |
+
# Run evaluation locally (Mac fallback)
|
| 24 |
+
eval-local:
|
| 25 |
+
export DISABLE_FAISS=1 && export KMP_DUPLICATE_LIB_OK=TRUE && export GROQ_API_KEY=${GROQ_API_KEY} && python3 tools/run_eval.py
|
| 26 |
+
|
| 27 |
+
# Ingestion (runs locally if venv active, or use via docker exec)
|
| 28 |
+
ingest:
|
| 29 |
+
export PYTHONPATH=$$PYTHONPATH:. && python3 src/ingestion/ingest.py
|
| 30 |
+
|
| 31 |
+
# Data generation
|
| 32 |
+
generate-data:
|
| 33 |
+
python3 tools/generate-dataset.py
|
| 34 |
+
|
| 35 |
+
# Run API and UI locally (Mac fallback)
|
| 36 |
+
run-local:
|
| 37 |
+
@echo "Starting Enterprise RAG Locally (Safe Mode)..."
|
| 38 |
+
@export DISABLE_FAISS=1 && export KMP_DUPLICATE_LIB_OK=TRUE && export GROQ_API_KEY=${GROQ_API_KEY} && \
|
| 39 |
+
(uvicorn src.app.main:app --host 0.0.0.0 --port 8000 &) && \
|
| 40 |
+
(sleep 5 && streamlit run src/ui/app.py --server.port 8501)
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: uvicorn src.app.main:app --host 0.0.0.0 --port $PORT
|
README.md
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Enterprise RAG System
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8501
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Enterprise RAG System
|
| 12 |
+
|
| 13 |
+
An enterprise-grade Retrieval-Augmented Generation (RAG) system designed for high accuracy, safety, and scalability. This project demonstrates a production-ready pipeline with hybrid search, reranking, and strict guardrails against hallucinations.
|
| 14 |
+
|
| 15 |
+
## 🚀 Key Features
|
| 16 |
+
|
| 17 |
+
* **Hybrid Retrieval**: Combines **BM25** (Keyword) and **FAISS** (Dense Vector) search for optimal recall.
|
| 18 |
+
* **Context Reranking**: Utilizes `cross-encoder/ms-marco-MiniLM-L-6-v2` to precision-rank documents before generation.
|
| 19 |
+
* **Enterprise Guardrails**:
|
| 20 |
+
* **Refusal Logic**: Strictly refuses to answer if context is insufficient.
|
| 21 |
+
* **Hallucination Detection**: Automated grading of Answer Relevancy and Groundedness.
|
| 22 |
+
* **Confidence Gating**: Blocks generation if retrieval scores are below a safety threshold.
|
| 23 |
+
* **Multi-Provider LLM**: Supports **Groq** (Llama-3), **vLLM**, and **OpenAI**.
|
| 24 |
+
* **Modern Stack**: Built with **FastAPI**, **Streamlit**, and **Docker**.
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
## 🛠️ Quick Start
|
| 29 |
+
|
| 30 |
+
### Prerequisites
|
| 31 |
+
* Docker Desktop (Recommended)
|
| 32 |
+
* Python 3.10+ (For local run)
|
| 33 |
+
* Groq API Key (or OpenAI/vLLM)
|
| 34 |
+
|
| 35 |
+
### 1. Configuration
|
| 36 |
+
Create a `.env` file in the root directory:
|
| 37 |
+
```bash
|
| 38 |
+
cp .env.example .env
|
| 39 |
+
```
|
| 40 |
+
Edit `.env` and add your API key:
|
| 41 |
+
```ini
|
| 42 |
+
GROQ_API_KEY=gsk_...
|
| 43 |
+
# Cloud Vector DB (Optional - Recommended for Deployment)
|
| 44 |
+
VECTOR_DB_TYPE=pinecone
|
| 45 |
+
PINECONE_API_KEY=pcsk_...
|
| 46 |
+
|
| 47 |
+
# Optional:
|
| 48 |
+
OPENAI_API_KEY=sk-...
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
### 2. Generate Data
|
| 52 |
+
The system needs data to function. Run the ingestion script to download real datasets (WikiQA, Multi-News, GovReport) and build the index:
|
| 53 |
+
```bash
|
| 54 |
+
make generate-data
|
| 55 |
+
make ingest
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### 3. Run the Application
|
| 59 |
+
|
| 60 |
+
#### Option A: Docker (Recommended for Linux/Windows)
|
| 61 |
+
The most stable environment.
|
| 62 |
+
```bash
|
| 63 |
+
make up
|
| 64 |
+
```
|
| 65 |
+
* **UI**: [http://localhost:8501](http://localhost:8501)
|
| 66 |
+
* **API**: [http://localhost:8000/docs](http://localhost:8000/docs)
|
| 67 |
+
|
| 68 |
+
#### Option B: Local Safe Mode (Apple Silicon / Mac)
|
| 69 |
+
Use this if you encounter Docker connection issues or `Segmentation Fault` (FAISS/OpenMP conflicts). This mode disables FAISS and runs on **BM25 (Keyword Search) only**, ensuring stability.
|
| 70 |
+
```bash
|
| 71 |
+
make run-local
|
| 72 |
+
```
|
| 73 |
+
*(This starts both the FastAPI backend and Streamlit UI)*
|
| 74 |
+
|
| 75 |
+
---
|
| 76 |
+
|
| 77 |
+
## 📊 Evaluation
|
| 78 |
+
|
| 79 |
+
Verify the accuracy and safety of the system using the built-in evaluation suite.
|
| 80 |
+
|
| 81 |
+
### Run Evaluation (Local Safe Mode)
|
| 82 |
+
```bash
|
| 83 |
+
make eval-local
|
| 84 |
+
```
|
| 85 |
+
This script will:
|
| 86 |
+
1. Load the **WikiQA** test set.
|
| 87 |
+
2. Run the full RAG pipeline for each question.
|
| 88 |
+
3. Report:
|
| 89 |
+
* **Recall@10**: Retrieval effectiveness.
|
| 90 |
+
* **MRR**: Mean Reciprocal Rank.
|
| 91 |
+
* **Groundedness**: Frequency of hallucination checks passing.
|
| 92 |
+
* **Refusal Rate**: How often the system correctly refuses unknown questions.
|
| 93 |
+
|
| 94 |
+
## 🏆 Performance & Results
|
| 95 |
+
|
| 96 |
+
Tested on **WikiQA**, **Multi-News**, and **GovReport** datasets.
|
| 97 |
+
|
| 98 |
+
| Metric | Score | Description |
|
| 99 |
+
| :--- | :--- | :--- |
|
| 100 |
+
| **Recall@10** | **1.0000** | Perfect retrieval of relevant documents. |
|
| 101 |
+
| **MRR** | **1.0000** | Relevant document consistently ranked #1. |
|
| 102 |
+
| **Factuality** | **1.0000** | 100% of answers grounded in context. |
|
| 103 |
+
| **Safety** | **100%** | Successfully refuses to answer out-of-context queries. |
|
| 104 |
+
|
| 105 |
+
### 🛡️ Guardrails in Action
|
| 106 |
+
|
| 107 |
+
**1. Hallucination Prevention (Safety Layer)**
|
| 108 |
+

|
| 109 |
+
* **Scenario Refusal**: The system correctly identified that the retrieved context (about Howard Stern) was irrelevant to the "Airline Strike" query.
|
| 110 |
+
* **Low Confidence**: The retrieval score of `-9.73` triggered the safety gate (Threshold: `-4.0`), automatically blocking the generation.
|
| 111 |
+
* **Result**: Zero hallucination. The user receives a safe, honest refusal instead of a made-up answer.
|
| 112 |
+
|
| 113 |
+
**2. Enterprise Accuracy**
|
| 114 |
+

|
| 115 |
+
* **High Precision**: The query for "Emerging Contaminants" retrieved exact matches from the *GovReport* dataset.
|
| 116 |
+
* **Grounded Generation**: The answer is derived *strictly* from the text, listing specific chemicals (Perchlorate, TCE, etc.) mentioned in the document.
|
| 117 |
+
* **Verified**: Retrieval score of `4.82` shows high confidence, allowing the answer to pass.
|
| 118 |
+
|
| 119 |
+
---
|
| 120 |
+
|
| 121 |
+
## 🏗️ Architecture
|
| 122 |
+
|
| 123 |
+
```mermaid
|
| 124 |
+
flowchart TD
|
| 125 |
+
User([User Query]) --> Hybrid{Hybrid Retrieval}
|
| 126 |
+
|
| 127 |
+
subgraph Retrieval Layer
|
| 128 |
+
Hybrid -->|Lexical| BM25[BM25 Index]
|
| 129 |
+
Hybrid -->|Semantic| FAISS[FAISS Vector DB]
|
| 130 |
+
BM25 --> Candidates[Candidate Pool]
|
| 131 |
+
FAISS --> Candidates
|
| 132 |
+
end
|
| 133 |
+
|
| 134 |
+
Candidates --> Rerank[Cross-Encoder Reranker]
|
| 135 |
+
|
| 136 |
+
subgraph Safety Layer
|
| 137 |
+
Rerank --> Gate{Confidence Gate}
|
| 138 |
+
Gate -- Score < -4.0 --> Refusal([Refusal Response])
|
| 139 |
+
Gate -- Score >= -4.0 --> Context[Context Optimization]
|
| 140 |
+
end
|
| 141 |
+
|
| 142 |
+
subgraph Generation Layer
|
| 143 |
+
Context --> Prompt[Strict System Prompt]
|
| 144 |
+
Prompt --> LLM[LLM Inference]
|
| 145 |
+
LLM --> Guard[Hallucination Check]
|
| 146 |
+
Guard --> Final([Final Answer])
|
| 147 |
+
end
|
| 148 |
+
|
| 149 |
+
style Refusal fill:#ffcccc,stroke:#333
|
| 150 |
+
style Final fill:#ccffcc,stroke:#333
|
| 151 |
+
style Gate fill:#fff4e6,stroke:#ff9900
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
1. **Ingestion**: Documents are cleaned, chunked (Sliding Window), and indexed into **Faiss** (Vectors) and **BM25** (Keywords).
|
| 155 |
+
2. **Retrieval**: Queries retrieve candidates from both indices (`HybridRetriever`).
|
| 156 |
+
3. **Reranking**: A Cross-Encoder scores the relevance of each candidate pair (`Query, Doc`).
|
| 157 |
+
4. **Guardrails**:
|
| 158 |
+
* If `Max(Rerank Score) < -4.0`: **Refuse** immediately.
|
| 159 |
+
5. **Generation**: Top documents are passed to the LLM with a strict "Context-Only" system prompt.
|
| 160 |
+
6. **Validation**: Output is graded for Groundedness (Token Overlap) before being returned (in Evaluation mode).
|
| 161 |
+
|
| 162 |
+
## 📁 Project Structure
|
| 163 |
+
|
| 164 |
+
* `src/app`: FastAPI Backend
|
| 165 |
+
* `src/ui`: Streamlit Frontend
|
| 166 |
+
* `src/pipeline`: Core RAG Logic (`QueryPipeline.py`)
|
| 167 |
+
* `src/retriever`: Search Algorithms (`HybridRetriever.py`)
|
| 168 |
+
* `src/eval`: Scoring Metrics (`Hallucination`, `Relevancy`)
|
| 169 |
+
* `tools`: Scripts for dataset generation and evaluation
|
| 170 |
+
* `data`: Raw and Indexed data storage
|
app.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hugging Face Spaces - Enterprise RAG System
|
| 3 |
+
Standalone Streamlit application
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Add src to path
|
| 13 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
| 14 |
+
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
from src.pipeline.query_pipeline import QueryPipeline
|
| 19 |
+
from src.ingestion.ingest import IngestionPipeline
|
| 20 |
+
import subprocess
|
| 21 |
+
|
| 22 |
+
def prepare_data():
|
| 23 |
+
"""Ensure data is generated and indexed if missing"""
|
| 24 |
+
INDEX_DIR = "data/index"
|
| 25 |
+
RAW_DIR = "data/raw"
|
| 26 |
+
|
| 27 |
+
# 1. Create directories
|
| 28 |
+
os.makedirs(INDEX_DIR, exist_ok=True)
|
| 29 |
+
os.makedirs(RAW_DIR, exist_ok=True)
|
| 30 |
+
|
| 31 |
+
# 2. Check if raw data exists (Only needed if NOT using Pinecone)
|
| 32 |
+
if os.getenv("VECTOR_DB_TYPE", "").lower() != "pinecone":
|
| 33 |
+
if not os.listdir(RAW_DIR):
|
| 34 |
+
st.error("❌ Data folder empty! Please commit your 'data/raw' folder to Git and redeploy.")
|
| 35 |
+
st.stop()
|
| 36 |
+
|
| 37 |
+
# 3. Check if indices exist, if not run ingestion (Skip for Pinecone)
|
| 38 |
+
# 3. Check if indices exist (Files should be baked in)
|
| 39 |
+
bm25_path = os.path.join(INDEX_DIR, "bm25.pkl")
|
| 40 |
+
|
| 41 |
+
# Only download if absolutely missing (Fallback for dev env)
|
| 42 |
+
if not os.path.exists(bm25_path):
|
| 43 |
+
with st.spinner("Downloading Knowledge Base (Dev Mode)..."):
|
| 44 |
+
try:
|
| 45 |
+
from huggingface_hub import hf_hub_download
|
| 46 |
+
os.makedirs(INDEX_DIR, exist_ok=True)
|
| 47 |
+
hf_hub_download(repo_id="yuvis/enterprise-rag-index", filename="index/bm25.pkl", repo_type="dataset", local_dir="data")
|
| 48 |
+
hf_hub_download(repo_id="yuvis/enterprise-rag-index", filename="index/doc_map.pkl", repo_type="dataset", local_dir="data")
|
| 49 |
+
except Exception:
|
| 50 |
+
pass
|
| 51 |
+
|
| 52 |
+
st.set_page_config(
|
| 53 |
+
page_title="Enterprise RAG Search",
|
| 54 |
+
page_icon="🔍",
|
| 55 |
+
layout="wide"
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Initialize pipeline
|
| 59 |
+
@st.cache_resource
|
| 60 |
+
def load_pipeline():
|
| 61 |
+
"""Load the RAG pipeline (cached for performance)"""
|
| 62 |
+
try:
|
| 63 |
+
# Ensure data is ready before initializing pipeline
|
| 64 |
+
prepare_data()
|
| 65 |
+
return QueryPipeline()
|
| 66 |
+
except Exception as e:
|
| 67 |
+
st.error(f"Error loading pipeline: {e}")
|
| 68 |
+
st.exception(e)
|
| 69 |
+
return None
|
| 70 |
+
|
| 71 |
+
# Main UI
|
| 72 |
+
st.title("🔍 Enterprise RAG Search")
|
| 73 |
+
st.markdown("*Production-grade Retrieval-Augmented Generation with Hallucination Prevention*")
|
| 74 |
+
|
| 75 |
+
# Sidebar configuration
|
| 76 |
+
with st.sidebar:
|
| 77 |
+
st.header("⚙️ Configuration")
|
| 78 |
+
st.caption("🚀 Version: Pinecone V2")
|
| 79 |
+
|
| 80 |
+
# Check for API key
|
| 81 |
+
groq_key = os.getenv("GROQ_API_KEY")
|
| 82 |
+
if not groq_key:
|
| 83 |
+
st.warning("⚠️ GROQ_API_KEY not set. Please configure in Space settings.")
|
| 84 |
+
else:
|
| 85 |
+
st.success("✅ API Key configured")
|
| 86 |
+
|
| 87 |
+
st.divider()
|
| 88 |
+
|
| 89 |
+
top_k_retrieval = st.slider("Retrieval Top-K", 5, 50, 20)
|
| 90 |
+
top_k_rerank = st.slider("Rerank Top-K", 1, 10, 5)
|
| 91 |
+
|
| 92 |
+
st.divider()
|
| 93 |
+
st.markdown("### 📊 System Info")
|
| 94 |
+
st.info("""
|
| 95 |
+
- **Hybrid Search**: BM25 + FAISS
|
| 96 |
+
- **Reranking**: Cross-Encoder
|
| 97 |
+
- **Safety**: Confidence Gating
|
| 98 |
+
""")
|
| 99 |
+
|
| 100 |
+
# Initialize session state
|
| 101 |
+
if "messages" not in st.session_state:
|
| 102 |
+
st.session_state.messages = []
|
| 103 |
+
|
| 104 |
+
# Display chat history
|
| 105 |
+
for message in st.session_state.messages:
|
| 106 |
+
with st.chat_message(message["role"]):
|
| 107 |
+
st.markdown(message["content"])
|
| 108 |
+
|
| 109 |
+
# Chat input
|
| 110 |
+
if prompt := st.chat_input("Ask a question about your documents..."):
|
| 111 |
+
# Add user message
|
| 112 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 113 |
+
with st.chat_message("user"):
|
| 114 |
+
st.markdown(prompt)
|
| 115 |
+
|
| 116 |
+
# Generate response
|
| 117 |
+
with st.chat_message("assistant"):
|
| 118 |
+
with st.spinner("Searching and generating answer..."):
|
| 119 |
+
pipeline = load_pipeline()
|
| 120 |
+
|
| 121 |
+
if pipeline is None:
|
| 122 |
+
st.error("Pipeline not loaded. Please check configuration.")
|
| 123 |
+
else:
|
| 124 |
+
try:
|
| 125 |
+
result = pipeline.run(
|
| 126 |
+
query=prompt,
|
| 127 |
+
top_k_retrieval=top_k_retrieval,
|
| 128 |
+
top_k_rerank=top_k_rerank
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
# Display answer
|
| 132 |
+
st.markdown(result["answer"])
|
| 133 |
+
|
| 134 |
+
# Display metadata in expander
|
| 135 |
+
with st.expander("📋 View Details"):
|
| 136 |
+
col1, col2, col3 = st.columns(3)
|
| 137 |
+
|
| 138 |
+
with col1:
|
| 139 |
+
st.metric("Retrieval Score", f"{result.get('retrieval_score', 'N/A'):.2f}")
|
| 140 |
+
|
| 141 |
+
with col2:
|
| 142 |
+
hallucination = result.get('hallucination_score', 'N/A')
|
| 143 |
+
if hallucination != 'N/A':
|
| 144 |
+
st.metric("Hallucination Score", f"{hallucination:.2f}")
|
| 145 |
+
|
| 146 |
+
with col3:
|
| 147 |
+
groundedness = result.get('groundedness', 'N/A')
|
| 148 |
+
if groundedness != 'N/A':
|
| 149 |
+
st.metric("Groundedness", f"{groundedness:.2f}")
|
| 150 |
+
|
| 151 |
+
# Show retrieved context
|
| 152 |
+
if result.get("context"):
|
| 153 |
+
st.markdown("**Retrieved Context:**")
|
| 154 |
+
for i, (doc, score) in enumerate(result["context"][:3], 1):
|
| 155 |
+
st.markdown(f"{i}. [Score: {score:.2f}] {doc[:200]}...")
|
| 156 |
+
|
| 157 |
+
# Add to chat history
|
| 158 |
+
st.session_state.messages.append({
|
| 159 |
+
"role": "assistant",
|
| 160 |
+
"content": result["answer"]
|
| 161 |
+
})
|
| 162 |
+
|
| 163 |
+
except Exception as e:
|
| 164 |
+
st.error(f"Error generating response: {e}")
|
| 165 |
+
st.exception(e)
|
| 166 |
+
|
| 167 |
+
# Footer
|
| 168 |
+
st.divider()
|
| 169 |
+
st.markdown("""
|
| 170 |
+
<div style='text-align: center; color: gray; font-size: 0.8em;'>
|
| 171 |
+
Enterprise RAG System | <a href='https://github.com/YuvrajSinghBhadoria2/Enterprise-RAG-System'>GitHub</a>
|
| 172 |
+
</div>
|
| 173 |
+
""", unsafe_allow_html=True)
|
configs/default.yaml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
app:
|
| 2 |
+
title: "Enterprise RAG Search"
|
| 3 |
+
host: "0.0.0.0"
|
| 4 |
+
port: 8080
|
| 5 |
+
|
| 6 |
+
retrieval:
|
| 7 |
+
top_k_retrieval: 20
|
| 8 |
+
top_k_rerank: 5
|
| 9 |
+
weights:
|
| 10 |
+
bm25: 0.3
|
| 11 |
+
dense: 0.7
|
| 12 |
+
|
| 13 |
+
embeddings:
|
| 14 |
+
model_name: "BAAI/bge-m3"
|
| 15 |
+
device: "cpu" # or cuda
|
| 16 |
+
|
| 17 |
+
reranker:
|
| 18 |
+
model_name: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
| 19 |
+
|
| 20 |
+
ingestion:
|
| 21 |
+
chunk_size: 512
|
| 22 |
+
chunk_overlap: 50
|
data/raw/finphrase_000.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
|
data/raw/finphrase_001.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
For the last quarter of 2010 , Componenta 's net sales doubled to EUR131m from EUR76m for the same period a year earlier , while it moved to a zero pre-tax profit from a pre-tax loss of EUR7m .
|
data/raw/finphrase_002.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
In the third quarter of 2010 , net sales increased by 5.2 % to EUR 205.5 mn , and operating profit by 34.9 % to EUR 23.5 mn .
|
data/raw/finphrase_003.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Operating profit rose to EUR 13.1 mn from EUR 8.7 mn in the corresponding period in 2007 representing 7.7 % of net sales .
|
data/raw/finphrase_004.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Operating profit totalled EUR 21.1 mn , up from EUR 18.6 mn in 2007 , representing 9.7 % of net sales .
|
data/raw/finphrase_005.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Finnish Talentum reports its operating profit increased to EUR 20.5 mn in 2005 from EUR 9.3 mn in 2004 , and net sales totaled EUR 103.3 mn , up from EUR 96.4 mn .
|
data/raw/finphrase_006.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Clothing retail chain Sepp+ñl+ñ 's sales increased by 8 % to EUR 155.2 mn , and operating profit rose to EUR 31.1 mn from EUR 17.1 mn in 2004 .
|
data/raw/finphrase_007.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Consolidated net sales increased 16 % to reach EUR74 .8 m , while operating profit amounted to EUR0 .9 m compared to a loss of EUR0 .7 m in the prior year period .
|
data/raw/finphrase_008.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Foundries division reports its sales increased by 9.7 % to EUR 63.1 mn from EUR 57.5 mn in the corresponding period in 2006 , and sales of the Machine Shop division increased by 16.4 % to EUR 41.2 mn from EUR 35.4 mn in the corresponding period in 2006 .
|
data/raw/finphrase_009.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
HELSINKI ( AFX ) - Shares closed higher , led by Nokia after it announced plans to team up with Sanyo to manufacture 3G handsets , and by Nokian Tyres after its fourth-quarter earnings report beat analysts ' expectations , dealers said .
|
data/raw/finphrase_010.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .
|
data/raw/finphrase_011.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
MegaFon 's subscriber base increased 16.1 % in 2009 to 50.5 million users as of December 31 , while its market share by the number of customers amounted to 24 % as of late 2009 , up from 23 % as of late 2008 , according to TeliaSonera estimates .
|
data/raw/finphrase_012.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Net income from life insurance doubled to EUR 6.8 mn from EUR 3.2 mn , and net income from non-life insurance rose to EUR 5.2 mn from EUR 1.5 mn in the corresponding period in 2009 .
|
data/raw/finphrase_013.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Net sales increased to EUR193 .3 m from EUR179 .9 m and pretax profit rose by 34.2 % to EUR43 .1 m. ( EUR1 = USD1 .4 )
|
data/raw/finphrase_014.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Net sales surged by 18.5 % to EUR167 .8 m. Teleste said that EUR20 .4 m , or 12.2 % , of the sales came from the acquisitions made in 2009 .
|
data/raw/finphrase_015.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Nordea Group 's operating profit increased in 2010 by 18 percent year-on-year to 3.64 billion euros and total revenue by 3 percent to 9.33 billion euros .
|
data/raw/finphrase_016.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Operating profit for the nine-month period increased from EUR13 .6 m , while net sales increased from EUR394 .7 m , as compared to the corresponding period in 2005 .
|
data/raw/finphrase_017.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .
|
data/raw/finphrase_018.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The company 's net profit rose 11.4 % on the year to 82.2 million euros in 2005 on sales of 686.5 million euros , 13.8 % up on the year , the company said earlier .
|
data/raw/finphrase_019.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The Lithuanian beer market made up 14.41 million liters in January , a rise of 0.8 percent from the year-earlier figure , the Lithuanian Brewers ' Association reporting citing the results from its members .
|
data/raw/finphrase_020.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Viking Line 's cargo revenue increased by 5.4 % to EUR 21.46 mn , and cargo volume increased by 2.4 % to 70,116 cargo units .
|
data/raw/finphrase_021.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The fair value of the property portfolio doubled as a result of the Kapiteeli acquisition and totalled EUR 2,686.2 1,259.7 million .
|
data/raw/finphrase_022.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
10 February 2011 - Finnish media company Sanoma Oyj HEL : SAA1V said yesterday its 2010 net profit almost tripled to EUR297 .3 m from EUR107 .1 m for 2009 and announced a proposal for a raised payout .
|
data/raw/finphrase_023.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
A Helsinki : ELIiV today reported EPS of EUR1 .13 for 2009 , an increase over EPS of EUR1 .12 in 2008 .
|
data/raw/finphrase_024.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Commission income increased by 22 % to EUR 4.4 mn , and lending volume rose by 13.5 % .
|
data/raw/finphrase_025.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
In January , traffic , measured in revenue passenger kilometres RPK , went up by 3.2 % and capacity , measured in available seat kilometres ASK , rose by 12.2 % .
|
data/raw/finphrase_026.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
In January-September 2010 , Fiskars ' net profit went up by 14 % year-on-year to EUR 65.4 million and net sales to EUR 525.3 million from EUR 487.7 million .
|
data/raw/finphrase_027.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Net income from life insurance rose to EUR 16.5 mn from EUR 14.0 mn , and net income from non-life insurance to EUR 22.6 mn from EUR 15.2 mn in 2009 .
|
data/raw/finphrase_028.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Sales have risen in other export markets .
|
data/raw/finphrase_029.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Sales increased due to growing market rates and increased operations .
|
data/raw/finphrase_030.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The agreement strengthens our long-term partnership with Nokia Siemens Networks .
|
data/raw/finphrase_031.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The company 's order book stood at 1.5 bln euro $ 2.2 bln on September 30 , 2007 , up by 24.2 pct on the year , with international orders amounting to 365 mln euro $ 534.3 mln .
|
data/raw/finphrase_032.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The company said that paper demand increased in all of its main markets , including of publication papers , and that it increased average paper prices by 4 percent compared with last year .
|
data/raw/finphrase_033.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
The world 's second largest stainless steel maker said net profit in the three-month period until Dec. 31 surged to euro603 million US$ 781 million , or euro3 .33 US$ 4.31 per share , from euro172 million , or euro0 .94 per share , the previous year .
|
data/raw/finphrase_034.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Shares of Standard Chartered ( STAN ) rose 1.2 % in the FTSE 100 , while Royal Bank of Scotland ( RBS ) shares rose 2 % and Barclays shares ( BARC ) ( BCS ) were up 1.7 % .
|
data/raw/finphrase_035.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Source: FinancialPhrasebank
|
| 2 |
+
|
| 3 |
+
Shares of Nokia Corp. rose Thursday after the cell phone maker said its third-quarter earnings almost doubled and its share of the global handset market increased .
|