yuvrajsingh6 commited on
Commit
4d592a4
·
1 Parent(s): d43e9d6

feat: RAG system with OCR for Hugging Face Spaces

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +7 -0
  2. README.md +426 -5
  3. backend/Dockerfile +25 -0
  4. backend/app/__init__.py +0 -0
  5. backend/app/api/__init__.py +3 -0
  6. backend/app/api/v1/__init__.py +5 -0
  7. backend/app/api/v1/routes/__init__.py +12 -0
  8. backend/app/api/v1/routes/documents.py +70 -0
  9. backend/app/api/v1/routes/health.py +14 -0
  10. backend/app/api/v1/routes/query.py +87 -0
  11. backend/app/api/v1/routes/upload.py +87 -0
  12. backend/app/config.py +47 -0
  13. backend/app/main.py +52 -0
  14. backend/app/models/__init__.py +0 -0
  15. backend/app/models/schemas.py +71 -0
  16. backend/app/services/__init__.py +19 -0
  17. backend/app/services/confidence.py +61 -0
  18. backend/app/services/embeddings.py +33 -0
  19. backend/app/services/enhanced_llm.py +267 -0
  20. backend/app/services/llm_service.py +104 -0
  21. backend/app/services/pdf_processor.py +82 -0
  22. backend/app/services/prompt_guard.py +51 -0
  23. backend/app/services/retriever.py +83 -0
  24. backend/app/services/vector_store.py +75 -0
  25. backend/app/services/web_search.py +416 -0
  26. backend/app/utils/__init__.py +10 -0
  27. backend/app/utils/chunking.py +48 -0
  28. backend/app/utils/rate_limiter.py +199 -0
  29. backend/reproduce_query.py +79 -0
  30. backend/reproduce_upload.py +20 -0
  31. backend/requirements.txt +20 -0
  32. frontend/.env.example +1 -0
  33. frontend/.env.local +1 -0
  34. frontend/README.md +87 -0
  35. frontend/index.html +13 -0
  36. frontend/package-lock.json +0 -0
  37. frontend/package.json +29 -0
  38. frontend/postcss.config.js +6 -0
  39. frontend/public/vite.svg +1 -0
  40. frontend/src/App.tsx +20 -0
  41. frontend/src/components/common/EmptyState.tsx +77 -0
  42. frontend/src/components/documents/DocumentCard.tsx +97 -0
  43. frontend/src/components/documents/DocumentList.tsx +48 -0
  44. frontend/src/components/documents/FileUpload.tsx +83 -0
  45. frontend/src/components/layout/Header.tsx +56 -0
  46. frontend/src/components/layout/MainContent.tsx +25 -0
  47. frontend/src/components/layout/Sidebar.tsx +19 -0
  48. frontend/src/components/query/ModeSelector.tsx +53 -0
  49. frontend/src/components/query/QueryInput.tsx +125 -0
  50. frontend/src/components/results/AnswerCard.tsx +70 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ backend/.env
2
+ backend/.env.bak
3
+ backend/storage/
4
+ frontend/node_modules/
5
+ frontend/dist/
6
+ *.pyc
7
+ __pycache__/
README.md CHANGED
@@ -1,10 +1,431 @@
1
  ---
2
- title: Web Based Rag
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Web-Based RAG System
3
+ emoji: 📚
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ # Web-Based RAG System
11
+
12
+ A production-ready Retrieval-Augmented Generation (RAG) system that combines PDF document processing and web search capabilities to provide intelligent answers to user queries.
13
+
14
+ ## Table of Contents
15
+ - [Features](#features)
16
+ - [Tech Stack](#tech-stack)
17
+ - [Architecture](#architecture)
18
+ - [Installation](#installation)
19
+ - [Configuration](#configuration)
20
+ - [Usage](#usage)
21
+ - [Project Structure](#project-structure)
22
+ - [API Endpoints](#api-endpoints)
23
+ - [Frontend Components](#frontend-components)
24
+ - [Contributing](#contributing)
25
+ - [License](#license)
26
+
27
+ ## Features
28
+
29
+ - **Multi-Modal Query Processing**: Supports queries against both uploaded PDF documents and live web search
30
+ - **PDF Document Management**: Upload, store, and process PDF documents with advanced extraction techniques
31
+ - **OCR Support for Scanned PDFs**: Automatically extracts text from image-based/scanned PDFs using Tesseract OCR
32
+ - **Hybrid Search**: Combine PDF-based retrieval with web search for comprehensive answers
33
+ - **Confidence Scoring**: Provides confidence scores for generated responses
34
+ - **Vector Storage**: Efficient similarity search using ChromaDB vector database
35
+ - **Modern UI**: Responsive React-based frontend with intuitive user experience
36
+ - **RESTful API**: Well-documented API endpoints for easy integration
37
+ - **File Upload**: Drag-and-drop PDF upload functionality
38
+ - **Query Modes**: Different query modes (PDF-only, Web-only, Hybrid, Restricted)
39
+
40
+ ## Tech Stack
41
+
42
+ ### Backend
43
+ - **Framework**: FastAPI (Python)
44
+ - **Database**: ChromaDB (Vector Database)
45
+ - **Embeddings**: Sentence Transformers
46
+ - **Language**: Python 3.11+
47
+ - **Web Framework**: FastAPI with Uvicorn ASGI server
48
+ - **HTTP Client**: aiohttp
49
+ - **PDF Processing**: PyPDF, pdfplumber, pdf2image, pytesseract
50
+ - **OCR**: Tesseract for scanned/image-based PDFs
51
+ - **LLM Integration**: Groq API
52
+ - **Environment Management**: python-dotenv
53
+ - **Data Validation**: Pydantic
54
+
55
+ ### Frontend
56
+ - **Framework**: React 18+
57
+ - **Language**: TypeScript
58
+ - **Styling**: Tailwind CSS
59
+ - **Build Tool**: Vite
60
+ - **HTTP Client**: Axios
61
+ - **UI Components**: Custom-built with Lucide React icons
62
+ - **File Upload**: react-dropzone
63
+ - **Notifications**: react-hot-toast
64
+
65
+ ## Architecture
66
+
67
+ The application follows a microservices architecture with a clear separation between frontend and backend:
68
+
69
+ ```
70
+ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
71
+ │ Frontend │ │ Backend │ │ External │
72
+ │ (React) │◄──►│ (FastAPI) │◄──►│ Services │
73
+ │ │ │ │ │ │
74
+ │ • User Interface│ │ • API Gateway │ │ • Groq API │
75
+ │ • File Upload │ │ • PDF Processor │ │ • Web Search │
76
+ │ • Query Input │ │ • Embedding │ │ • Vector Store │
77
+ │ • Results Display│ │ • Retriever │ │ │
78
+ └─────────────────┘ │ • LLM Service │ └─────────────────┘
79
+ └─────────────────┘
80
+ ```
81
+
82
+ ## Installation
83
+
84
+ ### Prerequisites
85
+ - Python 3.11+
86
+ - Node.js 18+
87
+ - npm or yarn
88
+ - Git
89
+ - **Tesseract OCR** (for scanned PDF support):
90
+ - macOS: `brew install tesseract poppler`
91
+ - Ubuntu: `sudo apt-get install tesseract-ocr poppler-utils`
92
+ - Windows: Download from https://github.com/tesseract-ocr/tesseract
93
+
94
+ ### Backend Setup
95
+
96
+ 1. Clone the repository:
97
+ ```bash
98
+ git clone https://github.com/YuvrajSinghBhadoria2/web_based_rag.git
99
+ cd web_based_rag/backend
100
+ ```
101
+
102
+ 2. Create a virtual environment:
103
+ ```bash
104
+ python -m venv venv
105
+ source venv/bin/activate # On Windows: venv\Scripts\activate
106
+ ```
107
+
108
+ 3. Install dependencies:
109
+ ```bash
110
+ pip install -r requirements.txt
111
+ ```
112
+
113
+ 4. Create a `.env` file in the backend directory with the variables listed in the [Configuration](#configuration) section (note: `.env.bak` is git-ignored, so it will not exist in a fresh clone — create the file by hand or copy your own local template):
114
+ ```bash
115
+ cp .env.bak .env
116
+ ```
117
+
118
+ ### Frontend Setup
119
+
120
+ 1. Navigate to the frontend directory:
121
+ ```bash
122
+ cd ../frontend
123
+ ```
124
+
125
+ 2. Install dependencies:
126
+ ```bash
127
+ npm install
128
+ ```
129
+
130
+ ## Configuration
131
+
132
+ ### Backend Environment Variables
133
+
134
+ Create a `.env` file in the backend directory with the following variables:
135
+
136
+ ```env
137
+ GROQ_API_KEY=your_groq_api_key_here
138
+ SERPER_API_KEY=your_serper_api_key_here # Optional - for web search
139
+ TAVILY_API_KEY=your_tavily_api_key_here # Optional - for web search
140
+ VECTOR_DB_PATH=./storage/vector_db
141
+ UPLOAD_DIR=./storage/uploads
142
+ GROQ_MODEL=llama-3.3-70b-versatile
143
+ TEMPERATURE=0.1
144
+ MAX_TOKENS=1000
145
+ TOP_P=1
146
+ STOP_TOKENS=["\n", "###"]
147
+ CORS_ORIGINS=["http://localhost:5173", "http://localhost:3000", "http://127.0.0.1:5173", "http://127.0.0.1:3000", "http://localhost:5175"]
148
+ ```
149
+
150
+ Replace `your_groq_api_key_here` with your actual Groq API key. You can get one from [Groq Cloud](https://console.groq.com/keys).
151
+
152
+ For web search functionality, add Serper or Tavily API keys (optional - without them, hybrid mode will only use PDF sources).
153
+
154
+ ## Usage
155
+
156
+ ### Running the Backend
157
+
158
+ 1. Make sure you're in the backend directory
159
+ 2. Activate your virtual environment
160
+ 3. Start the backend server:
161
+ ```bash
162
+ uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
163
+ ```
164
+
165
+ The backend will be available at `http://localhost:8000` with API documentation at `http://localhost:8000/api/docs`.
166
+
167
+ ### Running the Frontend
168
+
169
+ 1. Navigate to the frontend directory
170
+ 2. Start the development server:
171
+ ```bash
172
+ npm run dev
173
+ ```
174
+
175
+ The frontend will be available at `http://localhost:5173`.
176
+
177
+ ### Application Workflow
178
+
179
+ 1. **Upload Documents**: Use the drag-and-drop interface to upload PDF documents
180
+ 2. **Select Query Mode**: Choose between PDF-only, Web-only, Hybrid, or Restricted modes
181
+ 3. **Enter Query**: Type your question in the query input
182
+ 4. **Get Response**: Receive an AI-generated answer with confidence score and source citations
183
+ 5. **Review Sources**: View the documents and web pages that contributed to the response
184
+
185
+ ### OCR for Scanned PDFs
186
+
187
+ The system automatically detects and processes scanned/image-based PDFs using Tesseract OCR:
188
+ - If a PDF contains selectable text, it uses the native text extraction
189
+ - If no text is found, it automatically applies OCR to extract text from images
190
+ - Works with scanned documents, image-only PDFs, and documents with mixed content
191
+
192
+ ## Project Structure
193
+
194
+ ```
195
+ web_based_rag/
196
+ ├── backend/
197
+ │ ├── app/
198
+ │ │ ├── api/
199
+ │ │ │ └── v1/
200
+ │ │ │ └── routes/
201
+ │ │ │ ├── documents.py # Document management endpoints
202
+ │ │ │ ├── health.py # Health check endpoint
203
+ │ │ │ ├── query.py # Query processing endpoints
204
+ │ │ │ └── upload.py # File upload endpoints
205
+ │ │ ├── core/ # Core utilities and configurations
206
+ │ │ ├── models/
207
+ │ │ │ └── schemas.py # Pydantic models and schemas
208
+ │ │ ├── services/
209
+ │ │ │ ├── confidence.py # Confidence scoring service
210
+ │ │ │ ├── embeddings.py # Embedding generation service
211
+ │ │ │ ├── enhanced_llm.py # Enhanced LLM service
212
+ │ │ │ ├── llm_service.py # LLM integration service
213
+ │ │ │ ├── pdf_processor.py # PDF processing service
214
+ │ │ │ ├── prompt_guard.py # Prompt safety service
215
+ │ │ │ ├── retriever.py # Information retrieval service
216
+ │ │ │ ├── vector_store.py # Vector database operations
217
+ │ │ │ └── web_search.py # Web search service
218
+ │ │ ├── utils/
219
+ │ │ │ ├── chunking.py # Text chunking utilities
220
+ │ │ │ └── rate_limiter.py # Rate limiting utilities
221
+ │ │ ├── config.py # Configuration settings
222
+ │ │ └── main.py # Application entry point
223
+ │ ├── storage/
224
+ │ │ ├── uploads/ # Uploaded PDF files
225
+ │ │ ├── vector_db/ # Vector database files
226
+ │ │ └── documents.json # Document metadata
227
+ │ ├── requirements.txt # Python dependencies
228
+ │ ├── Dockerfile # Docker configuration
229
+ │ └── .env.bak # Environment variables template
230
+ └── frontend/
231
+ ├── src/
232
+ │ ├── components/
233
+ │ │ ├── common/ # Reusable UI components
234
+ │ │ ├── documents/ # Document-related components
235
+ │ │ ├── layout/ # Layout components
236
+ │ │ ├── query/ # Query input components
237
+ │ │ ├── results/ # Results display components
238
+ │ │ └── settings/ # Settings modal components
239
+ │ ├── context/
240
+ │ │ └── AppContext.tsx # Application state management
241
+ │ ├── services/
242
+ │ │ └── api.ts # API service client
243
+ │ ├── types/
244
+ │ │ └── index.ts # Type definitions
245
+ │ ├── App.tsx # Main application component
246
+ │ └── main.tsx # Application entry point
247
+ ├── package.json
248
+ ├── tsconfig.json
249
+ ├── tailwind.config.js
250
+ └── vite.config.ts
251
+ ```
252
+
253
+ ## API Endpoints
254
+
255
+ ### Health Check
256
+ - `GET /` - Root endpoint returning API information
257
+
258
+ ### Documents
259
+ - `GET /api/v1/documents` - Get list of uploaded documents
260
+ - `DELETE /api/v1/documents/{document_id}` - Delete a document
261
+
262
+ ### File Upload
263
+ - `POST /api/v1/upload` - Upload PDF document
264
+
265
+ ### Query
266
+ - `POST /api/v1/query` - Process query with specified mode
267
+ - Request body: `{"query": "your query", "mode": "pdf|web|hybrid|restricted", "document_ids": ["optional document IDs"]}`
268
+ - Response: `{"response": "answer", "sources": [], "confidence": 0.85}`
269
+
270
+ ### Additional Endpoints
271
+ - `GET /api/docs` - Interactive API documentation (Swagger UI)
272
+ - `GET /api/redoc` - Alternative API documentation (ReDoc)
273
+
274
+ ## Frontend Components
275
+
276
+ ### Layout Components
277
+ - **Header**: Navigation and branding
278
+ - **Sidebar**: Document management and settings
279
+ - **MainContent**: Primary content area
280
+
281
+ ### Document Components
282
+ - **FileUpload**: Drag-and-drop PDF upload
283
+ - **DocumentList**: Display of uploaded documents
284
+ - **DocumentCard**: Individual document information
285
+
286
+ ### Query Components
287
+ - **QueryInput**: Input field with mode selector
288
+ - **ModeSelector**: Options for PDF-only, Web-only, Hybrid, or Restricted queries
289
+
290
+ ### Results Components
291
+ - **ResultsDisplay**: Container for query results
292
+ - **AnswerCard**: Display of the AI-generated answer
293
+ - **SourcesList**: List of source documents
294
+ - **SourceCard**: Detailed source information
295
+ - **ConfidenceIndicator**: Visual representation of response confidence
296
+
297
+ ### Settings Components
298
+ - **SettingsModal**: Configuration options
299
+
300
+ ## Contributing
301
+
302
+ 1. Fork the repository
303
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
304
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
305
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
306
+ 5. Open a Pull Request
307
+
308
+ ## Deploying to Hugging Face Spaces
309
+
310
+ This application is configured for deployment on Hugging Face Spaces using the Docker SDK. The repository includes:
311
+
312
+ - A `Dockerfile` that sets up the complete environment
313
+ - A `README.md` with proper Hugging Face metadata
314
+ - All necessary backend and frontend code
315
+
316
+ To deploy to your Space:
317
+
318
+ 1. Create a new Space with the Docker SDK
319
+ 2. Point it to this repository
320
+ 3. Add your API keys as Space Secrets:
321
+ - `GROQ_API_KEY`: Your Groq API key
322
+ 4. The Space will automatically build and deploy using the Dockerfile
323
+
324
+ Your application will be served at the port specified in the Dockerfile (7860).
325
+
326
+ ### Option 1: Using the Docker Image
327
+
328
+ 1. Create a new Space on Hugging Face with the following settings:
329
+ - **Space SDK**: Docker
330
+ - **Hardware**: Choose based on your needs (GPU recommended for better performance)
331
+
332
+ 2. Add your Hugging Face token and API keys as secrets in the Space settings:
333
+ - `HF_TOKEN`: Your Hugging Face token
334
+ - `GROQ_API_KEY`: Your Groq API key
335
+ - Any other required API keys
336
+
337
+ 3. Create a `Dockerfile` in your Space repository with the following content:
338
+
339
+ ```dockerfile
340
+ FROM python:3.11-slim
341
+
342
+ WORKDIR /app
343
+
344
+ # Install nodejs for the frontend
345
+ RUN apt-get update && apt-get install -y nodejs npm && apt-get clean
346
+
347
+ # Copy backend requirements and install Python dependencies
348
+ COPY backend/requirements.txt .
349
+ RUN pip install --no-cache-dir -r requirements.txt
350
+
351
+ # Install frontend dependencies
352
+ COPY frontend/package*.json ./frontend/
353
+ RUN cd frontend && npm ci --only=production
354
+
355
+ # Copy the rest of the application
356
+ COPY . .
357
+
358
+ # Build the frontend
359
+ RUN cd frontend && npm run build
360
+
361
+ # Expose the port Hugging Face Spaces expects
362
+ EXPOSE 7860
363
+
364
+ # Start both backend and frontend
365
+ CMD bash -c "cd backend && python -m uvicorn app.main:app --host 0.0.0.0 --port 7860 & cd frontend && npx serve -s dist -l 7861"
366
+ ```
367
+
368
+ 4. Create an `.env` file in the backend directory with your API keys:
369
+
370
+ ```env
371
+ GROQ_API_KEY=your_groq_api_key_here
372
+ # Add other required environment variables
373
+ ```
374
+
375
+ ### Option 2: Deploying Your Existing React Frontend (Recommended)
376
+
377
+ To deploy your existing React frontend along with the FastAPI backend (this preserves your original UI):
378
+
379
+ 1. In your Hugging Face Space repository, copy your entire project
380
+
381
+ 2. Create a Dockerfile that builds and serves both applications:
382
+
383
+ ```dockerfile
384
+ FROM node:18-alpine AS frontend-build
385
+ WORKDIR /app
386
+ COPY frontend/package*.json .
387
+ RUN npm ci
388
+ COPY frontend/ .
389
+ RUN npm run build
390
+
391
+ FROM python:3.11-slim AS backend-build
392
+ WORKDIR /app
393
+
394
+ # Install dependencies
395
+ COPY backend/requirements.txt .
396
+ RUN pip install --no-cache-dir -r requirements.txt
397
+
398
+ # Copy application code
399
+ COPY backend/ .
400
+
401
+ # Copy built frontend
402
+ COPY --from=frontend-build /app/dist ./frontend/dist
403
+
404
+ # Install node for serving frontend
405
+ RUN apt-get update && apt-get install -y nodejs npm && apt-get clean
406
+
407
+ EXPOSE 7860
408
+
409
+ CMD python -m uvicorn app.main:app --host 0.0.0.0 --port 7860
410
+ ```
411
+
412
+ 3. Update your backend CORS settings in `backend/app/config.py` to allow the Hugging Face Space URL
413
+
414
+ 4. Add your API keys as Space secrets:
415
+ - `GROQ_API_KEY`: Your Groq API key
416
+ - Other required API keys
417
+
418
+ Note: This approach maintains your original React interface which is more feature-rich than a Gradio interface. Your existing frontend with its document cards, sidebar, settings modal, and responsive design will be preserved.
419
+
420
+ ## Deployment Steps
421
+
422
+ 1. Create a new repository on Hugging Face Spaces
423
+ 2. Push your code to the repository
424
+ 3. Add your API keys as secrets in the Space settings
425
+ 4. The application will automatically build and deploy
426
+
427
+ Your RAG application is now ready for deployment on Hugging Face Spaces. (Never commit real tokens or API keys to the repository — supply them only as Space Secrets.)
428
+
429
+ ## License
430
+
431
+ This project is licensed under the MIT License - see the LICENSE file for details.
backend/Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

WORKDIR /app

# System packages: build tools for native wheels, Tesseract + Poppler for
# OCR of scanned PDFs, Node.js/npm to build the React frontend.
RUN apt-get update && apt-get install -y \
    build-essential \
    tesseract-ocr \
    poppler-utils \
    nodejs \
    npm \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer caches across code changes.
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY backend/ ./backend/
COPY frontend/ ./frontend/

# Build the static frontend bundle (served by the FastAPI app at runtime).
RUN cd frontend && npm install && npm run build

RUN mkdir -p /app/storage/uploads /app/storage/vector_db

# Hugging Face Spaces routes external traffic to port 7860 only.
EXPOSE 7860

# Run uvicorn in the FOREGROUND on 7860. The previous CMD backgrounded uvicorn
# with "&" and served the frontend separately on 7861 — a port Spaces never
# exposes — and the container would stay "healthy" even after the API process
# died. The FastAPI app serves the built frontend itself (see app/main.py),
# so a single foreground process is both sufficient and correctly supervised.
CMD ["bash", "-c", "cd backend && uvicorn app.main:app --host 0.0.0.0 --port 7860"]
backend/app/__init__.py ADDED
File without changes
backend/app/api/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
# Re-export the versioned router so callers can `from app.api import api_router`.
from app.api.v1 import api_router

__all__ = ["api_router"]
backend/app/api/v1/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
# Wrap the aggregated route collection under a "/v1" prefix.
# NOTE(review): app/main.py includes the routes router directly with its own
# "/api/v1" prefix, bypassing this module — confirm which entry point is canonical.
from fastapi import APIRouter
from app.api.v1.routes import router

api_router = APIRouter()
api_router.include_router(router, prefix="/v1")
backend/app/api/v1/routes/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Combine the individual route modules into a single router; each sub-router
# already carries its own prefix (/upload, /query, /documents, /health).
from fastapi import APIRouter
from .upload import router as upload_router
from .query import router as query_router
from .documents import router as documents_router
from .health import router as health_router

router = APIRouter()

router.include_router(upload_router)
router.include_router(query_router)
router.include_router(documents_router)
router.include_router(health_router)
backend/app/api/v1/routes/documents.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter, HTTPException
from app.models.schemas import DocumentListResponse, Document
from app.services.vector_store import vector_store
from pathlib import Path
from datetime import datetime
import json

router = APIRouter(prefix="/documents", tags=["documents"])

# JSON file acting as a tiny metadata store: {document_id: {filename, ...}}.
DOCUMENTS_DB = "./storage/documents.json"


def load_documents_db():
    """Return the document-metadata dict, or {} if the store doesn't exist yet."""
    if Path(DOCUMENTS_DB).exists():
        with open(DOCUMENTS_DB, "r") as f:
            return json.load(f)
    return {}


def save_documents_db(documents):
    """Persist the metadata dict, creating ./storage on first use."""
    Path(DOCUMENTS_DB).parent.mkdir(exist_ok=True, parents=True)
    with open(DOCUMENTS_DB, "w") as f:
        json.dump(documents, f, default=str)


@router.get("/", response_model=DocumentListResponse)
async def list_documents():
    """List all uploaded documents, newest first."""
    documents_db = load_documents_db()

    documents = [
        Document(
            id=doc_id,
            filename=doc_data.get("filename", "Unknown"),
            # Missing/legacy records fall back to "now" so sorting still works.
            upload_date=datetime.fromisoformat(
                doc_data.get("upload_date", datetime.utcnow().isoformat())
            ),
            chunk_count=doc_data.get("chunk_count", 0),
            file_size=doc_data.get("file_size", 0),
            status=doc_data.get("status", "ready"),
        )
        for doc_id, doc_data in documents_db.items()
    ]

    return DocumentListResponse(
        documents=sorted(documents, key=lambda x: x.upload_date, reverse=True),
        total=len(documents),
    )


@router.delete("/{document_id}")
async def delete_document(document_id: str):
    """Delete a document's file, metadata record, and vector-store chunks.

    Raises:
        HTTPException(404): if the id is unknown. (The original returned
        HTTP 200 with a "Document not found" message, so clients could not
        distinguish success from failure.)
    """
    documents_db = load_documents_db()

    if document_id not in documents_db:
        raise HTTPException(status_code=404, detail="Document not found")

    file_path = Path(f"./storage/uploads/{document_id}.pdf")
    if file_path.exists():
        file_path.unlink()

    del documents_db[document_id]
    save_documents_db(documents_db)

    # Best-effort cleanup: the file and metadata are already gone, so a
    # vector-store hiccup shouldn't fail the request. `except Exception`
    # (not bare `except:`) keeps KeyboardInterrupt/SystemExit propagating.
    try:
        await vector_store.delete_document(document_id)
    except Exception:
        pass

    return {"message": "Document deleted"}
backend/app/api/v1/routes/health.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter
from app.models.schemas import HealthResponse

router = APIRouter(prefix="/health", tags=["health"])


@router.get("/", response_model=HealthResponse)
async def health_check():
    """Report liveness together with the fixed component stack in use."""
    # The component names are hard-coded here; they mirror the defaults in
    # app/config.py rather than reading live configuration.
    report = {
        "status": "healthy",
        "embedding_model": "all-MiniLM-L6-v2",
        "vector_db": "chromadb",
        "llm": "groq",
    }
    return HealthResponse(**report)
backend/app/api/v1/routes/query.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter, HTTPException
from app.models.schemas import QueryRequest, QueryResponse
from app.services.retriever import retriever_service
from app.services.enhanced_llm import enhanced_llm_service
from app.services.prompt_guard import prompt_guard
from app.services.confidence import confidence_service
from datetime import datetime
import time

router = APIRouter(prefix="/query", tags=["query"])


@router.post("/", response_model=QueryResponse)
async def query_documents(request: QueryRequest):
    """Answer a query using the requested retrieval mode.

    Pipeline: guard-rail validation (restricted mode only) -> source
    retrieval -> LLM answer generation -> output sanitization ->
    confidence scoring. LLM, sanitizer, and confidence failures degrade
    gracefully instead of failing the whole request.

    Raises:
        HTTPException(400): restricted-mode validation rejected the query.
        HTTPException(500): source retrieval failed.
    """
    start_time = time.time()

    # Restricted mode enforces caller-supplied content restrictions up front.
    try:
        if request.mode.value == "restricted":
            await prompt_guard.validate_input(request.query, request.restrictions)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Retrieve candidate sources (PDF chunks and/or web results).
    try:
        sources = await retriever_service.retrieve(
            query=request.query,
            mode=request.mode,
            top_k=request.top_k or 5,
            document_ids=request.document_ids,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Retrieval failed: {str(e)}")

    # No sources: answer honestly with zero confidence rather than hallucinate.
    if not sources:
        return QueryResponse(
            answer="No relevant sources found for your query.",
            sources=[],
            confidence=0,
            mode_used=request.mode,
            query=request.query,
            timestamp=datetime.utcnow(),
            processing_time_ms=int((time.time() - start_time) * 1000),
        )

    # Generate the answer; on LLM failure return the sources with a friendly
    # message instead of a 5xx (upstream rate limits are common). The unused
    # `as e` binding was dropped, and the message was an f-string with no
    # placeholders — a plain literal is equivalent.
    try:
        answer = await enhanced_llm_service.generate_answer(
            query=request.query, sources=sources
        )
    except Exception:
        return QueryResponse(
            answer="Unable to generate answer at this time due to high demand. Please try again in a few moments.",
            sources=sources,
            confidence=50,
            mode_used=request.mode,
            query=request.query,
            timestamp=datetime.utcnow(),
            processing_time_ms=int((time.time() - start_time) * 1000),
        )

    # Best-effort output sanitization; never let the guard break a good answer.
    # `except Exception` replaces the original bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate.
    try:
        answer = await prompt_guard.sanitize_output(answer)
    except Exception:
        pass

    # Confidence scoring is advisory; fall back to a neutral 50.0 on error.
    try:
        confidence = confidence_service.calculate_confidence(
            query=request.query, answer=answer, sources=sources
        )
    except Exception:
        confidence = 50.0

    processing_time = int((time.time() - start_time) * 1000)

    return QueryResponse(
        answer=answer,
        sources=sources,
        confidence=confidence,
        mode_used=request.mode,
        query=request.query,
        timestamp=datetime.utcnow(),
        processing_time_ms=processing_time,
    )
backend/app/api/v1/routes/upload.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter, UploadFile, File, HTTPException
from app.models.schemas import UploadResponse
from app.services.pdf_processor import pdf_processor
from app.services.embeddings import embedding_service
from app.services.vector_store import vector_store
from pathlib import Path
import uuid
from datetime import datetime
import shutil
import os
import json

router = APIRouter(prefix="/upload", tags=["upload"])

# Shared JSON metadata store; documents.py uses the same path and format.
DOCUMENTS_DB = "./storage/documents.json"


def load_documents_db():
    """Return the document-metadata dict, or {} if the store doesn't exist yet."""
    if Path(DOCUMENTS_DB).exists():
        with open(DOCUMENTS_DB, "r") as f:
            return json.load(f)
    return {}


def save_documents_db(documents):
    """Persist the metadata dict, creating ./storage on first use."""
    Path(DOCUMENTS_DB).parent.mkdir(exist_ok=True, parents=True)
    with open(DOCUMENTS_DB, "w") as f:
        json.dump(documents, f, default=str)


@router.post("/", response_model=UploadResponse)
async def upload_pdf(file: UploadFile = File(...)):
    """Accept a PDF upload: save, validate, chunk, embed, and index it.

    Raises:
        HTTPException(400): missing/non-PDF filename or failed validation.
        HTTPException(500): PDF processing or vector indexing failed.
    """
    # Guard against a missing filename (AttributeError in the original) and
    # accept any case variant of the extension (".PDF" was rejected before).
    if not file.filename or not file.filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")

    document_id = str(uuid.uuid4())

    upload_dir = Path("./storage/uploads")
    upload_dir.mkdir(exist_ok=True, parents=True)
    file_path = upload_dir / f"{document_id}.pdf"

    # Stream the upload to disk without buffering the whole file in memory.
    with file_path.open("wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    try:
        pdf_processor.validate_file(file_path)
    except Exception as e:
        if file_path.exists():
            os.remove(file_path)
        raise HTTPException(status_code=400, detail=str(e))

    try:
        chunks = await pdf_processor.process_document(file_path, document_id)
    except Exception as e:
        if file_path.exists():
            os.remove(file_path)
        raise HTTPException(status_code=500, detail=f"Failed to process PDF: {str(e)}")

    try:
        texts = [chunk["text"] for chunk in chunks]
        embeddings = embedding_service.embed_batch(texts)
        await vector_store.add_chunks(chunks, embeddings)
    except Exception as e:
        # Clean up the saved file like the earlier failure paths do; the
        # original left an orphaned PDF on disk with no metadata record.
        if file_path.exists():
            os.remove(file_path)
        raise HTTPException(
            status_code=500, detail=f"Failed to index document: {str(e)}"
        )

    file_size = file_path.stat().st_size if file_path.exists() else 0

    documents_db = load_documents_db()
    documents_db[document_id] = {
        "id": document_id,
        "filename": file.filename,
        "upload_date": datetime.utcnow().isoformat(),
        "chunk_count": len(chunks),
        "file_size": file_size,
        "status": "ready",
    }
    save_documents_db(documents_db)

    return UploadResponse(
        document_id=document_id,
        filename=file.filename,
        status="completed",
        chunks_created=len(chunks),
        upload_date=datetime.utcnow(),
    )
backend/app/config.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from pathlib import Path
from dotenv import load_dotenv
from pydantic import BaseModel

# Load backend/.env into the process environment before any getenv calls below.
load_dotenv()


class Settings(BaseModel):
    """Application settings, sourced from environment variables with defaults."""

    # Vector store / retrieval parameters.
    VECTOR_DB_PATH: Path = Path(os.getenv("VECTOR_DB_PATH", "./storage/vector_db"))
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
    CHUNK_SIZE: int = int(os.getenv("CHUNK_SIZE", "512"))
    CHUNK_OVERLAP: int = int(os.getenv("CHUNK_OVERLAP", "50"))
    TOP_K: int = int(os.getenv("TOP_K", "5"))

    # External API credentials; the web-search keys are optional.
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    SERPER_API_KEY: str = os.getenv("SERPER_API_KEY", "")
    TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")

    ALLOWED_MODES: list = ["web", "pdf", "hybrid", "restricted"]

    # Resolved at import time relative to the process working directory —
    # NOTE(review): confirm the server is always launched from backend/.
    UPLOAD_DIR: Path = Path(os.path.abspath("./storage/uploads"))

    MAX_FILE_SIZE: int = 10 * 1024 * 1024  # 10 MiB upload cap
    ALLOWED_FILE_TYPES: list = ["application/pdf"]

    # Groq model cascade used by the LLM services.
    GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
    GROQ_MODEL_PRIMARY: str = os.getenv("GROQ_MODEL_PRIMARY", "llama-3.1-8b-instant")
    GROQ_MODEL_SECONDARY: str = os.getenv("GROQ_MODEL_SECONDARY", "mixtral-8x7b-32768")
    GROQ_MODEL_FALLBACK: str = os.getenv(
        "GROQ_MODEL_FALLBACK", "llama-3.3-70b-versatile"
    )
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.1"))

    WEB_SEARCH_MAX_RESULTS: int = int(os.getenv("WEB_SEARCH_MAX_RESULTS", "5"))
    SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.7"))

    # NOTE(review): Starlette's CORSMiddleware matches allow_origins EXACTLY;
    # "https://*.hf.space" is not expanded as a wildcard and will never match.
    # Subdomain matching needs the allow_origin_regex middleware parameter
    # (set where the middleware is added, in app/main.py).
    CORS_ORIGINS: list = [
        "http://localhost:3000",
        "http://localhost:5173",
        "https://yuvis-web-based-rag.hf.space",
        "https://*.hf.space",
    ]


# Module-level singleton imported throughout the app.
settings = Settings()
backend/app/main.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from app.api.v1.routes import router as api_router
from app.config import settings
import os

app = FastAPI(
    title="RAG System API",
    description="Production-grade RAG system with PDF and web search",
    version="1.0.0",
    docs_url="/api/docs",
    redoc_url="/api/redoc",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(api_router, prefix="/api/v1")

# Directory the built React frontend is copied to inside the container.
frontend_path = "/app"


@app.get("/health")
async def health_check():
    """Liveness probe used by the hosting platform."""
    return {"status": "healthy", "service": "RAG System"}


@app.get("/")
async def serve_index():
    """Serve the SPA entry point; fall back to a plain API banner."""
    index_path = os.path.join(frontend_path, "index.html")
    if os.path.exists(index_path):
        return FileResponse(index_path)
    # Also try fallback path (local dev build of the frontend)
    fallback_path = os.path.join(os.path.dirname(__file__), "../../frontend/dist/index.html")
    if os.path.exists(fallback_path):
        return FileResponse(fallback_path)
    return {"message": "RAG System API", "version": "1.0.0"}


# BUG FIX: the static mount must be registered AFTER the explicit routes.
# Starlette matches routes in registration order, so mounting StaticFiles at
# "/" first (the original order) shadowed /health and "/" defined below it —
# those endpoints were served by StaticFiles (404 for /health) instead of the
# handlers. Routes registered above still take precedence over this catch-all.
try:
    app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")
except RuntimeError:
    # Fallback path if /app doesn't exist (e.g. running outside the container)
    fallback_path = os.path.join(os.path.dirname(__file__), "../../frontend/dist")
    if os.path.exists(fallback_path):
        app.mount("/", StaticFiles(directory=fallback_path, html=True), name="static")
backend/app/models/__init__.py ADDED
File without changes
backend/app/models/schemas.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional
3
+ from datetime import datetime
4
+ from enum import Enum
5
+
6
+
7
class QueryMode(str, Enum):
    """Retrieval strategy requested by the client."""

    WEB = "web"
    PDF = "pdf"
    HYBRID = "hybrid"
    RESTRICTED = "restricted"


class SourceType(str, Enum):
    """Origin of a retrieved context snippet."""

    PDF = "pdf"
    WEB = "web"


class QueryRequest(BaseModel):
    """Incoming payload for the query endpoint."""

    query: str = Field(..., min_length=1, max_length=1000)
    mode: QueryMode
    # Restrict PDF retrieval to these uploaded documents; None means all.
    document_ids: Optional[List[str]] = None
    # Topic strings the prompt guard rejects when they appear in the query.
    restrictions: Optional[List[str]] = None
    top_k: Optional[int] = Field(default=5, ge=1, le=20)


class Source(BaseModel):
    """A single retrieved snippet with its provenance."""

    type: SourceType
    content: str
    # Page reference for PDF sources, URL for web sources.
    reference: str
    title: str
    relevance_score: Optional[float] = None


class QueryResponse(BaseModel):
    """Answer plus supporting evidence returned to the client."""

    answer: str
    sources: List[Source]
    # Heuristic confidence score, constrained to [0, 100].
    confidence: float = Field(..., ge=0, le=100)
    mode_used: QueryMode
    query: str
    timestamp: datetime
    processing_time_ms: int


class UploadResponse(BaseModel):
    """Result of a successful PDF upload."""

    document_id: str
    filename: str
    status: str
    chunks_created: int
    upload_date: datetime


class Document(BaseModel):
    """Metadata describing one stored document."""

    id: str
    filename: str
    upload_date: datetime
    chunk_count: int
    file_size: int
    status: str


class DocumentListResponse(BaseModel):
    """Listing of all stored documents with a total count."""

    documents: List[Document]
    total: int


class HealthResponse(BaseModel):
    """Detailed health report of the backend components."""

    status: str
    embedding_model: str
    vector_db: str
    llm: str
backend/app/services/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .embeddings import embedding_service
2
+ from .pdf_processor import pdf_processor
3
+ from .vector_store import vector_store
4
+ from .retriever import retriever_service
5
+ from .web_search import web_search_service
6
+ from .llm_service import llm_service
7
+ from .prompt_guard import prompt_guard
8
+ from .confidence import confidence_service
9
+
10
+ __all__ = [
11
+ "embedding_service",
12
+ "pdf_processor",
13
+ "vector_store",
14
+ "retriever_service",
15
+ "web_search_service",
16
+ "llm_service",
17
+ "prompt_guard",
18
+ "confidence_service",
19
+ ]
backend/app/services/confidence.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from app.models.schemas import Source
3
+ from app.services.embeddings import embedding_service
4
+
5
+
6
class ConfidenceService:
    """Heuristic confidence scoring for generated answers.

    Combines four weighted factors — source count (25%), average source
    relevance (30%), query/answer semantic similarity (30%), and citation
    density (15%) — into a score in [0, 100].
    """

    def calculate_confidence(
        self, query: str, answer: str, sources: List[Source]
    ) -> float:
        """Return a confidence score in [0, 100] for `answer` given `sources`."""
        factors = {}

        # Factor 1: Source count (25%) — more corroborating sources, more trust.
        source_count = len(sources)
        if source_count == 0:
            factors["source_count"] = 0
        elif source_count == 1:
            factors["source_count"] = 15
        elif source_count == 2:
            factors["source_count"] = 20
        else:
            factors["source_count"] = 25

        # Factor 2: Average relevance score (30%).
        # Clamp each score into [0, 1] first: vector-store similarity and the
        # web-search heuristic score are not guaranteed to stay in that range.
        if sources:
            avg_relevance = sum(
                min(1.0, max(0.0, s.relevance_score or 0)) for s in sources
            ) / len(sources)
            factors["source_relevance"] = avg_relevance * 30
        else:
            factors["source_relevance"] = 0

        # Factor 3: Semantic similarity between query and answer (30%).
        # BUG FIX: cosine similarity can be negative; the previously unclamped
        # value could drive the total below 0, violating the QueryResponse
        # schema's ge=0 constraint. Floor the factor at 0.
        try:
            query_emb = embedding_service.embed_text(query)
            answer_text = answer[:1000] if len(answer) > 1000 else answer
            answer_emb = embedding_service.embed_text(answer_text)
            similarity = embedding_service.calculate_similarity(query_emb, answer_emb)
            factors["semantic_similarity"] = max(0.0, similarity) * 30
        except Exception:
            # Embedding failures degrade the score rather than failing the request.
            factors["semantic_similarity"] = 0

        # Factor 4: Citation density (15%) — counts "[Source" markers in the answer.
        citation_count = answer.count("[Source")
        if citation_count == 0:
            factors["citation_density"] = 0
        elif citation_count == 1:
            factors["citation_density"] = 10
        elif citation_count == 2:
            factors["citation_density"] = 13
        else:
            factors["citation_density"] = 15

        # Calculate total
        total_confidence = sum(factors.values())

        # Ensure minimum confidence for valid responses
        if source_count > 0 and total_confidence < 30:
            total_confidence = 35

        # Clamp into [0, 100] to honor the response schema bounds.
        return round(max(0.0, min(total_confidence, 100)), 2)


confidence_service = ConfidenceService()
backend/app/services/embeddings.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from typing import List
3
+ import numpy as np
4
+ from app.config import settings
5
+
6
+
7
class EmbeddingService:
    """Wraps a SentenceTransformer model for text embedding and provides
    cosine similarity between embedding vectors."""

    def __init__(self):
        self.model = SentenceTransformer(settings.EMBEDDING_MODEL)
        # Derive the dimension from the loaded model instead of hard-coding 384,
        # so overriding EMBEDDING_MODEL via env keeps this value correct.
        # (384 is retained as a fallback when the model does not report one.)
        self.dimension = self.model.get_sentence_embedding_dimension() or 384

    def embed_text(self, text: str) -> List[float]:
        """Embed a single string into a dense vector (as a plain list)."""
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed many strings at once; batching keeps memory bounded."""
        embeddings = self.model.encode(texts, convert_to_numpy=True, batch_size=32)
        return embeddings.tolist()

    def calculate_similarity(
        self, embedding1: List[float], embedding2: List[float]
    ) -> float:
        """Cosine similarity of two vectors; 0.0 when either has zero norm.

        BUG FIX: the original divided unconditionally, returning NaN (or
        raising a warning) for zero vectors such as empty-text embeddings.
        """
        vec1 = np.array(embedding1)
        vec2 = np.array(embedding2)

        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        if norm1 == 0.0 or norm2 == 0.0:
            return 0.0

        return float(np.dot(vec1, vec2) / (norm1 * norm2))
31
+
32
+
33
+ embedding_service = EmbeddingService()
backend/app/services/enhanced_llm.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced LLM Service with Multi-Model Support
3
+ - Groq llama-3.1-8b-instant (Primary - Fast)
4
+ - Groq mixtral-8x7b-32768 (Secondary - Quality)
5
+ - Request queuing to prevent rate limits
6
+ - Response caching for repeated queries
7
+ """
8
+
9
+ import aiohttp
10
+ import asyncio
11
+ import time
12
+ import hashlib
13
+ import json
14
+ from typing import List, Optional, Dict
15
+ from datetime import datetime
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+
19
+ from app.models.schemas import Source
20
+ from app.config import settings
21
+
22
+
23
class ModelType(Enum):
    """Tier of model to try; ordered PRIMARY -> SECONDARY -> FALLBACK.

    NOTE(review): these enum values are labels only — the model name actually
    sent to the API comes from MODEL_CONFIGS below, which reads settings first.
    """

    PRIMARY = "llama-3.1-8b-instant"  # Fast, good for simple queries
    SECONDARY = "mixtral-8x7b-32768"  # Better for complex queries
    FALLBACK = "llama-3.1-70b-versatile"  # Highest quality


@dataclass
class ModelConfig:
    """Per-model request parameters for the Groq chat-completions API."""

    name: str
    max_tokens: int
    temperature: float
    priority: int  # Lower = higher priority


# Model configurations
# Each entry resolves its model name from settings (env-overridable); the
# getattr default only applies if the attribute is missing from Settings.
MODEL_CONFIGS: Dict[ModelType, ModelConfig] = {
    ModelType.PRIMARY: ModelConfig(
        name=getattr(settings, "GROQ_MODEL_PRIMARY", "llama-3.1-8b-instant"),
        max_tokens=2048,
        temperature=0.1,
        priority=1,
    ),
    ModelType.SECONDARY: ModelConfig(
        name=getattr(settings, "GROQ_MODEL_SECONDARY", "mixtral-8x7b-32768"),
        max_tokens=4096,
        temperature=0.1,
        priority=2,
    ),
    ModelType.FALLBACK: ModelConfig(
        name=getattr(settings, "GROQ_MODEL_FALLBACK", "llama-3.1-70b-versatile"),
        max_tokens=4096,
        temperature=0.1,
        priority=3,
    ),
}
58
+
59
+
60
class RequestQueue:
    """Serializes outbound LLM calls so consecutive requests are spaced at
    least ``min_delay`` seconds apart — a simple guard against provider
    rate limits."""

    def __init__(self, min_delay: float = 1.0):
        self.min_delay = min_delay
        self.last_request_time: float = 0
        self.lock = asyncio.Lock()

    async def acquire(self):
        """Block until the spacing requirement is satisfied, then claim the slot."""
        async with self.lock:
            remaining = self.min_delay - (time.time() - self.last_request_time)
            if remaining > 0:
                await asyncio.sleep(remaining)
            self.last_request_time = time.time()
76
+
77
+
78
+ class ResponseCache:
79
+ """Simple TTL cache for LLM responses"""
80
+
81
+ def __init__(self, ttl_seconds: int = 300):
82
+ self.ttl = ttl_seconds
83
+ self._cache: Dict[str, tuple] = {}
84
+ self.lock = asyncio.Lock()
85
+
86
+ def _make_key(self, prompt: str, model: str) -> str:
87
+ """Create cache key from prompt and model"""
88
+ content = f"{model}:{prompt}"
89
+ return hashlib.md5(content.encode()).hexdigest()
90
+
91
+ async def get(self, prompt: str, model: str) -> Optional[str]:
92
+ """Get cached response"""
93
+ key = self._make_key(prompt, model)
94
+ async with self.lock:
95
+ if key in self._cache:
96
+ response, timestamp = self._cache[key]
97
+ if (datetime.now() - timestamp).total_seconds() < self.ttl:
98
+ return response
99
+ del self._cache[key]
100
+ return None
101
+
102
+ async def set(self, prompt: str, model: str, response: str):
103
+ """Cache a response"""
104
+ key = self._make_key(prompt, model)
105
+ async with self.lock:
106
+ self._cache[key] = (response, datetime.now())
107
+
108
+ # Clean old entries if cache is too large
109
+ if len(self._cache) > 100:
110
+ oldest = sorted(self._cache.items(), key=lambda x: x[1][1])[:10]
111
+ for k, _ in oldest:
112
+ del self._cache[k]
113
+
114
+ def clear(self):
115
+ """Clear all cached responses"""
116
+ self._cache.clear()
117
+
118
+
119
class EnhancedLLMService:
    """Enhanced LLM service with multi-model support and rate limiting.

    Tries PRIMARY -> SECONDARY -> FALLBACK in order, caches successful
    responses, and paces outbound calls through a shared RequestQueue.
    """

    def __init__(self):
        self.api_key = settings.GROQ_API_KEY
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        self.request_queue = RequestQueue(min_delay=1.0)
        self.cache = ResponseCache(ttl_seconds=300)
        # Models are attempted in this order until one returns a response.
        self.model_order = [
            ModelType.PRIMARY,
            ModelType.SECONDARY,
            ModelType.FALLBACK,
        ]
        self.max_retries = int(getattr(settings, "LLM_MAX_RETRIES", 5))
        self.retry_delay = int(getattr(settings, "LLM_RETRY_DELAY", 2))

    def _build_context(self, sources: List[Source]) -> str:
        """Build context string from sources (numbered [Source N] blocks)."""
        if not sources:
            return "No context available."

        context_parts = []
        for i, source in enumerate(sources, 1):
            context_parts.append(
                f"[Source {i}] {source.title}\n"
                f"Reference: {source.reference}\n"
                f"Content: {source.content}\n"
            )

        return "\n\n".join(context_parts)

    def _get_system_prompt(self) -> str:
        """Get system prompt (instructs page-number citations, not [Source N])."""
        return """You are a precise assistant that answers questions using only the provided context.

Rules:
1. Base your answer ONLY on the provided context
2. When citing sources, use the actual page number or reference provided (e.g., "According to Page 21..." or "As stated on Page 34...")
3. Do NOT use generic citations like [Source 1] or [Source 2]
4. If the context doesn't contain enough information, say "I don't have enough information to answer this question"
5. Be concise and accurate
6. Do not make assumptions or use external knowledge
7. Write in a clear, professional tone"""

    def _build_prompt(self, query: str, pdf_context: str, web_context: str) -> str:
        """Build the final user message from both context pools and the question."""
        return f"""Context from Documents:
{pdf_context}

Context from Web:
{web_context}

Question: {query}

Provide a comprehensive answer based on the context above. Include source citations."""

    async def _call_groq(self, model: ModelType, prompt: str) -> Optional[str]:
        """Make one API call to Groq with a specific model.

        Retries with exponential backoff on 429 and on transport errors;
        returns None (rather than raising) when this model cannot answer,
        so the caller can fall through to the next model.
        """
        config = MODEL_CONFIGS[model]

        payload = {
            "model": config.name,
            "messages": [
                {"role": "system", "content": self._get_system_prompt()},
                {"role": "user", "content": prompt},
            ],
            "temperature": config.temperature,
            "max_tokens": config.max_tokens,
        }

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        for attempt in range(self.max_retries):
            try:
                async with aiohttp.ClientSession() as session:
                    timeout = aiohttp.ClientTimeout(total=60)
                    async with session.post(
                        self.base_url, headers=headers, json=payload, timeout=timeout
                    ) as response:
                        if response.status == 429:
                            # Rate limited - wait and retry
                            delay = self.retry_delay * (2**attempt)
                            await asyncio.sleep(delay)
                            continue

                        if response.status != 200:
                            # Non-retryable HTTP error for this model.
                            return None

                        data = await response.json()
                        return data["choices"][0]["message"]["content"]

            except Exception:
                # Transport/parse failure: back off and retry until exhausted.
                if attempt < self.max_retries - 1:
                    delay = self.retry_delay * (2**attempt)
                    await asyncio.sleep(delay)
                    continue

                return None

        # Every attempt was rate-limited.
        return None

    async def generate_answer(self, query: str, sources: List[Source]) -> str:
        """Generate answer with multi-model fallback.

        Raises:
            Exception: when every configured model fails after retries.
        """
        # Build prompt from PDF and web sources separately.
        pdf_context = self._build_context([s for s in sources if s.type.value == "pdf"])
        web_context = self._build_context([s for s in sources if s.type.value == "web"])
        prompt = self._build_prompt(query, pdf_context, web_context)

        # PERF FIX: consult the cache BEFORE acquiring the rate-limit queue.
        # Previously the cache was checked inside the model loop after
        # request_queue.acquire(), so even a cache hit paid the full
        # inter-request delay.
        for model_type in self.model_order:
            cached = await self.cache.get(prompt, MODEL_CONFIGS[model_type].name)
            if cached:
                return cached

        # Acquire queue lock (prevent burst requests)
        await self.request_queue.acquire()

        # Try each model in order until one responds.
        for model_type in self.model_order:
            response = await self._call_groq(model_type, prompt)

            if response:
                # Cache successful response
                await self.cache.set(prompt, MODEL_CONFIGS[model_type].name, response)
                return response

        # All models failed
        raise Exception("All LLM models failed after retries")

    def get_model_info(self) -> Dict:
        """Get information about configured models"""
        return {
            "primary": MODEL_CONFIGS[ModelType.PRIMARY].name,
            "secondary": MODEL_CONFIGS[ModelType.SECONDARY].name,
            "fallback": MODEL_CONFIGS[ModelType.FALLBACK].name,
            "max_retries": self.max_retries,
            "retry_delay": self.retry_delay,
            "cache_ttl": self.cache.ttl,
        }

    def clear_cache(self):
        """Clear the response cache"""
        self.cache.clear()


# Create singleton instance
enhanced_llm_service = EnhancedLLMService()
backend/app/services/llm_service.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp
2
+ import asyncio
3
+ import time
4
+ from typing import List
5
+ from app.models.schemas import Source
6
+ from app.config import settings
7
+
8
+
9
class LLMService:
    """Thin async client for the Groq chat-completions API with retry/backoff."""

    def __init__(self):
        self.api_key = settings.GROQ_API_KEY
        self.model = settings.GROQ_MODEL
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        # Up to 5 attempts with exponential backoff (3s, 6s, 12s, ...).
        self.max_retries = 5
        self.base_delay = 3

    async def generate_answer(self, query: str, sources: List[Source]) -> str:
        """Generate a cited answer for `query` grounded only in `sources`.

        Raises:
            Exception: on non-429 HTTP errors, on transport failure at the
                final attempt, or when every attempt was rate-limited.
        """
        # Separate PDF and web snippets so the prompt labels them distinctly.
        pdf_context = self._build_context([s for s in sources if s.type.value == "pdf"])
        web_context = self._build_context([s for s in sources if s.type.value == "web"])

        prompt = self._build_prompt(query, pdf_context, web_context)

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self._get_system_prompt()},
                {"role": "user", "content": prompt},
            ],
            "temperature": settings.TEMPERATURE,
            "max_tokens": settings.MAX_TOKENS,
        }

        for attempt in range(self.max_retries):
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.post(
                        self.base_url,
                        headers=headers,
                        json=payload,
                        timeout=aiohttp.ClientTimeout(total=60),
                    ) as response:
                        if response.status == 429:
                            # Rate limited: back off exponentially and retry.
                            delay = self.base_delay * (2**attempt)
                            await asyncio.sleep(delay)
                            continue

                        if response.status != 200:
                            raise Exception(f"LLM API failed: {response.status}")

                        data = await response.json()
                        answer = data["choices"][0]["message"]["content"]
                        return answer

            except Exception as e:
                # Transient failure: retry with backoff; re-raise on final attempt.
                if attempt < self.max_retries - 1:
                    delay = self.base_delay * (2**attempt)
                    await asyncio.sleep(delay)
                    continue
                raise

        # Reached only when every attempt hit the 429 branch.
        raise Exception("LLM generation failed after retries")

    def _build_context(self, sources: List[Source]) -> str:
        """Format sources into numbered [Source N] blocks for the prompt."""
        if not sources:
            return "No context available."

        context_parts = []
        for i, source in enumerate(sources, 1):
            context_parts.append(
                f"[Source {i}] {source.title}\n"
                f"Reference: {source.reference}\n"
                f"Content: {source.content}\n"
            )

        return "\n\n".join(context_parts)

    def _get_system_prompt(self) -> str:
        # System message pinning the model to the supplied context with
        # [Source N] citations.
        return """You are a precise assistant that answers questions using only the provided context.

Rules:
1. Base your answer ONLY on the provided context
2. Cite sources using [Source N] notation
3. If the context doesn't contain enough information, say "I don't have enough information to answer this question"
4. Be concise and accurate
5. Do not make assumptions or use external knowledge"""

    def _build_prompt(self, query: str, pdf_context: str, web_context: str) -> str:
        # User message combining both context pools and the question.
        return f"""Context from Documents:
{pdf_context}

Context from Web:
{web_context}

Question: {query}

Provide a comprehensive answer based on the context above. Include source citations."""


llm_service = LLMService()
backend/app/services/pdf_processor.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pypdf
2
+ import pdfplumber
3
+ from pathlib import Path
4
+ from typing import List, Tuple
5
+ from app.utils.chunking import intelligent_chunk, create_chunk_metadata
6
+
7
+
8
class PDFProcessor:
    """Extracts text from PDFs and turns it into chunk records ready for
    embedding and storage."""

    def __init__(self):
        # 10 MB ceiling, mirroring the upload limit in config.
        self.max_file_size = 10 * 1024 * 1024

    async def extract_text(self, file_path: Path) -> List[Tuple[str, int]]:
        """Return (text, page_number) pairs for every non-empty page.

        Tries pdfplumber first; on any failure falls back to pypdf before
        giving up.
        """
        extracted: List[Tuple[str, int]] = []

        try:
            with pdfplumber.open(file_path) as pdf:
                for page_no, page in enumerate(pdf.pages, 1):
                    content = page.extract_text() or ""
                    if content.strip():
                        extracted.append((content, page_no))
        except Exception:
            # pdfplumber could not read the file; retry with pypdf.
            try:
                with open(file_path, "rb") as handle:
                    reader = pypdf.PdfReader(handle)
                    for page_no, page in enumerate(reader.pages, 1):
                        content = page.extract_text() or ""
                        if content.strip():
                            extracted.append((content, page_no))
            except Exception as fallback_error:
                raise Exception(f"Failed to extract text: {fallback_error}")

        return extracted

    async def process_document(self, file_path: Path, document_id: str) -> List[dict]:
        """Extract, chunk, and attach metadata for one uploaded document."""
        extracted = await self.extract_text(file_path)

        combined = "\n\n".join(text for text, _ in extracted)

        pieces = intelligent_chunk(text=combined, chunk_size=512, overlap=50)

        records = []
        for position, piece in enumerate(pieces):
            records.append(
                {
                    "text": piece,
                    "metadata": create_chunk_metadata(
                        document_id=document_id,
                        chunk_index=position,
                        page_number=self._find_page_number(piece, extracted),
                        total_chunks=len(pieces),
                    ),
                }
            )

        return records

    def _find_page_number(self, chunk: str, pages_text: List[Tuple[str, int]]) -> int:
        """Best-effort page lookup: match the chunk's first 50 chars against
        each page's text; return 0 when no page contains the prefix."""
        prefix = chunk[:50]

        for page_content, page_no in pages_text:
            if prefix in page_content:
                return page_no

        return 0

    def validate_file(self, file_path: Path) -> bool:
        """Raise on missing, oversized, or unreadable files; True otherwise."""
        if not file_path.exists():
            raise Exception("File does not exist")

        if file_path.stat().st_size > self.max_file_size:
            raise Exception("File size exceeds limit")

        try:
            with open(file_path, "rb") as handle:
                pypdf.PdfReader(handle)
                return True
        except Exception:
            raise Exception("Invalid PDF file")
80
+
81
+
82
+ pdf_processor = PDFProcessor()
backend/app/services/prompt_guard.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+
3
+
4
class PromptGuardService:
    """Input/output guardrails: jailbreak-phrase screening, caller-supplied
    topic restrictions, a length cap, and stripping of role-style prefixes
    from model output."""

    def __init__(self):
        # Lowercase phrases commonly seen in prompt-injection attempts.
        self.jailbreak_patterns = [
            "ignore previous instructions",
            "disregard all prior",
            "forget everything above",
            "you are now",
            "new role",
            "system:",
            "admin mode",
        ]

        self.restricted_topics = []

    async def validate_input(
        self, query: str, restrictions: Optional[List[str]] = None
    ) -> bool:
        """Raise if the query trips any guard; return True when it is clean."""
        lowered = query.lower()

        matched = next(
            (p for p in self.jailbreak_patterns if p in lowered), None
        )
        if matched is not None:
            raise Exception(f"Detected potential jailbreak attempt: '{matched}'")

        for restriction in restrictions or []:
            if restriction.lower() in lowered:
                raise Exception(f"Query violates restriction: '{restriction}'")

        if len(query) > 1000:
            raise Exception("Query exceeds maximum length")

        return True

    async def sanitize_output(self, answer: str) -> str:
        """Trim whitespace and strip a leading role prefix from the answer."""
        cleaned = answer.strip()

        for prefix in ("System:", "Admin:", "Debug:"):
            if cleaned.startswith(prefix):
                cleaned = cleaned[len(prefix):].strip()

        return cleaned

    def set_restrictions(self, topics: List[str]):
        """Replace the stored restricted-topic list."""
        self.restricted_topics = topics


prompt_guard = PromptGuardService()
backend/app/services/retriever.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional
2
+ from app.services.embeddings import embedding_service
3
+ from app.services.vector_store import vector_store
4
+ from app.services.web_search import web_search_service
5
+ from app.models.schemas import Source, SourceType, QueryMode
6
+
7
+
8
class RetrieverService:
    """Routes retrieval to the vector store and/or web search based on mode."""

    async def retrieve(
        self,
        query: str,
        mode: QueryMode,
        top_k: int = 5,
        document_ids: Optional[List[str]] = None,
    ) -> List[Source]:
        """Return up to `top_k` sources for `query` according to `mode`.

        Raises:
            ValueError: if `mode` is not a recognized QueryMode.
        """
        if mode == QueryMode.PDF:
            return await self._retrieve_from_pdf(query, top_k, document_ids or [])

        elif mode == QueryMode.WEB:
            return await self._retrieve_from_web(query, top_k)

        elif mode == QueryMode.HYBRID:
            # BUG FIX: splitting as top_k // 2 twice loses one slot for odd
            # top_k (e.g. 2 + 2 = 4 for top_k=5). Give PDF the remainder so
            # the merged pool can actually fill top_k results.
            pdf_budget = top_k - top_k // 2
            web_budget = top_k // 2
            # Get PDF sources
            pdf_sources = await self._retrieve_from_pdf(
                query, pdf_budget, document_ids or []
            )
            # Get web sources
            web_sources = await self._retrieve_from_web(query, web_budget)
            # Combine and rerank
            return self._merge_and_rerank(pdf_sources + web_sources, top_k)

        elif mode == QueryMode.RESTRICTED:
            # Restricted mode searches PDFs only; topic filtering happens
            # upstream in the prompt guard.
            return await self._retrieve_from_pdf(query, top_k, document_ids or [])

        # Defensive: previously an unrecognized mode silently returned None.
        raise ValueError(f"Unsupported query mode: {mode}")

    async def _retrieve_from_pdf(
        self, query: str, top_k: int, document_ids: Optional[List[str]] = None
    ) -> List[Source]:
        """Embed the query, search the vector store, and map hits to Sources."""
        query_embedding = embedding_service.embed_text(query)

        results = await vector_store.search(
            query_embedding=query_embedding, top_k=top_k, document_ids=document_ids
        )

        sources = []
        for result in results:
            sources.append(
                Source(
                    type=SourceType.PDF,
                    content=result["text"],
                    reference=f"Page {result['metadata']['page_number']}",
                    title=f"Document {result['metadata']['document_id']}",
                    relevance_score=result["similarity"],
                )
            )

        return sources

    async def _retrieve_from_web(self, query: str, top_k: int) -> List[Source]:
        """Run a web search and map results to Sources (default score 0.8)."""
        results = await web_search_service.search(query, max_results=top_k)

        sources = []
        for result in results:
            sources.append(
                Source(
                    type=SourceType.WEB,
                    content=result["snippet"],
                    reference=result["url"],
                    title=result["title"],
                    relevance_score=result.get("score", 0.8),
                )
            )

        return sources

    def _merge_and_rerank(self, sources: List[Source], top_k: int) -> List[Source]:
        """Sort the combined pool by relevance (descending) and truncate."""
        sorted_sources = sorted(
            sources, key=lambda x: x.relevance_score or 0, reverse=True
        )

        return sorted_sources[:top_k]


retriever_service = RetrieverService()
backend/app/services/vector_store.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.config import Settings as ChromaSettings
3
+ from typing import List, Dict, Optional
4
+ from pathlib import Path
5
+ from app.config import settings
6
+
7
+
8
class VectorStore:
    """Persistent ChromaDB wrapper holding all document chunks in a single
    "documents" collection."""

    def __init__(self):
        self.client = chromadb.PersistentClient(
            path=str(settings.VECTOR_DB_PATH),
            settings=ChromaSettings(anonymized_telemetry=False),
        )
        # Cosine distance space; search() converts distance back to similarity.
        self.collection = self.client.get_or_create_collection(
            name="documents", metadata={"hnsw:space": "cosine"}
        )

    async def add_chunks(self, chunks: List[dict], embeddings: List[List[float]]):
        """Insert pre-embedded chunks; IDs follow "<document_id>_chunk_<index>"."""
        ids = [
            f"{chunk['metadata']['document_id']}_chunk_{chunk['metadata']['chunk_index']}"
            for chunk in chunks
        ]

        documents = [chunk["text"] for chunk in chunks]

        metadatas = [
            {
                "document_id": chunk["metadata"]["document_id"],
                "chunk_index": chunk["metadata"]["chunk_index"],
                # Unknown pages become 0 (Chroma metadata values may not be None).
                "page_number": chunk["metadata"]["page_number"] or 0,
                "total_chunks": chunk["metadata"]["total_chunks"],
            }
            for chunk in chunks
        ]

        self.collection.add(
            ids=ids, embeddings=embeddings, documents=documents, metadatas=metadatas
        )

    async def search(
        self,
        query_embedding: List[float],
        top_k: int = 5,
        document_ids: Optional[List[str]] = None,
    ) -> List[Dict]:
        """Nearest-neighbor search, optionally filtered to specific documents.

        Returns a list of dicts with text, metadata, similarity
        (1 - cosine distance), and the chunk id.
        """
        where_filter = None
        if document_ids:
            where_filter = {"document_id": {"$in": document_ids}}

        results = self.collection.query(
            query_embeddings=[query_embedding], n_results=top_k, where=where_filter
        )

        search_results = []
        # Chroma returns parallel lists nested per query; we issued one query,
        # hence the [0] indexing throughout.
        for i in range(len(results["ids"][0])):
            search_results.append(
                {
                    "text": results["documents"][0][i],
                    "metadata": results["metadatas"][0][i],
                    "similarity": 1 - results["distances"][0][i],
                    "id": results["ids"][0][i],
                }
            )

        return search_results

    async def delete_document(self, document_id: str):
        """Remove every chunk belonging to one document."""
        self.collection.delete(where={"document_id": document_id})

    async def get_stats(self) -> Dict:
        """Return the total chunk count and the collection name."""
        count = self.collection.count()
        return {"total_chunks": count, "collection_name": self.collection.name}


vector_store = VectorStore()
backend/app/services/web_search.py ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Web Search Service
3
+ Supports multiple search providers:
4
+ - Tavily (AI-optimized, RAG-ready)
5
+ - Serper (Google search)
6
+ - Brave Search (Privacy-focused)
7
+ - You.com (AI-ready)
8
+ """
9
+
10
+ import aiohttp
11
+ import asyncio
12
+ import time
13
+ from typing import List, Dict, Optional, Literal
14
+ from abc import ABC, abstractmethod
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ import json
18
+
19
+ from app.config import settings
20
+
21
+
22
class SearchProvider(Enum):
    """Identifiers for the supported web search backends."""

    TAVILY = "tavily"
    SERPER = "serper"
    BRAVE = "brave"
    YOUCOM = "youcom"
27
+
28
+
29
@dataclass
class SearchResult:
    """One normalized web-search hit, independent of the provider."""

    title: str
    url: str
    snippet: str
    # Heuristic relevance; either provider-reported or derived from rank.
    score: float = 0.8
    # Name of the backend that produced this hit (e.g. "tavily").
    provider: str = "unknown"
36
+
37
+
38
@dataclass
class SearchConfig:
    """Per-provider settings for requests, retries and response caching."""

    provider: SearchProvider
    api_key: str
    max_results: int = 5
    # Total HTTP timeout in seconds for one request.
    timeout: int = 15
    retry_attempts: int = 3
    # Base delay in seconds between retry attempts.
    retry_delay: float = 2.0
    cache_ttl: int = 300  # 5 minutes
47
+
48
+
49
class BaseSearchProvider(ABC):
    """Abstract base class for search providers.

    Supplies a per-instance TTL result cache and an HTTP helper with
    retry/backoff. Subclasses implement `search` and `_format_results`.
    NOTE(review): the cache dict is not lock-protected; assumed to run on a
    single asyncio event loop — confirm if used across threads.
    """

    def __init__(self, config: SearchConfig):
        self.config = config
        # query string -> (results, unix timestamp when cached)
        self._cache: Dict[str, tuple] = {}

    @abstractmethod
    async def search(self, query: str) -> List[SearchResult]:
        pass

    @abstractmethod
    def _format_results(self, raw_data) -> List[SearchResult]:
        pass

    def _get_cache(self, query: str) -> Optional[List[SearchResult]]:
        # Return cached results when present and fresh; expired entries are
        # dropped eagerly so they are not rechecked.
        if query in self._cache:
            data, timestamp = self._cache[query]
            if time.time() - timestamp < self.config.cache_ttl:
                return data
            del self._cache[query]
        return None

    def _set_cache(self, query: str, results: List[SearchResult]):
        self._cache[query] = (results, time.time())

        # Clean old cache entries: once past 100 items, evict the 10 oldest.
        if len(self._cache) > 100:
            oldest = sorted(self._cache.keys(), key=lambda k: self._cache[k][1])[:10]
            for k in oldest:
                del self._cache[k]

    async def _make_request(
        self, url: str, params: Dict = None, headers: Dict = None, method: str = "GET"
    ) -> Dict:
        """Make HTTP request with retry logic.

        GET sends `params` as query string; any other method POSTs them as a
        JSON body. HTTP 429 triggers a linearly growing backoff and retry;
        other non-200 statuses and exhausted retries return {} (best-effort:
        callers treat an empty dict as "no results").
        """

        for attempt in range(self.config.retry_attempts):
            try:
                timeout = aiohttp.ClientTimeout(total=self.config.timeout)

                async with aiohttp.ClientSession(timeout=timeout) as session:
                    if method == "GET":
                        async with session.get(
                            url, params=params, headers=headers
                        ) as response:
                            if response.status == 429:
                                # Rate limited: back off proportionally to the
                                # attempt number, then retry.
                                await asyncio.sleep(
                                    self.config.retry_delay * (attempt + 1)
                                )
                                continue
                            if response.status != 200:
                                return {}
                            return await response.json()
                    else:
                        async with session.post(
                            url, json=params, headers=headers
                        ) as response:
                            if response.status == 429:
                                await asyncio.sleep(
                                    self.config.retry_delay * (attempt + 1)
                                )
                                continue
                            if response.status != 200:
                                return {}
                            return await response.json()

            except Exception as e:
                # Network/parse errors are swallowed; wait and retry unless
                # this was the final attempt (then fall through to return {}).
                if attempt < self.config.retry_attempts - 1:
                    await asyncio.sleep(self.config.retry_delay)
                    continue

        return {}
122
+
123
+
124
class TavilySearchProvider(BaseSearchProvider):
    """Tavily AI Search - Optimized for RAG and AI applications"""

    def __init__(self, api_key: str, max_results: int = 5):
        config = SearchConfig(
            provider=SearchProvider.TAVILY, api_key=api_key, max_results=max_results
        )
        super().__init__(config)
        self.base_url = "https://api.tavily.com/search"

    async def search(self, query: str) -> List[SearchResult]:
        """POST the query to Tavily; repeat queries are served from cache."""
        # Check cache first
        cached = self._get_cache(query)
        if cached:
            return cached

        # The API key is sent in the JSON body (not in a header).
        payload = {
            "api_key": self.config.api_key,
            "query": query,
            "search_depth": "advanced",
            "max_results": self.config.max_results,
            "include_answer": True,
            "include_raw_content": False,
            "include_images": False,
        }

        data = await self._make_request(self.base_url, params=payload, method="POST")

        results = self._format_results(data)
        self._set_cache(query, results)
        return results

    def _format_results(self, data: Dict) -> List[SearchResult]:
        """Normalize Tavily's `results` array into SearchResult records."""
        results = []

        search_results = data.get("results", [])

        for i, result in enumerate(search_results[: self.config.max_results]):
            results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("url", ""),
                    snippet=result.get("content", ""),
                    # Prefer Tavily's own relevance score; otherwise decay by rank.
                    score=result.get("score", 0.9 - (i * 0.05)),
                    provider="tavily",
                )
            )

        return results
173
+
174
+
175
class SerperSearchProvider(BaseSearchProvider):
    """Serper.dev - Google Search API"""

    def __init__(self, api_key: str, max_results: int = 5):
        config = SearchConfig(
            provider=SearchProvider.SERPER, api_key=api_key, max_results=max_results
        )
        super().__init__(config)
        # NOTE(review): this is SerpAPI's endpoint, not Serper.dev's
        # (Serper.dev uses https://google.serper.dev/search, POST, X-API-KEY
        # header). The request shape below (GET with `engine`/`api_key`
        # params) matches SerpAPI — confirm which service the configured key
        # actually belongs to; a genuine Serper.dev key will not work here.
        self.base_url = "https://serpapi.com/search"

    async def search(self, query: str) -> List[SearchResult]:
        """Run a Google search via the configured endpoint (cached)."""
        cached = self._get_cache(query)
        if cached:
            return cached

        params = {
            "engine": "google",
            "q": query,
            "api_key": self.config.api_key,
            "num": self.config.max_results,
        }

        data = await self._make_request(self.base_url, params=params)

        results = self._format_results(data)
        self._set_cache(query, results)
        return results

    def _format_results(self, data: Dict) -> List[SearchResult]:
        """Normalize `organic_results` entries into SearchResult records."""
        results = []

        organic_results = data.get("organic_results", [])

        for i, result in enumerate(organic_results[: self.config.max_results]):
            results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("link", ""),
                    snippet=result.get("snippet", ""),
                    # No score in the payload: rank-decayed heuristic.
                    score=0.8 - (i * 0.05),
                    provider="serper",
                )
            )

        return results
220
+
221
+
222
class BraveSearchProvider(BaseSearchProvider):
    """Brave Search API - Privacy-focused"""

    def __init__(self, api_key: str, max_results: int = 5):
        super().__init__(
            SearchConfig(
                provider=SearchProvider.BRAVE,
                api_key=api_key,
                max_results=max_results,
            )
        )
        self.base_url = "https://api.search.brave.com/res/v1/web/search"

    async def search(self, query: str) -> List[SearchResult]:
        """Query Brave web search, serving repeated queries from the TTL cache."""
        hit = self._get_cache(query)
        if hit:
            return hit

        raw = await self._make_request(
            self.base_url,
            params={"q": query, "count": self.config.max_results},
            headers={
                "Accept": "application/json",
                "X-Subscription-Token": self.config.api_key,
            },
        )

        formatted = self._format_results(raw)
        self._set_cache(query, formatted)
        return formatted

    def _format_results(self, data: Dict) -> List[SearchResult]:
        """Map Brave's `web.results` payload onto SearchResult records."""
        entries = data.get("web", {}).get("results", [])[: self.config.max_results]

        return [
            SearchResult(
                title=entry.get("title", ""),
                url=entry.get("url", ""),
                snippet=entry.get("description", ""),
                # Rank-decayed heuristic score starting at 0.85.
                score=0.85 - (rank * 0.05),
                provider="brave",
            )
            for rank, entry in enumerate(entries)
        ]
267
+
268
+
269
class YouComSearchProvider(BaseSearchProvider):
    """You.com - AI-Optimized Search"""

    def __init__(self, api_key: str, max_results: int = 5):
        config = SearchConfig(
            provider=SearchProvider.YOUCOM, api_key=api_key, max_results=max_results
        )
        super().__init__(config)
        self.base_url = "https://api.you.com/search"

    async def search(self, query: str) -> List[SearchResult]:
        """Bearer-token GET search against You.com (cached)."""
        cached = self._get_cache(query)
        if cached:
            return cached

        headers = {"Authorization": f"Bearer {self.config.api_key}"}

        params = {"query": query, "num": self.config.max_results}

        data = await self._make_request(self.base_url, params=params, headers=headers)

        results = self._format_results(data)
        self._set_cache(query, results)
        return results

    def _format_results(self, data: Dict) -> List[SearchResult]:
        """Normalize You.com `results` entries into SearchResult records."""
        results = []

        search_results = data.get("results", [])

        for i, result in enumerate(search_results[: self.config.max_results]):
            results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("url", ""),
                    snippet=result.get("snippet", ""),
                    # Prefer a provider-reported score; otherwise decay by rank.
                    score=result.get("score", 0.85 - (i * 0.05)),
                    provider="youcom",
                )
            )

        return results
311
+
312
+
313
class WebSearchService:
    """
    Unified web search service with provider selection.

    Providers are registered from configured API keys at construction time;
    `search()` falls back to any available provider when the requested or
    default one is not configured.
    """

    def __init__(self):
        # Default to Tavily (RAG-optimized)
        self.default_provider = SearchProvider.TAVILY
        self._providers: Dict[SearchProvider, BaseSearchProvider] = {}
        self._initialize_providers()

    def _initialize_providers(self):
        """Initialize available search providers from configured API keys."""

        # Tavily (recommended for RAG). Only register it with a genuine
        # Tavily key: the previous fallback to SERPER_API_KEY sent a Serper
        # key to Tavily's API, which rejects it, producing silent empty
        # results AND blocking the fallback to the working Serper provider.
        tavily_key = getattr(settings, "TAVILY_API_KEY", None)
        if tavily_key:
            self._providers[SearchProvider.TAVILY] = TavilySearchProvider(
                tavily_key, max_results=5
            )

        # Serper (getattr so a missing settings attribute is not fatal).
        serper_key = getattr(settings, "SERPER_API_KEY", None)
        if serper_key:
            self._providers[SearchProvider.SERPER] = SerperSearchProvider(
                serper_key, max_results=5
            )

    def set_provider(self, provider: SearchProvider):
        """Change the active search provider (ignored when not configured)."""
        if provider in self._providers:
            self.default_provider = provider

    async def search(
        self,
        query: str,
        max_results: int = 5,
        provider: Optional[SearchProvider] = None,
    ) -> List[Dict]:
        """
        Search using specified or default provider.

        Args:
            query: Search query
            max_results: Maximum number of results
            provider: Specific provider to use (optional)

        Returns:
            List of search results as plain dicts (title/url/snippet/score)

        NOTE: max_results is currently not forwarded to the provider; each
        provider uses its own configured limit.
        """
        selected = provider or self.default_provider

        if selected not in self._providers:
            # Fall back to any available provider; none configured -> [].
            if not self._providers:
                return []
            selected = next(iter(self._providers))

        backend = self._providers[selected]
        results = await backend.search(query)

        # Convert to plain-dict format for JSON serialization.
        return [
            {
                "title": r.title,
                "url": r.url,
                "snippet": r.snippet,
                "score": r.score,
            }
            for r in results
        ]

    def get_available_providers(self) -> List[str]:
        """Get list of configured provider names."""
        return [p.value for p in self._providers.keys()]

    def get_current_provider(self) -> str:
        """Get the current default provider's name."""
        return self.default_provider.value
395
+
396
+
397
+ # Factory function to create service
398
def create_web_search_service(provider: str = "tavily") -> WebSearchService:
    """Create a web search service, pre-selecting the named provider if known."""
    service = WebSearchService()

    # Map the string name onto the enum; unknown names leave the default.
    known = {member.value: member for member in SearchProvider}
    selected = known.get(provider.lower())
    if selected is not None:
        service.set_provider(selected)

    return service
413
+
414
+
415
+ # Default instance
416
+ web_search_service = WebSearchService()
backend/app/utils/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .chunking import intelligent_chunk, create_chunk_metadata
2
+ from .rate_limiter import RateLimiter, RequestCache, RateLimitedWebSearch
3
+
4
+ __all__ = [
5
+ "intelligent_chunk",
6
+ "create_chunk_metadata",
7
+ "RateLimiter",
8
+ "RequestCache",
9
+ "RateLimitedWebSearch",
10
+ ]
backend/app/utils/chunking.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List, Optional, Dict, Any
3
+
4
+
5
def intelligent_chunk(text: str, chunk_size: int = 512, overlap: int = 50) -> List[str]:
    """Split text into sentence-aligned chunks of roughly `chunk_size` words.

    Consecutive chunks share trailing sentences totalling at most `overlap`
    words, so context carries across chunk boundaries.

    Args:
        text: Source text to split.
        chunk_size: Soft maximum chunk length, in words.
        overlap: Maximum number of overlapping words between chunks.

    Returns:
        List of chunk strings; empty list for blank input (previously a
        single empty-string chunk was produced, which would get embedded).
    """
    if not text or not text.strip():
        return []

    # Split on sentence-ending punctuation followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", text)

    chunks: List[str] = []
    current_chunk: List[str] = []
    current_length = 0  # word count of current_chunk

    for sentence in sentences:
        sentence_length = len(sentence.split())

        if current_length + sentence_length > chunk_size and current_chunk:
            chunks.append(" ".join(current_chunk))

            # Carry over trailing sentences totalling at most `overlap`
            # words. (The previous code sliced `overlap` *sentences* while
            # measuring size in *words*; for typical chunks with fewer than
            # `overlap` sentences it carried the ENTIRE previous chunk
            # forward, producing near-duplicate chunks that grew by one
            # sentence each.)
            overlap_sentences: List[str] = []
            overlap_words = 0
            for prev in reversed(current_chunk):
                words = len(prev.split())
                if overlap_words + words > overlap:
                    break
                overlap_sentences.insert(0, prev)
                overlap_words += words

            current_chunk = overlap_sentences + [sentence]
            current_length = overlap_words + sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks
33
+
34
+
35
def create_chunk_metadata(
    document_id: str,
    chunk_index: int,
    page_number: Optional[int] = None,
    section: Optional[str] = None,
    total_chunks: int = 0,
) -> Dict[str, Any]:
    """Bundle chunk bookkeeping fields into the metadata dict stored with it."""
    return dict(
        document_id=document_id,
        chunk_index=chunk_index,
        page_number=page_number,
        section=section,
        total_chunks=total_chunks,
    )
backend/app/utils/rate_limiter.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Rate Limiting Handler for Web Search API
3
+
4
+ This module provides:
5
+ 1. Exponential backoff retry logic
6
+ 2. Request caching
7
+ 3. Rate limit detection and handling
8
+ 4. Request queuing
9
+ """
10
+
11
+ import asyncio
12
+ import time
13
+ from typing import List, Dict, Optional, Callable
14
+ from dataclasses import dataclass
15
+ from datetime import datetime, timedelta
16
+ import threading
17
+
18
+
19
@dataclass
class RateLimitConfig:
    """Tunable limits for request throttling, retries and caching."""

    max_requests_per_minute: int = 30
    max_requests_per_hour: int = 500
    retry_base_delay: float = 2.0
    max_retry_attempts: int = 3
    cache_ttl_seconds: int = 300


class RateLimiter:
    """Token bucket rate limiter (per-minute granularity, thread-safe)."""

    def __init__(self, config: RateLimitConfig = None):
        self.config = config or RateLimitConfig()
        # Bucket starts full, capped at the per-minute budget.
        self.tokens = self.config.max_requests_per_minute
        self.last_update = datetime.now()
        self.lock = threading.Lock()

    def acquire(self) -> bool:
        """Take one token if available; return False when rate-limited.

        Bug fix: `last_update` now advances on EVERY call, not only on a
        successful acquisition. Previously a failed acquire left the old
        timestamp in place after tokens had already been refilled, so the
        same elapsed interval was counted again on the next call and the
        bucket refilled faster than configured.
        """
        with self.lock:
            now = datetime.now()
            elapsed = (now - self.last_update).total_seconds()

            # Refill tokens proportionally to elapsed time, capped at budget.
            tokens_to_add = elapsed * (self.config.max_requests_per_minute / 60)
            self.tokens = min(
                self.config.max_requests_per_minute, self.tokens + tokens_to_add
            )
            # Advance unconditionally: the refill for `elapsed` was applied.
            self.last_update = now

            if self.tokens >= 1:
                self.tokens -= 1
                return True

            return False

    def wait_for_token(self, timeout: float = 60) -> bool:
        """Block (sleeping) until a token is available or `timeout` expires.

        WARNING: this sleeps synchronously; do not call from async code.
        """
        start = time.time()
        while time.time() - start < timeout:
            if self.acquire():
                return True
            time.sleep(0.1)
        return False
62
+
63
+
64
class RequestCache:
    """Thread-safe TTL cache mapping query keys to search-result lists."""

    def __init__(self, ttl_seconds: int = 300):
        self.ttl = ttl_seconds
        # key -> (data, datetime when stored)
        self._cache: Dict[str, tuple] = {}
        self.lock = threading.Lock()

    def get(self, key: str) -> Optional[List[Dict]]:
        """Return cached data for `key`, or None when absent or expired."""
        with self.lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            data, stored_at = entry
            age = (datetime.now() - stored_at).total_seconds()
            if age < self.ttl:
                return data
            # Expired: drop eagerly so it is never reconsidered.
            del self._cache[key]
            return None

    def set(self, key: str, data: List[Dict]):
        """Store `data` under `key`; evict the 10 oldest past 100 entries."""
        with self.lock:
            self._cache[key] = (data, datetime.now())

            if len(self._cache) > 100:
                by_age = sorted(self._cache.items(), key=lambda item: item[1][1])
                for stale_key, _ in by_age[:10]:
                    del self._cache[stale_key]

    def clear(self):
        """Drop every cached entry."""
        with self.lock:
            self._cache.clear()
+
95
+
96
class RateLimitedWebSearch:
    """Web search with rate limiting, caching and retry/backoff."""

    def __init__(self, search_func: Callable, config: RateLimitConfig = None):
        self.config = config or RateLimitConfig()
        self.rate_limiter = RateLimiter(self.config)
        self.cache = RequestCache(self.config.cache_ttl_seconds)
        # Async callable: (query, max_results) -> List[Dict]
        self.search_func = search_func

    async def search(
        self, query: str, max_results: int = 5, use_cache: bool = True
    ) -> List[Dict]:
        """Run a cached, rate-limited, retrying search; [] on failure."""
        cache_key = f"{query}_{max_results}"

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached:
                return cached

        # Wait for a rate-limit token WITHOUT blocking the event loop.
        # (Bug fix: the previous code called the synchronous
        # `wait_for_token()`, whose time.sleep() stalled every coroutine on
        # the loop for up to 60 seconds while waiting.)
        if not await self._acquire_token_async(timeout=60.0):
            return []

        # Retry with exponential backoff
        for attempt in range(self.config.max_retry_attempts):
            try:
                results = await self.search_func(query, max_results)

                if results:
                    if use_cache:
                        self.cache.set(cache_key, results)
                    return results

                # Empty results may indicate upstream rate limiting; back off.
                await asyncio.sleep(self.config.retry_base_delay * (attempt + 1))

            except Exception:
                if attempt < self.config.max_retry_attempts - 1:
                    await asyncio.sleep(self.config.retry_base_delay * (2**attempt))
                    continue

        return []

    async def _acquire_token_async(self, timeout: float) -> bool:
        """Cooperatively poll the token bucket until a token or timeout."""
        deadline = time.time() + timeout
        if self.rate_limiter.acquire():
            return True
        while time.time() < deadline:
            await asyncio.sleep(0.1)
            if self.rate_limiter.acquire():
                return True
        return False

    def get_cache_stats(self) -> Dict:
        """Expose cache/limit settings for diagnostics."""
        return {
            "cached_items": len(self.cache._cache),
            "ttl_seconds": self.config.cache_ttl_seconds,
            "rate_limit_per_minute": self.config.max_requests_per_minute,
        }

    def clear_cache(self):
        """Empty the response cache."""
        self.cache.clear()
+ self.cache.clear()
149
+
150
+
151
+ # Example usage with Serper API
152
async def serper_search(query: str, max_results: int = 5) -> List[Dict]:
    """Example SerpAPI Google-search function.

    Security fix: the API key was hard-coded in source (a live credential
    committed to the repository). It is now read from the environment
    (SERPAPI_API_KEY, falling back to SERPER_API_KEY); with no key set the
    function returns [] instead of making an unauthenticated request.
    """
    import aiohttp
    import os

    api_key = os.environ.get("SERPAPI_API_KEY") or os.environ.get(
        "SERPER_API_KEY", ""
    )
    if not api_key:
        return []

    url = "https://serpapi.com/search"

    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key,
        "num": max_results,
    }

    async with aiohttp.ClientSession() as session:
        async with session.get(url, params=params) as response:
            if response.status == 200:
                data = await response.json()
                results = data.get("organic_results", [])
                return [
                    {
                        "title": r.get("title", ""),
                        "url": r.get("link", ""),
                        "snippet": r.get("snippet", ""),
                        "score": 0.8,
                    }
                    for r in results[:max_results]
                ]
            return []
181
+
182
+
183
+ # Create rate-limited instance
184
+ rate_limited_search = RateLimitedWebSearch(serper_search)
185
+
186
+
187
+ if __name__ == "__main__":
188
+
189
+ async def test():
190
+ # Test the rate-limited search
191
+ results = await rate_limited_search.search("Python programming", 3)
192
+ print(f"Found {len(results)} results")
193
+ print(f"Cache stats: {rate_limited_search.get_cache_stats()}")
194
+
195
+ # Test cache hit
196
+ results2 = await rate_limited_search.search("Python programming", 3)
197
+ print(f"Cache hit: {len(results2)} results")
198
+
199
+ asyncio.run(test())
backend/reproduce_query.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import time
3
+ import sys
4
+ import os
5
+
6
+ # Configuration
7
+ BASE_URL = "http://localhost:8000/api/v1"
8
+ UPLOAD_URL = f"{BASE_URL}/upload/"
9
+ QUERY_URL = f"{BASE_URL}/query/"
10
+ PDF_FILE = "test.pdf"
11
+
12
def create_dummy_pdf():
    # Generate a tiny one-page PDF fixture containing a known "secret code"
    # that query_pdf() later asserts on. Requires reportlab to be installed.
    from reportlab.pdfgen import canvas
    c = canvas.Canvas(PDF_FILE)
    c.drawString(100, 750, "This is a test PDF document for RAG system debugging.")
    c.drawString(100, 730, "The secret code is: ALPHA-BETA-GAMMA.")
    c.save()
    print(f"Created dummy PDF: {PDF_FILE}")
19
+
20
def upload_pdf():
    # POST the fixture PDF to the upload endpoint. Returns the new
    # document_id on success, or None on any failure (errors are printed,
    # never raised, so the caller can exit with a status code).
    print(f"Uploading {PDF_FILE}...")
    with open(PDF_FILE, "rb") as f:
        files = {"file": f}
        try:
            response = requests.post(UPLOAD_URL, files=files, timeout=10)
            if response.status_code == 200:
                print("Upload success:", response.json())
                return response.json()["document_id"]
            else:
                print(f"Upload failed: {response.status_code} - {response.text}")
                return None
        except Exception as e:
            print(f"Upload error: {e}")
            return None
35
+
36
def query_pdf(document_id, query="What is the secret code?"):
    # Query the RAG endpoint in PDF-only mode, scoped to the uploaded
    # document, and verify the known secret appears in the answer.
    # Returns True only when the expected string is found.
    print(f"Querying: '{query}' for document {document_id}")
    payload = {
        "query": query,
        "mode": "pdf",
        "document_ids": [document_id] if document_id else [],
        "top_k": 3
    }

    try:
        response = requests.post(QUERY_URL, json=payload, timeout=30)
        print(f"Status Code: {response.status_code}")
        print(f"Response: {response.text}")

        if response.status_code == 200:
            data = response.json()
            print(f"Answer: {data.get('answer')}")
            print(f"Sources: {len(data.get('sources', []))}")
            # The fixture PDF embeds this exact code; retrieval + generation
            # succeeded only if it is echoed back.
            if data.get('answer') and "ALPHA-BETA-GAMMA" in data.get('answer'):
                print("SUCCESS: Retrieved correct answer.")
                return True
            else:
                print("FAILURE: Answer incorrect or missing.")
                return False
        return False
    except Exception as e:
        print(f"Query Error: {e}")
        return False
64
+
65
+ if __name__ == "__main__":
66
+ if not os.path.exists(PDF_FILE):
67
+ create_dummy_pdf()
68
+
69
+ doc_id = upload_pdf()
70
+ if doc_id:
71
+ # Wait a bit for indexing if async? (Though implementation seemed synchronous await)
72
+ time.sleep(2)
73
+ success = query_pdf(doc_id)
74
+ if success:
75
+ sys.exit(0)
76
+ else:
77
+ sys.exit(1)
78
+ else:
79
+ sys.exit(1)
backend/reproduce_upload.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ import os
4
+
5
+ # Create a dummy PDF file
6
+ with open("test.pdf", "wb") as f:
7
+ f.write(b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n/Resources <<\n/Font <<\n/F1 4 0 R\n>>\n>>\n/Contents 5 0 R\n>>\nendobj\n4 0 obj\n<<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Helvetica\n>>\nendobj\n5 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 24 Tf\n100 100 Td\n(Hello World) Tj\nET\nendstream\nendobj\nxref\n0 6\n0000000000 65535 f\n0000000010 00000 n\n0000000060 00000 n\n0000000117 00000 n\n0000000216 00000 n\n0000000303 00000 n\ntrailer\n<<\n/Size 6\n/Root 1 0 R\n>>\nstartxref\n397\n%%EOF")
8
+
9
+ url = "http://localhost:8000/api/v1/upload/"
10
+ files = {'file': ('test.pdf', open('test.pdf', 'rb'), 'application/pdf')}
11
+
12
+ try:
13
+ response = requests.post(url, files=files)
14
+ print(f"Status Code: {response.status_code}")
15
+ print(f"Response: {response.text}")
16
+ except Exception as e:
17
+ print(f"Error: {e}")
18
+ finally:
19
+ if os.path.exists("test.pdf"):
20
+ os.remove("test.pdf")
backend/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.0
2
+ uvicorn[standard]==0.27.0
3
+ python-dotenv==1.0.0
4
+ pydantic==2.5.0
5
+ pydantic-settings==2.1.0
6
+ groq==0.4.0
7
+ pypdf==3.17.0
8
+ pdfplumber==0.11.0
9
+ requests==2.31.0
10
+ python-multipart==0.0.6
11
+ aiofiles==23.2.1
12
+ chromadb==0.4.24
13
+ numpy<2.0.0
14
+ sentence-transformers>=3.0.0
15
+ aiohttp>=3.9.0
16
+
17
+ # OCR Dependencies for Scanned PDFs
18
+ pytesseract==0.3.10
19
+ pdf2image==1.17.0
20
+ Pillow==10.3.0
frontend/.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ VITE_API_URL=http://localhost:8000/api/v1
frontend/.env.local ADDED
@@ -0,0 +1 @@
 
 
1
+ VITE_API_URL=http://localhost:8001/api/v1
frontend/README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RAG System Frontend
2
+
3
+ React + TypeScript frontend for the Production-Grade RAG System.
4
+
5
+ ## Tech Stack
6
+
7
+ - **Framework**: React 18+ with TypeScript
8
+ - **Styling**: Tailwind CSS
9
+ - **State Management**: React Context API
10
+ - **HTTP Client**: Axios
11
+ - **File Upload**: react-dropzone
12
+ - **Icons**: lucide-react
13
+ - **Notifications**: react-hot-toast
14
+
15
+ ## Project Structure
16
+
17
+ ```
18
+ src/
19
+ ├── components/
20
+ │ ├── layout/
21
+ │ │ ├── Header.tsx
22
+ │ │ ├── Sidebar.tsx
23
+ │ │ └── MainContent.tsx
24
+ │ ├── documents/
25
+ │ │ ├── FileUpload.tsx
26
+ │ │ ├── DocumentList.tsx
27
+ │ │ └── DocumentCard.tsx
28
+ │ ├── query/
29
+ │ │ ├── QueryInput.tsx
30
+ │ │ └── ModeSelector.tsx
31
+ │ ├── results/
32
+ │ │ ├── ResultsDisplay.tsx
33
+ │ │ ├── AnswerCard.tsx
34
+ │ │ ├── ConfidenceIndicator.tsx
35
+ │ │ ├── SourcesList.tsx
36
+ │ │ └── SourceCard.tsx
37
+ │ ├── common/
38
+ │ │ └── EmptyState.tsx
39
+ │ └── settings/
40
+ │ └── SettingsModal.tsx
41
+ ├── services/
42
+ │ └── api.ts
43
+ ├── hooks/
44
+ ├── context/
45
+ │ └── AppContext.tsx
46
+ ├── types/
47
+ │ └── index.ts
48
+ ├── App.tsx
49
+ └── index.tsx
50
+ ```
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ npm install
56
+ ```
57
+
58
+ ## Development
59
+
60
+ ```bash
61
+ npm run dev
62
+ ```
63
+
64
+ ## Build
65
+
66
+ ```bash
67
+ npm run build
68
+ ```
69
+
70
+ ## Configuration
71
+
72
+ Copy `.env.example` to `.env` and configure:
73
+
74
+ ```
75
+ VITE_API_URL=http://localhost:8000/api/v1
76
+ ```
77
+
78
+ ## Features
79
+
80
+ - PDF document upload with drag-and-drop
81
+ - Document management (list, select, delete)
82
+ - Multiple query modes (Web, PDF, Hybrid, Restricted)
83
+ - Real-time confidence scoring
84
+ - Source citations and attribution
85
+ - Dark/light theme
86
+ - Responsive design
87
+ - Keyboard shortcuts (Enter to submit)
frontend/index.html ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>RAG System - Production Ready</title>
8
+ </head>
9
+ <body>
10
+ <div id="root"></div>
11
+ <script type="module" src="/src/main.tsx"></script>
12
+ </body>
13
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "rag-frontend",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "react": "^18.2.0",
7
+ "react-dom": "^18.2.0",
8
+ "axios": "^1.6.0",
9
+ "react-dropzone": "^14.2.0",
10
+ "lucide-react": "^0.294.0",
11
+ "react-hot-toast": "^2.4.1",
12
+ "clsx": "^2.0.0"
13
+ },
14
+ "devDependencies": {
15
+ "@types/react": "^18.2.0",
16
+ "@types/react-dom": "^18.2.0",
17
+ "@vitejs/plugin-react": "^4.2.0",
18
+ "typescript": "^5.3.0",
19
+ "vite": "^5.0.0",
20
+ "tailwindcss": "^3.3.0",
21
+ "postcss": "^8.4.0",
22
+ "autoprefixer": "^10.4.0"
23
+ },
24
+ "scripts": {
25
+ "dev": "vite",
26
+ "build": "tsc && vite build",
27
+ "preview": "vite preview"
28
+ }
29
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ plugins: {
3
+ tailwindcss: {},
4
+ autoprefixer: {},
5
+ },
6
+ }
frontend/public/vite.svg ADDED
frontend/src/App.tsx ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AppProvider } from './context/AppContext';
2
+ import { Header } from './components/layout/Header';
3
+ import { Sidebar } from './components/layout/Sidebar';
4
+ import { MainContent } from './components/layout/MainContent';
5
+ import { SettingsModal } from './components/settings/SettingsModal';
6
+
7
+ function App() {
8
+ return (
9
+ <AppProvider>
10
+ <div className="min-h-screen bg-gray-50 dark:bg-gray-900">
11
+ <Header />
12
+ <Sidebar />
13
+ <MainContent />
14
+ <SettingsModal />
15
+ </div>
16
+ </AppProvider>
17
+ );
18
+ }
19
+
20
+ export default App;
frontend/src/components/common/EmptyState.tsx ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import {
3
+ Brain,
4
+ FileText,
5
+ Search,
6
+ MessageCircle,
7
+ ArrowRight
8
+ } from 'lucide-react';
9
+
10
+ export const EmptyState: React.FC = () => {
11
+ const features = [
12
+ {
13
+ icon: <FileText className="w-5 h-5" />,
14
+ title: 'Upload Documents',
15
+ description: 'Drag and drop PDF files to add them to your knowledge base',
16
+ },
17
+ {
18
+ icon: <Search className="w-5 h-5" />,
19
+ title: 'Smart Search',
20
+ description: 'Ask questions and get answers from your documents and the web',
21
+ },
22
+ {
23
+ icon: <Brain className="w-5 h-5" />,
24
+ title: 'AI Powered',
25
+ description: 'Powered by Groq LLM for fast, accurate responses',
26
+ },
27
+ {
28
+ icon: <MessageCircle className="w-5 h-5" />,
29
+ title: 'Source Citations',
30
+ description: 'Every answer includes sources so you can verify the information',
31
+ },
32
+ ];
33
+
34
+ return (
35
+ <div className="text-center py-16">
36
+ <div className="inline-flex items-center justify-center w-20 h-20 rounded-full bg-primary-100 dark:bg-primary-900/30 mb-6">
37
+ <Brain className="w-10 h-10 text-primary-600 dark:text-primary-400" />
38
+ </div>
39
+
40
+ <h2 className="text-2xl font-bold text-gray-900 dark:text-white mb-2">
41
+ Welcome to RAG System
42
+ </h2>
43
+
44
+ <p className="text-gray-600 dark:text-gray-400 mb-8 max-w-md mx-auto">
45
+ Upload documents and ask questions to get AI-powered answers with source citations.
46
+ </p>
47
+
48
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-4 max-w-2xl mx-auto">
49
+ {features.map((feature, index) => (
50
+ <div
51
+ key={index}
52
+ className="flex items-start gap-3 p-4 bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 text-left"
53
+ >
54
+ <div className="flex-shrink-0 w-10 h-10 rounded-lg bg-primary-100 dark:bg-primary-900/30 flex items-center justify-center">
55
+ <div className="text-primary-600 dark:text-primary-400">
56
+ {feature.icon}
57
+ </div>
58
+ </div>
59
+ <div>
60
+ <h3 className="font-medium text-gray-900 dark:text-white">
61
+ {feature.title}
62
+ </h3>
63
+ <p className="text-sm text-gray-500 dark:text-gray-500 mt-1">
64
+ {feature.description}
65
+ </p>
66
+ </div>
67
+ </div>
68
+ ))}
69
+ </div>
70
+
71
+ <div className="mt-8 flex items-center justify-center gap-2 text-sm text-gray-500">
72
+ <span>Start by uploading a document</span>
73
+ <ArrowRight className="w-4 h-4" />
74
+ </div>
75
+ </div>
76
+ );
77
+ };
frontend/src/components/documents/DocumentCard.tsx ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { FileText, Trash2, CheckCircle, AlertCircle } from 'lucide-react';
3
+ import type { Document } from '../../types';
4
+ import { useApp } from '../../context/AppContext';
5
+
6
+ interface DocumentCardProps {
7
+ document: Document;
8
+ }
9
+
10
+ export const DocumentCard: React.FC<DocumentCardProps> = ({ document }) => {
11
+ const { state, dispatch, handleDeleteDocument } = useApp();
12
+ const isSelected = state.selectedDocuments.includes(document.id);
13
+
14
+ const handleToggle = () => {
15
+ dispatch({ type: 'TOGGLE_DOCUMENT_SELECTION', payload: document.id });
16
+ };
17
+
18
+ const handleDelete = async (e: React.MouseEvent) => {
19
+ e.stopPropagation();
20
+ await handleDeleteDocument(document.id);
21
+ };
22
+
23
+ const formatDate = (dateString: string) => {
24
+ const date = new Date(dateString);
25
+ return date.toLocaleDateString('en-US', {
26
+ month: 'short',
27
+ day: 'numeric',
28
+ year: 'numeric',
29
+ });
30
+ };
31
+
32
+ return (
33
+ <div
34
+ onClick={handleToggle}
35
+ className={`
36
+ p-3 rounded-lg border cursor-pointer transition-all duration-200
37
+ ${isSelected
38
+ ? 'border-primary-500 bg-primary-50 dark:bg-primary-900/20'
39
+ : 'border-gray-200 dark:border-gray-700 hover:border-gray-300 dark:hover:border-gray-600'
40
+ }
41
+ `}
42
+ >
43
+ <div className="flex items-start gap-3">
44
+ <div className={`
45
+ w-8 h-8 rounded-lg flex items-center justify-center flex-shrink-0
46
+ ${isSelected
47
+ ? 'bg-primary-100 dark:bg-primary-900/40'
48
+ : 'bg-gray-100 dark:bg-gray-700'
49
+ }
50
+ `}>
51
+ {isSelected ? (
52
+ <CheckCircle className="w-5 h-5 text-primary-600 dark:text-primary-400" />
53
+ ) : (
54
+ <FileText className="w-5 h-5 text-gray-500 dark:text-gray-400" />
55
+ )}
56
+ </div>
57
+
58
+ <div className="flex-1 min-w-0">
59
+ <p className="text-sm font-medium text-gray-900 dark:text-white truncate">
60
+ {document.filename}
61
+ </p>
62
+ <div className="flex items-center gap-2 mt-1">
63
+ <span className="text-xs text-gray-500 dark:text-gray-500">
64
+ {formatDate(document.uploadDate)}
65
+ </span>
66
+ <span className="text-xs px-1.5 py-0.5 bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400 rounded">
67
+ {document.chunkCount} chunks
68
+ </span>
69
+ </div>
70
+ </div>
71
+
72
+ <button
73
+ onClick={handleDelete}
74
+ className="p-1 hover:bg-red-100 dark:hover:bg-red-900/20 rounded transition-colors"
75
+ >
76
+ <Trash2 className="w-4 h-4 text-gray-400 hover:text-red-500" />
77
+ </button>
78
+ </div>
79
+
80
+ {document.status !== 'ready' && (
81
+ <div className="flex items-center gap-1 mt-2 text-xs text-amber-600 dark:text-amber-400">
82
+ {document.status === 'processing' ? (
83
+ <>
84
+ <AlertCircle className="w-3 h-3" />
85
+ <span>Processing...</span>
86
+ </>
87
+ ) : (
88
+ <>
89
+ <AlertCircle className="w-3 h-3" />
90
+ <span>Error processing</span>
91
+ </>
92
+ )}
93
+ </div>
94
+ )}
95
+ </div>
96
+ );
97
+ };
frontend/src/components/documents/DocumentList.tsx ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { FileText, Search } from 'lucide-react';
3
+ import { useApp } from '../../context/AppContext';
4
+ import { DocumentCard } from './DocumentCard';
5
+
6
+ export const DocumentList: React.FC = () => {
7
+ const { state } = useApp();
8
+
9
+ return (
10
+ <div className="space-y-4">
11
+ <div className="flex items-center justify-between">
12
+ <h3 className="text-sm font-semibold text-gray-900 dark:text-white uppercase tracking-wide">
13
+ Documents
14
+ </h3>
15
+ <span className="text-xs text-gray-500 dark:text-gray-500">
16
+ {state.documents.length} file{state.documents.length !== 1 ? 's' : ''}
17
+ </span>
18
+ </div>
19
+
20
+ <div className="relative">
21
+ <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
22
+ <input
23
+ type="text"
24
+ placeholder="Search documents..."
25
+ className="w-full pl-10 pr-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-700 text-gray-900 dark:text-white placeholder-gray-500 focus:outline-none focus:ring-2 focus:ring-primary-500"
26
+ />
27
+ </div>
28
+
29
+ <div className="space-y-2 max-h-96 overflow-y-auto scrollbar-thin">
30
+ {state.documents.length === 0 ? (
31
+ <div className="text-center py-8">
32
+ <FileText className="w-12 h-12 mx-auto text-gray-300 dark:text-gray-600 mb-3" />
33
+ <p className="text-sm text-gray-500 dark:text-gray-500">
34
+ No documents uploaded
35
+ </p>
36
+ <p className="text-xs text-gray-400 dark:text-gray-600 mt-1">
37
+ Upload PDFs to get started
38
+ </p>
39
+ </div>
40
+ ) : (
41
+ state.documents.map(doc => (
42
+ <DocumentCard key={doc.id} document={doc} />
43
+ ))
44
+ )}
45
+ </div>
46
+ </div>
47
+ );
48
+ };
frontend/src/components/documents/FileUpload.tsx ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useCallback } from 'react';
2
+ import { useDropzone } from 'react-dropzone';
3
+ import { Upload, FileText, Loader2 } from 'lucide-react';
4
+ import { useApp } from '../../context/AppContext';
5
+
6
+ export const FileUpload: React.FC = () => {
7
+ const { state, handleUpload } = useApp();
8
+
9
+ const onDrop = useCallback(
10
+ (acceptedFiles: File[]) => {
11
+ const pdfFile = acceptedFiles.find(file => file.type === 'application/pdf');
12
+ if (pdfFile) {
13
+ handleUpload(pdfFile);
14
+ }
15
+ },
16
+ [handleUpload]
17
+ );
18
+
19
+ const { getRootProps, getInputProps, isDragActive } = useDropzone({
20
+ onDrop,
21
+ accept: {
22
+ 'application/pdf': ['.pdf'],
23
+ },
24
+ maxFiles: 1,
25
+ });
26
+
27
+ return (
28
+ <div className="space-y-4">
29
+ <h3 className="text-sm font-semibold text-gray-900 dark:text-white uppercase tracking-wide">
30
+ Upload Documents
31
+ </h3>
32
+
33
+ <div
34
+ {...getRootProps()}
35
+ className={`
36
+ border-2 border-dashed rounded-xl p-6 text-center cursor-pointer transition-all duration-200
37
+ ${isDragActive
38
+ ? 'border-primary-500 bg-primary-50 dark:bg-primary-900/20'
39
+ : 'border-gray-300 dark:border-gray-600 hover:border-primary-400 hover:bg-gray-50 dark:hover:bg-gray-700/50'
40
+ }
41
+ `}
42
+ >
43
+ <input {...getInputProps()} />
44
+
45
+ {state.isUploading ? (
46
+ <div className="flex flex-col items-center gap-2">
47
+ <Loader2 className="w-10 h-10 text-primary-500 animate-spin" />
48
+ <p className="text-sm text-gray-600 dark:text-gray-400">
49
+ Processing... {state.uploadProgress}%
50
+ </p>
51
+ <div className="w-full max-w-xs bg-gray-200 dark:bg-gray-700 rounded-full h-2">
52
+ <div
53
+ className="bg-primary-500 h-2 rounded-full transition-all duration-300"
54
+ style={{ width: `${state.uploadProgress}%` }}
55
+ />
56
+ </div>
57
+ </div>
58
+ ) : isDragActive ? (
59
+ <div className="flex flex-col items-center gap-2">
60
+ <FileText className="w-10 h-10 text-primary-500" />
61
+ <p className="text-sm font-medium text-primary-600 dark:text-primary-400">
62
+ Drop your PDF here
63
+ </p>
64
+ </div>
65
+ ) : (
66
+ <div className="flex flex-col items-center gap-2">
67
+ <Upload className="w-10 h-10 text-gray-400" />
68
+ <p className="text-sm font-medium text-gray-600 dark:text-gray-400">
69
+ Drag & drop a PDF
70
+ </p>
71
+ <p className="text-xs text-gray-500 dark:text-gray-500">
72
+ or click to browse
73
+ </p>
74
+ </div>
75
+ )}
76
+ </div>
77
+
78
+ <p className="text-xs text-gray-500 dark:text-gray-500 text-center">
79
+ Supports PDF files up to 10MB
80
+ </p>
81
+ </div>
82
+ );
83
+ };
frontend/src/components/layout/Header.tsx ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import {
3
+ Brain,
4
+ Settings,
5
+ Menu,
6
+ Sun,
7
+ Moon
8
+ } from 'lucide-react';
9
+ import { useApp } from '../../context/AppContext';
10
+ import { ModeSelector } from '../query/ModeSelector';
11
+
12
+ export const Header: React.FC = () => {
13
+ const { state, dispatch, toggleTheme } = useApp();
14
+
15
+ return (
16
+ <header className="fixed top-0 left-0 right-0 h-16 bg-white dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700 z-50 flex items-center justify-between px-4">
17
+ <div className="flex items-center gap-4">
18
+ <button
19
+ onClick={() => dispatch({ type: 'TOGGLE_SIDEBAR' })}
20
+ className="p-2 hover:bg-gray-100 dark:hover:bg-gray-700 rounded-lg lg:hidden"
21
+ >
22
+ <Menu className="w-5 h-5 text-gray-600 dark:text-gray-300" />
23
+ </button>
24
+
25
+ <div className="flex items-center gap-2">
26
+ <Brain className="w-8 h-8 text-primary-600" />
27
+ <span className="text-xl font-bold text-gray-900 dark:text-white">
28
+ RAG System
29
+ </span>
30
+ </div>
31
+ </div>
32
+
33
+ <div className="flex items-center gap-4">
34
+ <ModeSelector />
35
+
36
+ <button
37
+ onClick={toggleTheme}
38
+ className="p-2 hover:bg-gray-100 dark:hover:bg-gray-700 rounded-lg transition-colors"
39
+ >
40
+ {state.theme === 'light' ? (
41
+ <Moon className="w-5 h-5 text-gray-600" />
42
+ ) : (
43
+ <Sun className="w-5 h-5 text-yellow-500" />
44
+ )}
45
+ </button>
46
+
47
+ <button
48
+ onClick={() => dispatch({ type: 'TOGGLE_SETTINGS' })}
49
+ className="p-2 hover:bg-gray-100 dark:hover:bg-gray-700 rounded-lg transition-colors"
50
+ >
51
+ <Settings className="w-5 h-5 text-gray-600 dark:text-gray-300" />
52
+ </button>
53
+ </div>
54
+ </header>
55
+ );
56
+ };
frontend/src/components/layout/MainContent.tsx ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { useApp } from '../../context/AppContext';
3
+ import { QueryInput } from '../query/QueryInput';
4
+ import { ResultsDisplay } from '../results/ResultsDisplay';
5
+ import { EmptyState } from '../common/EmptyState';
6
+
7
+ export const MainContent: React.FC = () => {
8
+ const { state } = useApp();
9
+
10
+ return (
11
+ <main className="pt-16 min-h-screen">
12
+ <div className={`flex-1 p-6 ${state.sidebarOpen ? 'ml-80' : 'ml-0'}`}>
13
+ <div className="max-w-4xl mx-auto space-y-6">
14
+ <QueryInput />
15
+
16
+ {state.currentAnswer ? (
17
+ <ResultsDisplay answer={state.currentAnswer} />
18
+ ) : (
19
+ <EmptyState />
20
+ )}
21
+ </div>
22
+ </div>
23
+ </main>
24
+ );
25
+ };
frontend/src/components/layout/Sidebar.tsx ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { useApp } from '../../context/AppContext';
3
+ import { FileUpload } from '../documents/FileUpload';
4
+ import { DocumentList } from '../documents/DocumentList';
5
+
6
+ export const Sidebar: React.FC = () => {
7
+ const { state } = useApp();
8
+
9
+ if (!state.sidebarOpen) return null;
10
+
11
+ return (
12
+ <aside className="fixed left-0 top-16 bottom-0 w-80 bg-white dark:bg-gray-800 border-r border-gray-200 dark:border-gray-700 flex flex-col z-40">
13
+ <div className="flex-1 overflow-y-auto p-4 space-y-6 scrollbar-thin">
14
+ <FileUpload />
15
+ <DocumentList />
16
+ </div>
17
+ </aside>
18
+ );
19
+ };
frontend/src/components/query/ModeSelector.tsx ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import React from 'react';
import { Globe, FileText, GitMerge, Lock } from 'lucide-react';
import { useApp } from '../../context/AppContext';
import type { QueryMode } from '../../types';

// Static catalogue of the four query modes shown in the header toggle.
const MODE_OPTIONS: { id: QueryMode; label: string; icon: React.ReactNode; description: string }[] = [
  {
    id: 'web',
    label: 'Web Search',
    icon: <Globe className="w-4 h-4" />,
    description: 'Search the web for information',
  },
  {
    id: 'pdf',
    label: 'PDF Only',
    icon: <FileText className="w-4 h-4" />,
    description: 'Query only uploaded documents',
  },
  {
    id: 'hybrid',
    label: 'Hybrid',
    icon: <GitMerge className="w-4 h-4" />,
    description: 'Combine web and document search',
  },
  {
    id: 'restricted',
    label: 'Restricted',
    icon: <Lock className="w-4 h-4" />,
    description: 'Safe mode with content filtering',
  },
];

/**
 * Segmented control that selects the active query mode. The label text is
 * hidden on narrow screens; the full description lives in the tooltip.
 */
export const ModeSelector: React.FC = () => {
  const { state, dispatch } = useApp();

  const selectMode = (id: QueryMode) =>
    dispatch({ type: 'SET_QUERY_MODE', payload: id });

  return (
    <div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 p-1 rounded-lg">
      {MODE_OPTIONS.map(option => {
        const isActive = state.queryMode === option.id;
        return (
          <button
            key={option.id}
            onClick={() => selectMode(option.id)}
            className={`
              mode-tab ${isActive ? 'mode-tab-active' : 'mode-tab-inactive'}
            `}
            title={option.description}
          >
            {option.icon}
            <span className="hidden sm:inline text-sm">{option.label}</span>
          </button>
        );
      })}
    </div>
  );
};
frontend/src/components/query/QueryInput.tsx ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useCallback, useRef, useEffect } from 'react';
2
+ import { Send, Sparkles, X } from 'lucide-react';
3
+ import { useApp } from '../../context/AppContext';
4
+
5
+ const sampleQueries = [
6
+ "What is the main topic of my documents?",
7
+ "Summarize the key findings",
8
+ "Extract important dates and events",
9
+ ];
10
+
11
+ export const QueryInput: React.FC = () => {
12
+ const { state, handleQuery, dispatch, clearResults } = useApp();
13
+ const [showSamples, setShowSamples] = useState(true);
14
+ const textareaRef = useRef<HTMLTextAreaElement>(null);
15
+
16
+ const handleKeyDown = useCallback(
17
+ (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
18
+ if (e.key === 'Enter' && !e.shiftKey) {
19
+ e.preventDefault();
20
+ handleQuery();
21
+ }
22
+ },
23
+ [handleQuery]
24
+ );
25
+
26
+ const handleSubmit = useCallback(() => {
27
+ handleQuery();
28
+ }, [handleQuery]);
29
+
30
+ const autoResize = useCallback(() => {
31
+ const textarea = textareaRef.current;
32
+ if (textarea) {
33
+ textarea.style.height = 'auto';
34
+ textarea.style.height = `${Math.min(textarea.scrollHeight, 200)}px`;
35
+ }
36
+ }, []);
37
+
38
+ useEffect(() => {
39
+ autoResize();
40
+ }, [state.currentQuery, autoResize]);
41
+
42
+ const handleClear = () => {
43
+ clearResults();
44
+ setShowSamples(true);
45
+ };
46
+
47
+ return (
48
+ <div className="space-y-4">
49
+ <div className="card p-1">
50
+ <div className="flex items-start gap-2">
51
+ <textarea
52
+ ref={textareaRef}
53
+ value={state.currentQuery}
54
+ onChange={e => {
55
+ dispatch({ type: 'SET_CURRENT_QUERY', payload: e.target.value });
56
+ if (e.target.value && showSamples) {
57
+ setShowSamples(false);
58
+ }
59
+ }}
60
+ onKeyDown={handleKeyDown}
61
+ placeholder="Ask a question about your documents..."
62
+ className="flex-1 min-h-[120px] max-h-[200px] p-4 bg-transparent text-gray-900 dark:text-white placeholder-gray-500 resize-none focus:outline-none"
63
+ disabled={state.isLoading}
64
+ />
65
+
66
+ {state.currentQuery && (
67
+ <button
68
+ onClick={handleClear}
69
+ className="p-2 hover:bg-gray-100 dark:hover:bg-gray-700 rounded-lg transition-colors mt-1"
70
+ >
71
+ <X className="w-4 h-4 text-gray-400" />
72
+ </button>
73
+ )}
74
+ </div>
75
+
76
+ <div className="flex items-center justify-between px-4 pb-4">
77
+ <div className="flex items-center gap-2">
78
+ <Sparkles className="w-4 h-4 text-gray-400" />
79
+ <span className="text-xs text-gray-500 dark:text-gray-500">
80
+ Press Enter to submit, Shift+Enter for new line
81
+ </span>
82
+ </div>
83
+
84
+ <button
85
+ onClick={handleSubmit}
86
+ disabled={!state.currentQuery.trim() || state.isLoading}
87
+ className="btn-primary flex items-center gap-2"
88
+ >
89
+ {state.isLoading ? (
90
+ <>
91
+ <div className="w-4 h-4 border-2 border-white/30 border-t-white rounded-full animate-spin" />
92
+ <span>Processing...</span>
93
+ </>
94
+ ) : (
95
+ <>
96
+ <Send className="w-4 h-4" />
97
+ <span>Submit</span>
98
+ </>
99
+ )}
100
+ </button>
101
+ </div>
102
+ </div>
103
+
104
+ {showSamples && !state.currentQuery && (
105
+ <div className="flex flex-wrap gap-2">
106
+ <span className="text-xs text-gray-500 dark:text-gray-500 py-1">
107
+ Try:
108
+ </span>
109
+ {sampleQueries.map((query, index) => (
110
+ <button
111
+ key={index}
112
+ onClick={() => {
113
+ dispatch({ type: 'SET_CURRENT_QUERY', payload: query });
114
+ setShowSamples(false);
115
+ }}
116
+ className="text-xs px-3 py-1 bg-gray-100 dark:bg-gray-800 text-gray-600 dark:text-gray-400 rounded-full hover:bg-gray-200 dark:hover:bg-gray-700 transition-colors"
117
+ >
118
+ {query}
119
+ </button>
120
+ ))}
121
+ </div>
122
+ )}
123
+ </div>
124
+ );
125
+ };
frontend/src/components/results/AnswerCard.tsx ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { Clock, Globe, FileText, GitMerge, Lock } from 'lucide-react';
3
+ import type { Answer, QueryMode } from '../../types';
4
+ import { ConfidenceIndicator } from './ConfidenceIndicator';
5
+
6
+ interface AnswerCardProps {
7
+ answer: Answer;
8
+ }
9
+
10
+ const modeIcons: Record<QueryMode, React.ReactNode> = {
11
+ web: <Globe className="w-3 h-3" />,
12
+ pdf: <FileText className="w-3 h-3" />,
13
+ hybrid: <GitMerge className="w-3 h-3" />,
14
+ restricted: <Lock className="w-3 h-3" />,
15
+ };
16
+
17
+ const modeLabels: Record<QueryMode, string> = {
18
+ web: 'Web Search',
19
+ pdf: 'PDF Only',
20
+ hybrid: 'Hybrid',
21
+ restricted: 'Restricted',
22
+ };
23
+
24
+ export const AnswerCard: React.FC<AnswerCardProps> = ({ answer }) => {
25
+ const formatTime = (timestamp: string) => {
26
+ const date = new Date(timestamp);
27
+ return date.toLocaleTimeString('en-US', {
28
+ hour: 'numeric',
29
+ minute: '2-digit',
30
+ });
31
+ };
32
+
33
+ return (
34
+ <div className="card overflow-hidden">
35
+ <div className="border-b border-gray-200 dark:border-gray-700 p-4 bg-gray-50 dark:bg-gray-900/50">
36
+ <p className="text-sm text-gray-600 dark:text-gray-400 mb-2">
37
+ Question:
38
+ </p>
39
+ <p className="text-lg font-medium text-gray-900 dark:text-white">
40
+ {answer.query}
41
+ </p>
42
+ </div>
43
+
44
+ <div className="p-6 space-y-4">
45
+ <div className="flex items-center justify-between">
46
+ <div className="flex items-center gap-2">
47
+ {modeIcons[answer.mode]}
48
+ <span className="text-xs font-medium text-gray-600 dark:text-gray-400 uppercase">
49
+ {modeLabels[answer.mode]}
50
+ </span>
51
+ </div>
52
+
53
+ <div className="flex items-center gap-4">
54
+ <ConfidenceIndicator confidence={answer.confidence} />
55
+ <div className="flex items-center gap-1 text-xs text-gray-500">
56
+ <Clock className="w-3 h-3" />
57
+ <span>{formatTime(answer.timestamp)}</span>
58
+ </div>
59
+ </div>
60
+ </div>
61
+
62
+ <div className="prose prose-gray dark:prose-invert max-w-none">
63
+ <p className="text-gray-800 dark:text-gray-200 leading-relaxed whitespace-pre-wrap">
64
+ {answer.text}
65
+ </p>
66
+ </div>
67
+ </div>
68
+ </div>
69
+ );
70
+ };