manan75 committed on
Commit ·
f9e2c6d
1
Parent(s): 7773c49
initial commit
Browse files- .gitignore +23 -0
- README.md +232 -9
- app.py +643 -0
- config.py +75 -0
- dev.zip +3 -0
- evaluation/__init__.py +1 -0
- evaluation/judge.py +126 -0
- evaluation/metrics.py +131 -0
- ingestion/__init__.py +1 -0
- ingestion/chunker.py +133 -0
- ingestion/indexer.py +106 -0
- ingestion/loader.py +85 -0
- llm/__init__.py +1 -0
- llm/generator.py +105 -0
- requirements.txt +25 -0
- retrieval/__init__.py +1 -0
- retrieval/query_rewriter.py +103 -0
- retrieval/retriever.py +64 -0
- utils/__init__.py +1 -0
- utils/helpers.py +100 -0
.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
venv/
|
| 6 |
+
.venv/
|
| 7 |
+
*.egg-info/
|
| 8 |
+
|
| 9 |
+
# Environment
|
| 10 |
+
.env
|
| 11 |
+
|
| 12 |
+
# Data (generated at runtime)
|
| 13 |
+
data/uploads/
|
| 14 |
+
data/vector_db/
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
*.swp
|
| 20 |
+
|
| 21 |
+
# OS
|
| 22 |
+
.DS_Store
|
| 23 |
+
Thumbs.db
|
README.md
CHANGED
|
@@ -1,12 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
sdk_version: 6.10.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# DevDocsAI
|
| 2 |
+
|
| 3 |
+
# DevDocs AI — Codebase RAG Assistant
|
| 4 |
+
|
| 5 |
+
A production-quality **Retrieval-Augmented Generation** system for querying codebases with natural language. Upload any ZIP archive, index it once, and ask questions about the code.
|
| 6 |
+

|
| 7 |
+
|
| 8 |
+
## Architecture
|
| 9 |
+
|
| 10 |
+
```
|
| 11 |
+
User Query
    │
    ▼
[Query Rewriter]  ← optional rule-based or LLM rewrite
    │
    ▼
[Retriever]       ← similarity search OR MMR (configurable)
    │               ChromaDB + HuggingFace all-MiniLM-L6-v2 embeddings
    ▼
[Retrieved Chunks]
    │
    ├──→ [LLM Generator]  → Answer (gpt-4.1-nano, 1 call)
    │
    └──→ [Evaluator]
            ├── Retrieval Metrics (Recall@K, MRR, nDCG)  ← FREE
            └── LLM Judge (Accuracy, Completeness, Relevance, Groundedness)  ← 1 call
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Cost Model
|
| 30 |
+
|
| 31 |
+
| Operation | Cost |
|
| 32 |
+
|----------------------|------------------|
|
| 33 |
+
| Embedding (indexing) | **FREE** (local) |
|
| 34 |
+
| Embedding (query) | **FREE** (local) |
|
| 35 |
+
| Answer generation | ~$0.0001 / query |
|
| 36 |
+
| LLM judge evaluation | ~$0.0001 / query |
|
| 37 |
+
| Query rewriting (LLM)| ~$0.00005 / query|
|
| 38 |
+
|
| 39 |
+
> At $5 budget you can run ~25,000 queries with full evaluation enabled.
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
## Project Structure
|
| 43 |
+
|
| 44 |
+
```
|
| 45 |
+
devdocs-ai/
├── app.py                  # Gradio UI (3 tabs)
├── config.py               # All configuration in one place
├── requirements.txt
├── .env.example
│
├── ingestion/
│   ├── __init__.py
│   ├── loader.py           # ZIP extraction + file reading
│   ├── chunker.py          # AST-aware Python chunking + generic splitter
│   └── indexer.py          # HuggingFace embeddings + ChromaDB persistence
│
├── retrieval/
│   ├── __init__.py
│   ├── retriever.py        # Similarity + MMR search
│   └── query_rewriter.py   # Rule-based + optional LLM rewrite
│
├── llm/
│   ├── __init__.py
│   └── generator.py        # Grounded answer generation via litellm
│
├── evaluation/
│   ├── __init__.py
│   ├── metrics.py          # Recall@K, MRR, nDCG (free, keyword-based)
│   └── judge.py            # LLM-as-judge (Accuracy/Completeness/Relevance/Groundedness)
│
├── utils/
│   ├── __init__.py
│   └── helpers.py          # Logging, display formatters
│
└── data/
    ├── uploads/            # Extracted ZIP contents (auto-created)
    └── vector_db/          # ChromaDB persistent storage (auto-created)
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
## Quick Start
|
| 81 |
+
|
| 82 |
+
### 1. Clone / download the project
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
cd devdocs-ai
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### 2. Create virtual environment
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
python -m venv venv
|
| 92 |
+
source venv/bin/activate # Linux/macOS
|
| 93 |
+
# venv\Scripts\activate # Windows
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
### 3. Install dependencies
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
pip install -r requirements.txt
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
> First run will download the `all-MiniLM-L6-v2` model (~90 MB) automatically.
|
| 103 |
+
|
| 104 |
+
### 4. Set your OpenAI API key
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
cp .env.example .env
|
| 108 |
+
# Edit .env and set OPENAI_API_KEY=sk-...
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
Or export directly:
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
export OPENAI_API_KEY="sk-your-key-here"
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### 5. Launch the app
|
| 118 |
+
|
| 119 |
+
```bash
|
| 120 |
+
python app.py
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
Open **http://localhost:7860** in your browser.
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
## Usage Guide
|
| 128 |
+
|
| 129 |
+
### Tab 1 — Index Repository
|
| 130 |
+

|
| 131 |
+
1. Click **Upload ZIP file** and select your repository archive.
|
| 132 |
+
2. Click **🚀 Index Repository**.
|
| 133 |
+
3. Wait for the status message — indexing is one-time per repository.
|
| 134 |
+
|
| 135 |
+
> Re-indexing a new ZIP clears the previous index automatically.
|
| 136 |
+
|
| 137 |
+
### Tab 2 — Ask Questions
|
| 138 |
+
|
| 139 |
+
1. Type a natural language question.
|
| 140 |
+
2. Configure retrieval options:
|
| 141 |
+
- **Top-K**: number of chunks to retrieve (default 5)
|
| 142 |
+
- **Use MMR**: diversity-aware retrieval (avoids redundant chunks)
|
| 143 |
+
- **Use query rewriting**: expands abbreviations before retrieval
|
| 144 |
+
- **Run evaluation**: computes all metrics (costs 1 extra LLM call)
|
| 145 |
+
3. Click **🔍 Ask**.
|
| 146 |
+
4. View the **Answer**, **Retrieved Chunks**, and **Metrics Panel**.
|
| 147 |
+

|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
### Tab 3 — Compare Modes
|
| 151 |
+
|
| 152 |
+
Run both **Similarity** and **MMR** retrieval side-by-side for the same question to compare answer quality and chunk diversity.
|
| 153 |
+

|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## Configuration Reference
|
| 157 |
+
|
| 158 |
+
All parameters are in `config.py`:
|
| 159 |
+
|
| 160 |
+
| Parameter | Default | Description |
|
| 161 |
+
|------------------------|-----------------------|------------------------------------------|
|
| 162 |
+
| `EMBEDDING_MODEL` | `all-MiniLM-L6-v2` | HuggingFace sentence-transformer model |
|
| 163 |
+
| `CHUNK_SIZE` | `400` tokens | Target chunk size |
|
| 164 |
+
| `CHUNK_OVERLAP` | `60` tokens | Overlap between consecutive chunks |
|
| 165 |
+
| `DEFAULT_TOP_K` | `5` | Chunks retrieved per query |
|
| 166 |
+
| `MMR_FETCH_K` | `20` | Candidate pool size for MMR |
|
| 167 |
+
| `MMR_LAMBDA_MULT`      | `0.5`                 | MMR diversity/relevance balance (0–1)    |
|
| 168 |
+
| `LLM_MODEL` | `openai/gpt-4.1-nano` | LLM for answer generation |
|
| 169 |
+
| `LLM_MAX_TOKENS` | `1024` | Max tokens in LLM response |
|
| 170 |
+
| `ALLOWED_EXTENSIONS` | `.py .js .ts .md ...` | File types included in indexing |
|
| 171 |
+
| `MAX_FILE_SIZE_MB` | `2` | Files larger than this are skipped |
|
| 172 |
+
|
| 173 |
+
---
|
| 174 |
+
|
| 175 |
+
## Evaluation Metrics Explained
|
| 176 |
+
|
| 177 |
+
### Retrieval Metrics (free, keyword-based proxy)
|
| 178 |
+
|
| 179 |
+
| Metric     | Formula                                          | Range |
|------------|--------------------------------------------------|-------|
| Recall@K   | relevant retrieved / K                           | 0–1   |
| MRR        | 1 / rank of first relevant doc                   | 0–1   |
| nDCG@K     | DCG / IDCG using binary relevance                | 0–1   |
|
| 184 |
+
|
| 185 |
+
> Relevance is determined by keyword overlap between query and chunk (≥2 shared tokens).
|
| 186 |
+
|
| 187 |
+
### Answer Quality (LLM judge, 1 call)
|
| 188 |
+
|
| 189 |
+
| Dimension     | Meaning                                           | Scale |
|---------------|---------------------------------------------------|-------|
| Accuracy      | Every claim is factually correct given context    | 1–5   |
| Completeness  | All parts of the question are addressed           | 1–5   |
| Relevance     | Answer is focused and on-topic                    | 1–5   |
| Groundedness  | All claims are directly supported by context      | 1–5   |
| Overall       | Mean of the four scores                           | 1–5   |
|
| 196 |
+
|
| 197 |
---
|
| 198 |
+

|
| 199 |
+
## Supported File Types
|
| 200 |
+
|
| 201 |
+
`.py` `.js` `.ts` `.jsx` `.tsx` `.md` `.txt` `.java` `.go` `.rs` `.cpp` `.c` `.h`
|
| 202 |
+
|
|
|
|
|
|
|
|
|
|
| 203 |
---
|
| 204 |
|
| 205 |
+
## Chunking Strategy
|
| 206 |
+
|
| 207 |
+
| File Type | Strategy |
|
| 208 |
+
|---------------|-----------------------------------------------------------------|
|
| 209 |
+
| `.py` | AST-based: one chunk per top-level function/class |
|
| 210 |
+
| All others | Recursive character splitter (400-token chunks, 60-token overlap)|
|
| 211 |
+
|
| 212 |
+
Python files that fail AST parsing (e.g. syntax errors) fall back to the generic splitter automatically.
|
| 213 |
+
|
| 214 |
+
---
|
| 215 |
+
|
| 216 |
+
## Troubleshooting
|
| 217 |
+
|
| 218 |
+
**"Vector store is empty" error**
|
| 219 |
+
→ Index a repository first via Tab 1.
|
| 220 |
+
|
| 221 |
+
**Slow first query**
|
| 222 |
+
→ The embedding model is downloaded on first use (~90 MB). Subsequent runs are fast.
|
| 223 |
+
|
| 224 |
+
**"No API key" warnings**
|
| 225 |
+
→ Set `OPENAI_API_KEY` in `.env` or as an environment variable.
|
| 226 |
+
|
| 227 |
+
**ChromaDB dimension mismatch error**
|
| 228 |
+
→ Delete `data/vector_db/` and re-index. This happens if you switch embedding models mid-session.
|
| 229 |
+
|
| 230 |
+
```bash
|
| 231 |
+
rm -rf data/vector_db/
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
**Out of memory on large repos**
|
| 235 |
+
→ Lower `MAX_FILE_SIZE_MB` in `config.py` or reduce `CHUNK_SIZE`.
|
app.py
ADDED
|
@@ -0,0 +1,643 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""app.py — Gradio UI for DevDocs AI: Codebase RAG Assistant.

A polished, product-like interface with a softer visual language,
modern typography, improved spacing, and clearer output cards.

Dashboard tabs:
    1. Index Repository — upload ZIP, trigger ingestion pipeline.
    2. Ask Questions — query the indexed codebase with configurable retrieval.
    3. Compare Modes — side-by-side similarity vs MMR retrieval.
"""
|
| 11 |
+
|
| 12 |
+
import logging
|
| 13 |
+
import shutil
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import Tuple
|
| 16 |
+
|
| 17 |
+
import gradio as gr
|
| 18 |
+
|
| 19 |
+
from config import UPLOAD_DIR, DEFAULT_TOP_K
|
| 20 |
+
from ingestion.loader import extract_zip, load_files
|
| 21 |
+
from ingestion.chunker import chunk_documents
|
| 22 |
+
from ingestion.indexer import index_documents, is_index_populated
|
| 23 |
+
from retrieval.retriever import retrieve
|
| 24 |
+
from retrieval.query_rewriter import rewrite_query
|
| 25 |
+
from llm.generator import generate_answer
|
| 26 |
+
from evaluation.metrics import compute_retrieval_metrics
|
| 27 |
+
from evaluation.judge import judge_answer
|
| 28 |
+
from utils.helpers import setup_logging, format_chunks_for_display, format_metrics_for_display
|
| 29 |
+
|
| 30 |
+
setup_logging(logging.INFO)
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
# Pipeline functions
|
| 36 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
+
|
| 38 |
+
def run_indexing(zip_file) -> str:
    """Gradio handler: extract ZIP → load files → chunk → embed → index.

    Args:
        zip_file: The uploaded archive as handed over by the Gradio file
            component. Either an object exposing its temporary path via
            ``.name`` or a plain filesystem path is accepted; ``None``
            means nothing was uploaded.

    Returns:
        A status message for the UI: a prompt to upload a file, a warning
        when no supported files were found, a success summary with file and
        chunk counts, or the failure reason. Exceptions are logged and
        reported in the message rather than propagated.
    """
    if zip_file is None:
        return "❌ Please upload a ZIP file first."

    try:
        # The upload may arrive as a file-like object (path in ``.name``)
        # or directly as a path string, depending on the component config.
        src = Path(getattr(zip_file, "name", zip_file))

        # Ensure the destination directory exists before copying into it.
        upload_dir = Path(UPLOAD_DIR)
        upload_dir.mkdir(parents=True, exist_ok=True)
        dest = upload_dir / src.name
        shutil.copy2(src, dest)

        gr.Info("📦 Extracting ZIP archive...")
        extract_dir = extract_zip(str(dest))

        gr.Info("📂 Loading source files...")
        raw_docs = load_files(extract_dir)
        if not raw_docs:
            return "⚠️ No supported source files found in the ZIP."

        gr.Info(f"✂️ Chunking {len(raw_docs)} files...")
        chunks = chunk_documents(raw_docs)

        gr.Info(f"🧠 Embedding and indexing {len(chunks)} chunks...")
        index_documents(chunks)

        return (
            f"✅ Indexing complete!\n\n"
            f"Files processed: {len(raw_docs)}\n"
            f"Chunks indexed: {len(chunks)}\n"
            f"Status: Ready to query"
        )

    except Exception as e:
        # Top-level UI boundary: log the traceback, show a readable message.
        logger.exception("Indexing failed")
        return f"❌ Indexing failed: {e}"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def run_query(
    query: str,
    use_mmr: bool,
    use_rewriting: bool,
    top_k: int,
    run_evaluation: bool,
) -> Tuple[str, str, str]:
    """Gradio handler: rewrite query → retrieve → generate answer → evaluate.

    Args:
        query: The user's natural-language question.
        use_mmr: Retrieve with ``"mmr"`` when true, ``"similarity"`` otherwise.
        use_rewriting: Run the rule-based query rewriter before retrieval.
        top_k: Number of chunks to retrieve (coerced to ``int``).
        run_evaluation: Compute retrieval metrics and the LLM-judge scores.

    Returns:
        A ``(answer, context_display, metrics_display)`` tuple. On invalid
        input, a missing index, or any failure, the first element carries the
        message and the other two are empty strings.
    """
    # Guard against a missing value as well as blank/whitespace-only input.
    if not query or not query.strip():
        return "❌ Please enter a question.", "", ""

    if not is_index_populated():
        return "❌ No index found. Please index a repository first.", "", ""

    try:
        effective_query = query
        if use_rewriting:
            gr.Info("🔄 Rewriting query...")
            effective_query = rewrite_query(query, use_llm=False)

        search_type = "mmr" if use_mmr else "similarity"
        gr.Info(f"🔍 Retrieving with {search_type.upper()}...")
        docs, scores = retrieve(effective_query, search_type=search_type, top_k=int(top_k))

        context_display = format_chunks_for_display(docs, scores)
        if effective_query != query:
            # Surface the rewrite so the user can see what was searched.
            context_display = f"📝 Rewritten query: \"{effective_query}\"\n\n" + context_display

        gr.Info("💬 Generating answer...")
        # The answer is generated against the user's original wording; the
        # rewritten query is only used for retrieval.
        answer, _source_files = generate_answer(query, docs)

        if run_evaluation:
            gr.Info("📊 Running evaluation...")
            retrieval_metrics = compute_retrieval_metrics(query, docs)
            answer_scores = judge_answer(query, docs, answer)
            metrics_display = format_metrics_for_display(retrieval_metrics, answer_scores)
        else:
            metrics_display = "ℹ️ Enable 'Run evaluation' to see metrics."

        return answer, context_display, metrics_display

    except Exception as e:
        # Top-level UI boundary: log the traceback, show a readable message.
        logger.exception("Query failed")
        return f"❌ Error: {e}", "", ""
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def run_comparison(query: str, top_k: int) -> Tuple[str, str, str, str]:
    """Gradio handler: run both similarity and MMR side-by-side.

    Args:
        query: The user's natural-language question.
        top_k: Number of chunks to retrieve per mode (coerced to ``int``).

    Returns:
        A ``(sim_answer, sim_context, mmr_answer, mmr_context)`` tuple. When
        no index exists or an error occurs, the message is placed in both
        answer slots and both context slots are empty.
    """
    # Guard against a missing value as well as blank/whitespace-only input.
    if not query or not query.strip():
        return "❌ Please enter a question.", "", "", ""

    if not is_index_populated():
        msg = "❌ No index found."
        return msg, "", msg, ""

    try:
        k = int(top_k)

        sim_docs, sim_scores = retrieve(query, search_type="similarity", top_k=k)
        mmr_docs, mmr_scores = retrieve(query, search_type="mmr", top_k=k)

        sim_answer, _ = generate_answer(query, sim_docs)
        mmr_answer, _ = generate_answer(query, mmr_docs)

        sim_context = format_chunks_for_display(sim_docs, sim_scores)
        mmr_context = format_chunks_for_display(mmr_docs, mmr_scores)

        return sim_answer, sim_context, mmr_answer, mmr_context

    except Exception as e:
        # Top-level UI boundary: log the traceback, show a readable message.
        logger.exception("Comparison failed")
        err = f"❌ Error: {e}"
        return err, "", err, ""
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
+
# Theme + Styling
|
| 152 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
|
| 154 |
+
# Shared Gradio theme: the built-in Soft theme with indigo/cyan/slate hues
# and the "Inter" Google font.
THEME = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
)
|
| 160 |
+
|
| 161 |
+
CSS = """
|
| 162 |
+
:root {
|
| 163 |
+
--bg-0: #0b1020;
|
| 164 |
+
--bg-1: #11162a;
|
| 165 |
+
--bg-2: #151b31;
|
| 166 |
+
--card: rgba(17, 24, 39, 0.72);
|
| 167 |
+
--card-strong: rgba(15, 23, 42, 0.92);
|
| 168 |
+
--card-border: rgba(148, 163, 184, 0.14);
|
| 169 |
+
--text-main: #e5e7eb;
|
| 170 |
+
--text-soft: #94a3b8;
|
| 171 |
+
--accent: #8b5cf6;
|
| 172 |
+
--accent-2: #22c55e;
|
| 173 |
+
--accent-3: #38bdf8;
|
| 174 |
+
--danger: #f87171;
|
| 175 |
+
--shadow: 0 20px 60px rgba(0, 0, 0, 0.25);
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
html, body {
|
| 179 |
+
background:
|
| 180 |
+
radial-gradient(circle at top left, rgba(139,92,246,0.18), transparent 28%),
|
| 181 |
+
radial-gradient(circle at top right, rgba(56,189,248,0.14), transparent 22%),
|
| 182 |
+
linear-gradient(180deg, var(--bg-0), var(--bg-1) 45%, #0a0f1d 100%) !important;
|
| 183 |
+
color: var(--text-main) !important;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.gradio-container {
|
| 187 |
+
max-width: 1240px !important;
|
| 188 |
+
margin: 0 auto !important;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
/* Main shell */
|
| 192 |
+
#app-shell {
|
| 193 |
+
border: 1px solid var(--card-border);
|
| 194 |
+
background: linear-gradient(180deg, rgba(17,24,39,0.84), rgba(15,23,42,0.74));
|
| 195 |
+
box-shadow: var(--shadow);
|
| 196 |
+
border-radius: 28px;
|
| 197 |
+
padding: 22px;
|
| 198 |
+
backdrop-filter: blur(18px);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
/* Hero */
|
| 202 |
+
.hero-wrap {
|
| 203 |
+
display: grid;
|
| 204 |
+
grid-template-columns: 1.4fr 0.8fr;
|
| 205 |
+
gap: 18px;
|
| 206 |
+
align-items: stretch;
|
| 207 |
+
margin-bottom: 18px;
|
| 208 |
+
}
|
| 209 |
+
.hero-card, .mini-card, .section-card {
|
| 210 |
+
background: var(--card);
|
| 211 |
+
border: 1px solid var(--card-border);
|
| 212 |
+
border-radius: 24px;
|
| 213 |
+
box-shadow: 0 12px 30px rgba(0, 0, 0, 0.16);
|
| 214 |
+
backdrop-filter: blur(14px);
|
| 215 |
+
}
|
| 216 |
+
.hero-card {
|
| 217 |
+
padding: 24px 24px 22px;
|
| 218 |
+
}
|
| 219 |
+
.hero-kicker {
|
| 220 |
+
display: inline-flex;
|
| 221 |
+
align-items: center;
|
| 222 |
+
gap: 8px;
|
| 223 |
+
padding: 8px 12px;
|
| 224 |
+
border-radius: 999px;
|
| 225 |
+
background: rgba(139,92,246,0.14);
|
| 226 |
+
color: #d8b4fe;
|
| 227 |
+
font-size: 0.82rem;
|
| 228 |
+
font-weight: 600;
|
| 229 |
+
letter-spacing: 0.02em;
|
| 230 |
+
margin-bottom: 14px;
|
| 231 |
+
}
|
| 232 |
+
.hero-title {
|
| 233 |
+
margin: 0;
|
| 234 |
+
font-size: clamp(2rem, 3vw, 3.1rem);
|
| 235 |
+
line-height: 1.05;
|
| 236 |
+
letter-spacing: -0.03em;
|
| 237 |
+
color: #f8fafc;
|
| 238 |
+
}
|
| 239 |
+
.hero-subtitle {
|
| 240 |
+
margin-top: 12px;
|
| 241 |
+
color: var(--text-soft);
|
| 242 |
+
font-size: 1rem;
|
| 243 |
+
line-height: 1.65;
|
| 244 |
+
max-width: 68ch;
|
| 245 |
+
}
|
| 246 |
+
.hero-badges {
|
| 247 |
+
display: flex;
|
| 248 |
+
flex-wrap: wrap;
|
| 249 |
+
gap: 10px;
|
| 250 |
+
margin-top: 18px;
|
| 251 |
+
}
|
| 252 |
+
.badge-pill {
|
| 253 |
+
display: inline-flex;
|
| 254 |
+
align-items: center;
|
| 255 |
+
gap: 8px;
|
| 256 |
+
padding: 9px 12px;
|
| 257 |
+
border-radius: 999px;
|
| 258 |
+
font-size: 0.86rem;
|
| 259 |
+
color: #e2e8f0;
|
| 260 |
+
background: rgba(15,23,42,0.55);
|
| 261 |
+
border: 1px solid rgba(148,163,184,0.16);
|
| 262 |
+
}
|
| 263 |
+
.mini-card {
|
| 264 |
+
padding: 18px;
|
| 265 |
+
display: flex;
|
| 266 |
+
flex-direction: column;
|
| 267 |
+
justify-content: space-between;
|
| 268 |
+
}
|
| 269 |
+
.mini-card h4 {
|
| 270 |
+
margin: 0 0 8px;
|
| 271 |
+
color: #f8fafc;
|
| 272 |
+
font-size: 1rem;
|
| 273 |
+
}
|
| 274 |
+
.mini-card p {
|
| 275 |
+
margin: 0;
|
| 276 |
+
color: var(--text-soft);
|
| 277 |
+
line-height: 1.6;
|
| 278 |
+
font-size: 0.95rem;
|
| 279 |
+
}
|
| 280 |
+
.mini-grid {
|
| 281 |
+
display: grid;
|
| 282 |
+
grid-template-columns: 1fr 1fr;
|
| 283 |
+
gap: 10px;
|
| 284 |
+
margin-top: 14px;
|
| 285 |
+
}
|
| 286 |
+
.stat {
|
| 287 |
+
border-radius: 18px;
|
| 288 |
+
padding: 14px;
|
| 289 |
+
background: rgba(15,23,42,0.72);
|
| 290 |
+
border: 1px solid rgba(148,163,184,0.12);
|
| 291 |
+
}
|
| 292 |
+
.stat .label {
|
| 293 |
+
color: var(--text-soft);
|
| 294 |
+
font-size: 0.78rem;
|
| 295 |
+
margin-bottom: 6px;
|
| 296 |
+
}
|
| 297 |
+
.stat .value {
|
| 298 |
+
color: #f8fafc;
|
| 299 |
+
font-size: 1rem;
|
| 300 |
+
font-weight: 700;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
/* Tabs */
|
| 304 |
+
.tab-nav {
|
| 305 |
+
margin-top: 8px !important;
|
| 306 |
+
}
|
| 307 |
+
.gradio-tabs .tab-nav button {
|
| 308 |
+
border-radius: 999px !important;
|
| 309 |
+
border: 1px solid rgba(148,163,184,0.14) !important;
|
| 310 |
+
background: rgba(15,23,42,0.55) !important;
|
| 311 |
+
color: #cbd5e1 !important;
|
| 312 |
+
padding: 10px 14px !important;
|
| 313 |
+
transition: all 0.2s ease !important;
|
| 314 |
+
}
|
| 315 |
+
.gradio-tabs .tab-nav button.selected {
|
| 316 |
+
background: linear-gradient(135deg, rgba(139,92,246,0.95), rgba(59,130,246,0.85)) !important;
|
| 317 |
+
color: white !important;
|
| 318 |
+
box-shadow: 0 12px 24px rgba(91, 33, 182, 0.25) !important;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
/* Sections and widgets */
|
| 322 |
+
.section-card {
|
| 323 |
+
padding: 18px;
|
| 324 |
+
margin-bottom: 14px;
|
| 325 |
+
}
|
| 326 |
+
.section-title {
|
| 327 |
+
margin: 0 0 6px;
|
| 328 |
+
font-size: 1.05rem;
|
| 329 |
+
color: #f8fafc;
|
| 330 |
+
letter-spacing: -0.01em;
|
| 331 |
+
}
|
| 332 |
+
.section-desc {
|
| 333 |
+
margin: 0;
|
| 334 |
+
color: var(--text-soft);
|
| 335 |
+
font-size: 0.95rem;
|
| 336 |
+
line-height: 1.6;
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
textarea, input, .wrap, .prose, .markdown, .svelte-textbox, .svelte-slider, .svelte-checkbox {
|
| 340 |
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
textarea, .gr-textbox textarea, .gr-textbox input, .gr-file, .gr-number input {
|
| 344 |
+
background: rgba(15,23,42,0.72) !important;
|
| 345 |
+
color: var(--text-main) !important;
|
| 346 |
+
border: 1px solid rgba(148,163,184,0.14) !important;
|
| 347 |
+
border-radius: 18px !important;
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
.gr-textbox label, .gr-slider label, .gr-checkbox label, .gr-file label {
|
| 351 |
+
color: #e2e8f0 !important;
|
| 352 |
+
font-weight: 600 !important;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.gr-button {
|
| 356 |
+
border-radius: 16px !important;
|
| 357 |
+
border: 1px solid rgba(255,255,255,0.08) !important;
|
| 358 |
+
padding: 12px 16px !important;
|
| 359 |
+
font-weight: 700 !important;
|
| 360 |
+
letter-spacing: 0.01em;
|
| 361 |
+
}
|
| 362 |
+
.gr-button.primary {
|
| 363 |
+
background: linear-gradient(135deg, #8b5cf6, #3b82f6) !important;
|
| 364 |
+
color: white !important;
|
| 365 |
+
box-shadow: 0 16px 30px rgba(59,130,246,0.22) !important;
|
| 366 |
+
}
|
| 367 |
+
.gr-button:hover {
|
| 368 |
+
transform: translateY(-1px);
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
/* Outputs */
|
| 372 |
+
.answer-box, .metric-box, .chunk-box, .output-card {
|
| 373 |
+
border-radius: 22px !important;
|
| 374 |
+
border: 1px solid rgba(148,163,184,0.14) !important;
|
| 375 |
+
background: rgba(2, 6, 23, 0.48) !important;
|
| 376 |
+
box-shadow: 0 12px 30px rgba(0,0,0,0.14);
|
| 377 |
+
}
|
| 378 |
+
.answer-box {
|
| 379 |
+
padding: 16px !important;
|
| 380 |
+
line-height: 1.75 !important;
|
| 381 |
+
}
|
| 382 |
+
.answer-box h1, .answer-box h2, .answer-box h3, .answer-box h4 {
|
| 383 |
+
color: #f8fafc !important;
|
| 384 |
+
letter-spacing: -0.02em;
|
| 385 |
+
}
|
| 386 |
+
.answer-box p, .answer-box li {
|
| 387 |
+
color: #e2e8f0 !important;
|
| 388 |
+
}
|
| 389 |
+
.answer-box code, .chunk-box code, .metric-box code {
|
| 390 |
+
background: rgba(15,23,42,0.9) !important;
|
| 391 |
+
color: #e2e8f0 !important;
|
| 392 |
+
border-radius: 8px !important;
|
| 393 |
+
padding: 0.12rem 0.35rem !important;
|
| 394 |
+
}
|
| 395 |
+
.chunk-box, .metric-box {
|
| 396 |
+
padding: 14px !important;
|
| 397 |
+
white-space: pre-wrap !important;
|
| 398 |
+
color: #cbd5e1 !important;
|
| 399 |
+
line-height: 1.7 !important;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
/* Make the built-in markdown areas feel cleaner */
|
| 403 |
+
.prose, .markdown {
|
| 404 |
+
color: #e2e8f0 !important;
|
| 405 |
+
}
|
| 406 |
+
.prose h1, .prose h2, .prose h3, .markdown h1, .markdown h2, .markdown h3 {
|
| 407 |
+
color: #f8fafc !important;
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
footer { display: none !important; }
|
| 411 |
+
|
| 412 |
+
/* Responsive */
|
| 413 |
+
@media (max-width: 1000px) {
|
| 414 |
+
.hero-wrap { grid-template-columns: 1fr; }
|
| 415 |
+
}
|
| 416 |
+
"""
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 420 |
+
# UI helpers
|
| 421 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 422 |
+
|
| 423 |
+
def hero_panel() -> str:
|
| 424 |
+
return """
|
| 425 |
+
<div class="hero-wrap">
|
| 426 |
+
<div class="hero-card">
|
| 427 |
+
<div class="hero-kicker">β¨ DevDocs AI Β· Codebase RAG Assistant</div>
|
| 428 |
+
<h1 class="hero-title">A calm, premium workspace for exploring your codebase.</h1>
|
| 429 |
+
<p class="hero-subtitle">
|
| 430 |
+
Upload a repository ZIP, index it once, and ask natural-language questions with a cleaner
|
| 431 |
+
reading experience. The interface keeps the workflow fast while feeling intentionally designed,
|
| 432 |
+
not template-generated.
|
| 433 |
+
</p>
|
| 434 |
+
<div class="hero-badges">
|
| 435 |
+
<span class="badge-pill">β‘ Fast indexing flow</span>
|
| 436 |
+
<span class="badge-pill">π§ Query rewriting</span>
|
| 437 |
+
<span class="badge-pill">π Similarity + MMR</span>
|
| 438 |
+
<span class="badge-pill">π Built-in evaluation</span>
|
| 439 |
+
</div>
|
| 440 |
+
</div>
|
| 441 |
+
<div class="mini-card">
|
| 442 |
+
<div>
|
| 443 |
+
<h4>What this interface emphasizes</h4>
|
| 444 |
+
<p>
|
| 445 |
+
Clear hierarchy, softer contrast, rounded surfaces, better spacing, and output cards that are easier to scan.
|
| 446 |
+
</p>
|
| 447 |
+
</div>
|
| 448 |
+
<div class="mini-grid">
|
| 449 |
+
<div class="stat">
|
| 450 |
+
<div class="label">Primary feel</div>
|
| 451 |
+
<div class="value">Modern glass UI</div>
|
| 452 |
+
</div>
|
| 453 |
+
<div class="stat">
|
| 454 |
+
<div class="label">Typography</div>
|
| 455 |
+
<div class="value">Inter</div>
|
| 456 |
+
</div>
|
| 457 |
+
<div class="stat">
|
| 458 |
+
<div class="label">Tone</div>
|
| 459 |
+
<div class="value">Soft + premium</div>
|
| 460 |
+
</div>
|
| 461 |
+
<div class="stat">
|
| 462 |
+
<div class="label">Outputs</div>
|
| 463 |
+
<div class="value">Readable cards</div>
|
| 464 |
+
</div>
|
| 465 |
+
</div>
|
| 466 |
+
</div>
|
| 467 |
+
</div>
|
| 468 |
+
"""
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def section_block(title: str, desc: str) -> str:
    """Render the HTML header card shown at the top of a tab section.

    Args:
        title: Heading text placed in the ``section-title`` div.
        desc: Supporting sentence placed in the ``section-desc`` paragraph.

    Returns:
        Markup for one ``section-card``. Both arguments are interpolated
        verbatim; no HTML escaping is applied.
    """
    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the markup.
    markup_lines = [
        "",
        '<div class="section-card">',
        f'<div class="section-title">{title}</div>',
        f'<p class="section-desc">{desc}</p>',
        "</div>",
        "",
    ]
    return "\n".join(markup_lines)
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 481 |
+
# Build UI
|
| 482 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββββββββββββ
|
| 483 |
+
|
| 484 |
+
def build_ui() -> gr.Blocks:
|
| 485 |
+
with gr.Blocks(theme=THEME, css=CSS, title="DevDocs AI") as demo:
|
| 486 |
+
with gr.Column(elem_id="app-shell"):
|
| 487 |
+
gr.HTML(hero_panel())
|
| 488 |
+
|
| 489 |
+
with gr.Tabs(elem_classes=["tab-nav"]):
|
| 490 |
+
# ββ Tab 1: Index ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 491 |
+
with gr.Tab("π¦ Index Repository"):
|
| 492 |
+
gr.HTML(section_block(
|
| 493 |
+
"Step 1 β Add your codebase",
|
| 494 |
+
"Upload a ZIP file, extract it, chunk the files, and build the local vector index."
|
| 495 |
+
))
|
| 496 |
+
with gr.Row():
|
| 497 |
+
with gr.Column(scale=2):
|
| 498 |
+
zip_input = gr.File(
|
| 499 |
+
label="Upload ZIP file",
|
| 500 |
+
file_types=[".zip"],
|
| 501 |
+
type="filepath",
|
| 502 |
+
)
|
| 503 |
+
index_btn = gr.Button("π Index Repository", variant="primary", size="lg")
|
| 504 |
+
with gr.Column(scale=3):
|
| 505 |
+
index_status = gr.Textbox(
|
| 506 |
+
label="Indexing Status",
|
| 507 |
+
lines=9,
|
| 508 |
+
interactive=False,
|
| 509 |
+
placeholder="Status will appear here after indexing...",
|
| 510 |
+
)
|
| 511 |
+
|
| 512 |
+
index_btn.click(
|
| 513 |
+
fn=run_indexing,
|
| 514 |
+
inputs=[zip_input],
|
| 515 |
+
outputs=[index_status],
|
| 516 |
+
)
|
| 517 |
+
|
| 518 |
+
# ββ Tab 2: Query ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 519 |
+
with gr.Tab("π¬ Ask Questions"):
|
| 520 |
+
gr.HTML(section_block(
|
| 521 |
+
"Step 2 β Ask about the code",
|
| 522 |
+
"Use retrieval settings to control how the assistant searches the indexed repository."
|
| 523 |
+
))
|
| 524 |
+
with gr.Row():
|
| 525 |
+
with gr.Column(scale=3):
|
| 526 |
+
query_input = gr.Textbox(
|
| 527 |
+
label="Your Question",
|
| 528 |
+
placeholder="e.g. How does the authentication flow work?",
|
| 529 |
+
lines=2,
|
| 530 |
+
)
|
| 531 |
+
with gr.Column(scale=1):
|
| 532 |
+
top_k_slider = gr.Slider(
|
| 533 |
+
minimum=1,
|
| 534 |
+
maximum=15,
|
| 535 |
+
value=DEFAULT_TOP_K,
|
| 536 |
+
step=1,
|
| 537 |
+
label="Top-K chunks",
|
| 538 |
+
)
|
| 539 |
+
|
| 540 |
+
with gr.Row():
|
| 541 |
+
use_mmr_toggle = gr.Checkbox(label="Use MMR retrieval", value=False)
|
| 542 |
+
use_rewrite_toggle = gr.Checkbox(label="Use query rewriting", value=False)
|
| 543 |
+
run_eval_toggle = gr.Checkbox(label="Run evaluation (costs 1 LLM call)", value=True)
|
| 544 |
+
query_btn = gr.Button("π Ask", variant="primary")
|
| 545 |
+
|
| 546 |
+
with gr.Row():
|
| 547 |
+
with gr.Column(scale=2):
|
| 548 |
+
gr.HTML('<div class="section-title">Answer</div>')
|
| 549 |
+
answer_output = gr.Markdown(elem_classes=["answer-box"])
|
| 550 |
+
with gr.Column(scale=1):
|
| 551 |
+
metrics_output = gr.Textbox(
|
| 552 |
+
label="π Evaluation Metrics",
|
| 553 |
+
lines=18,
|
| 554 |
+
interactive=False,
|
| 555 |
+
elem_classes=["metric-box"],
|
| 556 |
+
)
|
| 557 |
+
|
| 558 |
+
gr.HTML('<div class="section-title">Retrieved Context</div>')
|
| 559 |
+
context_output = gr.Textbox(
|
| 560 |
+
label="",
|
| 561 |
+
lines=15,
|
| 562 |
+
interactive=False,
|
| 563 |
+
elem_classes=["chunk-box"],
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
query_btn.click(
|
| 567 |
+
fn=run_query,
|
| 568 |
+
inputs=[query_input, use_mmr_toggle, use_rewrite_toggle, top_k_slider, run_eval_toggle],
|
| 569 |
+
outputs=[answer_output, context_output, metrics_output],
|
| 570 |
+
)
|
| 571 |
+
|
| 572 |
+
# ββ Tab 3: Compare ββββββββββββββββββββββββββββββββββββββββββββ
|
| 573 |
+
with gr.Tab("βοΈ Compare: Similarity vs MMR"):
|
| 574 |
+
gr.HTML(section_block(
|
| 575 |
+
"Step 3 β Compare retrieval styles",
|
| 576 |
+
"Run similarity and MMR side-by-side to inspect how the context and answer change."
|
| 577 |
+
))
|
| 578 |
+
with gr.Row():
|
| 579 |
+
cmp_query = gr.Textbox(
|
| 580 |
+
label="Question",
|
| 581 |
+
placeholder="e.g. Where is database initialisation handled?",
|
| 582 |
+
lines=2,
|
| 583 |
+
scale=4,
|
| 584 |
+
)
|
| 585 |
+
cmp_top_k = gr.Slider(
|
| 586 |
+
minimum=1,
|
| 587 |
+
maximum=10,
|
| 588 |
+
value=4,
|
| 589 |
+
step=1,
|
| 590 |
+
label="Top-K",
|
| 591 |
+
scale=1,
|
| 592 |
+
)
|
| 593 |
+
cmp_btn = gr.Button("βοΈ Compare", variant="primary")
|
| 594 |
+
|
| 595 |
+
with gr.Row():
|
| 596 |
+
with gr.Column():
|
| 597 |
+
gr.HTML('<div class="section-title">Similarity Search</div>')
|
| 598 |
+
sim_answer_out = gr.Markdown(elem_classes=["answer-box"])
|
| 599 |
+
sim_context_out = gr.Textbox(
|
| 600 |
+
lines=10,
|
| 601 |
+
interactive=False,
|
| 602 |
+
label="Chunks",
|
| 603 |
+
elem_classes=["chunk-box"],
|
| 604 |
+
)
|
| 605 |
+
with gr.Column():
|
| 606 |
+
gr.HTML('<div class="section-title">MMR Search</div>')
|
| 607 |
+
mmr_answer_out = gr.Markdown(elem_classes=["answer-box"])
|
| 608 |
+
mmr_context_out = gr.Textbox(
|
| 609 |
+
lines=10,
|
| 610 |
+
interactive=False,
|
| 611 |
+
label="Chunks",
|
| 612 |
+
elem_classes=["chunk-box"],
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
cmp_btn.click(
|
| 616 |
+
fn=run_comparison,
|
| 617 |
+
inputs=[cmp_query, cmp_top_k],
|
| 618 |
+
outputs=[sim_answer_out, sim_context_out, mmr_answer_out, mmr_context_out],
|
| 619 |
+
)
|
| 620 |
+
|
| 621 |
+
gr.Markdown(
|
| 622 |
+
"""
|
| 623 |
+
<div style="margin-top: 18px; padding: 14px 6px 0; color: #94a3b8; font-size: 0.9rem; line-height: 1.7;">
|
| 624 |
+
<strong style="color:#e2e8f0;">DevDocs AI</strong> Β· Embeddings: <code>all-MiniLM-L6-v2</code> Β·
|
| 625 |
+
LLM: <code>gpt-4.1-nano</code> Β· Vector DB: <code>ChromaDB</code>
|
| 626 |
+
</div>
|
| 627 |
+
"""
|
| 628 |
+
)
|
| 629 |
+
|
| 630 |
+
return demo
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
# if __name__ == "__main__":
|
| 634 |
+
# ui = build_ui()
|
| 635 |
+
# ui.launch(
|
| 636 |
+
# server_name="127.0.0.1",
|
| 637 |
+
# server_port=7860,
|
| 638 |
+
# share=False,
|
| 639 |
+
# show_error=True,
|
| 640 |
+
# )
|
| 641 |
+
if __name__ == "__main__":
|
| 642 |
+
ui = build_ui()
|
| 643 |
+
ui.launch()
|
config.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
config.py — Centralised configuration for DevDocs AI.
|
| 3 |
+
All tuneable parameters live here so the rest of the codebase imports from one place.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
# βββ Paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 10 |
+
BASE_DIR = Path(__file__).parent
|
| 11 |
+
DATA_DIR = BASE_DIR / "data"
|
| 12 |
+
VECTOR_DB_DIR = DATA_DIR / "vector_db"
|
| 13 |
+
UPLOAD_DIR = DATA_DIR / "uploads"
|
| 14 |
+
|
| 15 |
+
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 16 |
+
VECTOR_DB_DIR.mkdir(parents=True, exist_ok=True)
|
| 17 |
+
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
# βββ Ingestion ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
+
ALLOWED_EXTENSIONS = {
|
| 21 |
+
# Python
|
| 22 |
+
".py",
|
| 23 |
+
# JavaScript / TypeScript
|
| 24 |
+
".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs",
|
| 25 |
+
# PHP
|
| 26 |
+
".php", ".php3", ".php4", ".php5", ".phtml",
|
| 27 |
+
# Java / Kotlin
|
| 28 |
+
".java", ".kt", ".kts",
|
| 29 |
+
# C / C++
|
| 30 |
+
".c", ".cpp", ".h", ".hpp", ".cc",
|
| 31 |
+
# Systems
|
| 32 |
+
".go", ".rs",
|
| 33 |
+
# Ruby
|
| 34 |
+
".rb", ".rake",
|
| 35 |
+
# C# / .NET
|
| 36 |
+
".cs",
|
| 37 |
+
# Shell
|
| 38 |
+
".sh", ".bash", ".zsh",
|
| 39 |
+
# Docs / Config
|
| 40 |
+
".md", ".txt", ".yaml", ".yml", ".toml", ".json",
|
| 41 |
+
# HTML / CSS (if you want frontend code)
|
| 42 |
+
".html", ".css", ".scss",
|
| 43 |
+
# SQL
|
| 44 |
+
".sql",
|
| 45 |
+
}
|
| 46 |
+
MAX_FILE_SIZE_MB = 2 # skip files larger than this
|
| 47 |
+
|
| 48 |
+
# βββ Chunking βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
+
CHUNK_SIZE = 400 # tokens (approx characters / 4)
|
| 50 |
+
CHUNK_OVERLAP = 60 # token overlap between chunks
|
| 51 |
+
CHUNK_SIZE_CHARS = CHUNK_SIZE * 4 # character approximation
|
| 52 |
+
CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP * 4
|
| 53 |
+
|
| 54 |
+
# βββ Embeddings βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 55 |
+
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
| 56 |
+
EMBEDDING_DEVICE = "cpu"
|
| 57 |
+
|
| 58 |
+
# βββ Chroma βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
+
CHROMA_COLLECTION_NAME = "devdocs"
|
| 60 |
+
|
| 61 |
+
# βββ Retrieval ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
+
DEFAULT_TOP_K = 5
|
| 63 |
+
DEFAULT_SEARCH_TYPE = "similarity" # "similarity" | "mmr"
|
| 64 |
+
MMR_FETCH_K = 20 # candidate pool for MMR
|
| 65 |
+
MMR_LAMBDA_MULT = 0.5 # diversity vs relevance balance
|
| 66 |
+
|
| 67 |
+
# βββ LLM ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
+
LLM_MODEL = "openai/gpt-4.1-nano" # via litellm
|
| 69 |
+
LLM_MAX_TOKENS = 1024
|
| 70 |
+
LLM_TEMPERATURE = 0.1
|
| 71 |
+
# load_dotenv() only reports whether a .env file was found (it returns a bool),
# so the key itself must be read from the environment afterwards. Reading via
# os.getenv also picks up a key provided by the real environment when no .env
# file exists; the value is None when no key is configured.
load_dotenv(dotenv_path=Path(__file__).parent / ".env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 72 |
+
|
| 73 |
+
# βββ Evaluation βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 74 |
+
JUDGE_MODEL = "openai/gpt-4.1-nano"
|
| 75 |
+
EVAL_TOP_K = 5
|
dev.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c43c3856f7b4b9f495e20dab4ab8ecb6d0aba1195fece864b364ecf8befa0b1c
|
| 3 |
+
size 1629940
|
evaluation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# evaluation package
|
evaluation/judge.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
judge.py — LLM-as-a-judge answer quality evaluation.
|
| 3 |
+
|
| 4 |
+
Scores the generated answer on four dimensions (each 1–5):
|
| 5 |
+
- Accuracy : Is the answer factually correct given the context?
|
| 6 |
+
- Completeness : Does it fully address the question?
|
| 7 |
+
- Relevance : Is the answer focused and on-topic?
|
| 8 |
+
- Groundedness : Is every claim supported by the retrieved context?
|
| 9 |
+
|
| 10 |
+
Uses a single structured LLM call returning JSON to minimise cost.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import logging
|
| 15 |
+
from typing import List
|
| 16 |
+
|
| 17 |
+
import litellm
|
| 18 |
+
from pydantic import BaseModel, Field
|
| 19 |
+
|
| 20 |
+
from config import JUDGE_MODEL, OPENAI_API_KEY
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# βββ Pydantic output model ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
|
| 27 |
+
class AnswerQualityScores(BaseModel):
    """Structured LLM-judge evaluation scores.

    Each dimension is scored 1–5 by the judge. The lower bound is 0 rather
    than 1 because ``judge_answer`` uses all-zero scores as a "not evaluated"
    sentinel when no API key is configured or the judge call fails; with a
    lower bound of 1 those fallback objects could not be constructed.
    """
    accuracy: int = Field(..., ge=0, le=5, description="Factual accuracy (1–5; 0 = not evaluated)")
    completeness: int = Field(..., ge=0, le=5, description="How fully the question is answered (1–5; 0 = not evaluated)")
    relevance: int = Field(..., ge=0, le=5, description="Relevance to the question (1–5; 0 = not evaluated)")
    groundedness: int = Field(..., ge=0, le=5, description="Claims backed by retrieved context (1–5; 0 = not evaluated)")
    overall: float = Field(..., description="Mean of the four scores")
    reasoning: str = Field(..., description="One-sentence justification from the judge")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
_JUDGE_SYSTEM = """You are a strict, impartial evaluator of AI-generated answers about codebases.
|
| 38 |
+
|
| 39 |
+
Given:
|
| 40 |
+
- A user question
|
| 41 |
+
- Retrieved code context
|
| 42 |
+
- A generated answer
|
| 43 |
+
|
| 44 |
+
Score the answer on FOUR criteria, each from 1 to 5:
|
| 45 |
+
accuracy : Is every claim factually correct based on the context?
|
| 46 |
+
completeness : Does the answer fully address all parts of the question?
|
| 47 |
+
relevance : Is the answer focused on the question without padding?
|
| 48 |
+
groundedness : Are all claims directly supported by the retrieved context?
|
| 49 |
+
|
| 50 |
+
Respond ONLY with valid JSON matching exactly this schema (no extra keys):
|
| 51 |
+
{
|
| 52 |
+
"accuracy": <int 1-5>,
|
| 53 |
+
"completeness": <int 1-5>,
|
| 54 |
+
"relevance": <int 1-5>,
|
| 55 |
+
"groundedness": <int 1-5>,
|
| 56 |
+
"reasoning": "<one sentence justification>"
|
| 57 |
+
}"""
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # An optional language tag may directly follow the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def judge_answer(
    query: str,
    context_docs: List,
    answer: str,
) -> AnswerQualityScores:
    """
    Evaluate an LLM-generated answer using an LLM judge.

    Makes one ``litellm.completion`` call and parses the JSON reply into an
    ``AnswerQualityScores`` model. Each context document is truncated to its
    first 400 characters before being sent to the judge.

    Args:
        query:        The user's original question.
        context_docs: Documents used as context; only ``page_content`` is read.
        answer:       The generated answer to evaluate.

    Returns:
        AnswerQualityScores with per-dimension scores and their mean. When no
        API key is configured, or the call or parsing fails, all-zero scores
        are returned with the reason in ``reasoning``.

    NOTE(review): the all-zero fallback only validates if AnswerQualityScores
    permits 0 for its score fields; confirm the model's bounds.
    """
    if not OPENAI_API_KEY:
        # Return neutral scores when no API key is configured.
        return AnswerQualityScores(
            accuracy=0, completeness=0, relevance=0, groundedness=0,
            overall=0.0, reasoning="No API key — evaluation skipped."
        )

    context_text = "\n\n".join(
        f"[{i+1}] {d.page_content[:400]}" for i, d in enumerate(context_docs)
    )
    user_msg = (
        f"Question: {query}\n\n"
        f"Retrieved Context:\n{context_text}\n\n"
        f"Generated Answer:\n{answer}"
    )

    try:
        response = litellm.completion(
            model=JUDGE_MODEL,
            messages=[
                {"role": "system", "content": _JUDGE_SYSTEM},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=200,
            temperature=0.0,
        )
        # The message content can be None (e.g. an empty completion); treat it
        # as an empty string so the failure surfaces as a JSON parse error.
        raw = response.choices[0].message.content or ""

        # str.lstrip("```json") strips a *set of characters*, not a prefix, so
        # fences are removed explicitly instead.
        data = json.loads(_strip_code_fences(raw))

        scores_sum = data["accuracy"] + data["completeness"] + data["relevance"] + data["groundedness"]
        return AnswerQualityScores(
            accuracy=data["accuracy"],
            completeness=data["completeness"],
            relevance=data["relevance"],
            groundedness=data["groundedness"],
            overall=round(scores_sum / 4, 2),
            reasoning=data.get("reasoning", ""),
        )

    except Exception as e:
        # Deliberately broad: evaluation is best-effort and must not break the
        # query flow, whatever the provider or the parser raises.
        logger.error("Judge evaluation failed: %s", e)
        return AnswerQualityScores(
            accuracy=0, completeness=0, relevance=0, groundedness=0,
            overall=0.0, reasoning=f"Evaluation failed: {e}"
        )
|
evaluation/metrics.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
metrics.py — Retrieval quality metrics.
|
| 3 |
+
|
| 4 |
+
Implements:
|
| 5 |
+
- Recall@K : fraction of relevant docs retrieved in top-K
|
| 6 |
+
- MRR : Mean Reciprocal Rank of the first relevant doc
|
| 7 |
+
- nDCG : Normalized Discounted Cumulative Gain
|
| 8 |
+
|
| 9 |
+
Relevance is determined by keyword matching between the query and chunk content.
|
| 10 |
+
This is a proxy measure used when ground-truth labels are unavailable.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import math
|
| 14 |
+
import re
|
| 15 |
+
import logging
|
| 16 |
+
from typing import List
|
| 17 |
+
|
| 18 |
+
from langchain_core.documents import Document
|
| 19 |
+
from pydantic import BaseModel, Field
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# βββ Pydantic output model ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
+
|
| 26 |
+
class RetrievalMetrics(BaseModel):
    """Structured container for retrieval evaluation scores.

    The three score fields are validated to lie in the closed range
    [0.0, 1.0]; the two count fields carry no range constraint.
    """
    # Score reported under the name Recall@K.
    recall_at_k: float = Field(..., ge=0.0, le=1.0, description="Recall@K")
    # Reciprocal rank of the first relevant document.
    mrr: float = Field(..., ge=0.0, le=1.0, description="Mean Reciprocal Rank")
    # Normalised discounted cumulative gain over the retrieved list.
    ndcg: float = Field(..., ge=0.0, le=1.0, description="nDCG@K")
    # Number of documents the scores were computed over.
    top_k: int = Field(..., description="K used for evaluation")
    # How many of those documents were flagged relevant.
    num_relevant: int = Field(..., description="Number of docs judged relevant")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# βββ Relevance oracle βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
+
|
| 37 |
+
def _extract_keywords(text: str) -> set:
    """Extract lowercase alphabetic tokens (length >= 3) from text.

    A token is a maximal run of ASCII letters. Underscores and digits act as
    separators, so ``get_vectorstore`` yields ``get`` and ``vectorstore``.
    The previous ``\\b``-anchored pattern treated ``_`` and digits as word
    characters and therefore produced no tokens for snake_case identifiers,
    which make up most of a code chunk.

    Args:
        text: Arbitrary text (a query or a chunk of source code).

    Returns:
        Set of distinct lowercase tokens of three or more letters.
    """
    return set(re.findall(r"[a-z]{3,}", text.lower()))
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _is_relevant(query: str, doc: Document, threshold: int = 2) -> bool:
    """
    Decide relevance from the keyword overlap between a query and a document.

    Args:
        query:     User question.
        doc:       Retrieved document; only ``page_content`` is inspected.
        threshold: Minimum number of shared keywords to count as relevant.

    Returns:
        True when the number of shared keywords is at least ``threshold``.
    """
    shared_keywords = _extract_keywords(query).intersection(
        _extract_keywords(doc.page_content)
    )
    return len(shared_keywords) >= threshold
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# βββ Metric functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
+
|
| 62 |
+
def _compute_relevance_flags(query: str, docs: List[Document]) -> List[int]:
    """Return one binary flag per document, in order (1 = relevant, 0 = not)."""
    flags: List[int] = []
    for candidate in docs:
        # _is_relevant returns a bool; int() maps True/False to 1/0.
        flags.append(int(_is_relevant(query, candidate)))
    return flags
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def recall_at_k(relevance: List[int]) -> float:
    """
    Share of the retrieved documents that were flagged relevant.

    No ground-truth pool of relevant documents is available, so the value is
    the count of relevant flags divided by the number of retrieved documents.

    Args:
        relevance: Binary relevance list ordered by retrieval rank.

    Returns:
        Relevant count divided by list length, or 0.0 when nothing is relevant
        (which also covers an empty list).
    """
    hits = sum(relevance)
    return hits / len(relevance) if hits else 0.0
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def mean_reciprocal_rank(relevance: List[int]) -> float:
    """
    Reciprocal of the 1-based rank of the first relevant document.

    Args:
        relevance: Binary relevance list ordered by retrieval rank.

    Returns:
        ``1 / rank`` of the first entry equal to 1, or 0.0 if there is none.
    """
    first_hit = next(
        (position for position, flag in enumerate(relevance, start=1) if flag == 1),
        None,
    )
    return 0.0 if first_hit is None else 1.0 / first_hit
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def ndcg_at_k(relevance: List[int]) -> float:
    """
    Normalised DCG over a binary relevance list.

    The ideal ordering is the same flags sorted with relevant entries first,
    so the score measures how close the retrieved order is to that ordering.

    Args:
        relevance: Binary relevance list ordered by retrieval rank.

    Returns:
        Actual DCG divided by ideal DCG, or 0.0 when the ideal DCG is zero
        (no relevant entries, or an empty list).
    """
    def _discounted_gain(gains: List[int]) -> float:
        # Position 0 is rank 1, discounted by log2(rank + 1) = log2(position + 2).
        total = 0.0
        for position, gain in enumerate(gains):
            total += gain / math.log2(position + 2)
        return total

    best_possible = _discounted_gain(sorted(relevance, reverse=True))
    if best_possible == 0:
        return 0.0
    return _discounted_gain(relevance) / best_possible
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def compute_retrieval_metrics(query: str, docs: List[Document]) -> RetrievalMetrics:
    """
    Score one query/result pair with every retrieval metric in this module.

    Args:
        query: User's natural language question.
        docs:  Retrieved documents in retrieval rank order.

    Returns:
        RetrievalMetrics with each score rounded to four decimals, plus the
        number of documents scored and the number flagged relevant.
    """
    flags = _compute_relevance_flags(query, docs)
    rounded_scores = {
        "recall_at_k": round(recall_at_k(flags), 4),
        "mrr": round(mean_reciprocal_rank(flags), 4),
        "ndcg": round(ndcg_at_k(flags), 4),
    }
    return RetrievalMetrics(
        top_k=len(docs),
        num_relevant=sum(flags),
        **rounded_scores,
    )
|
ingestion/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ingestion package
|
ingestion/chunker.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
chunker.py — Code-aware document chunking.
|
| 3 |
+
|
| 4 |
+
Strategy:
|
| 5 |
+
1. For Python files: split by top-level functions/classes using AST.
|
| 6 |
+
2. For all other files: fall back to character-level sliding window chunks.
|
| 7 |
+
|
| 8 |
+
Each chunk is a LangChain Document with rich metadata.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import ast
|
| 12 |
+
import logging
|
| 13 |
+
from typing import List, Dict
|
| 14 |
+
|
| 15 |
+
from langchain_core.documents import Document
|
| 16 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 17 |
+
|
| 18 |
+
from config import CHUNK_SIZE_CHARS, CHUNK_OVERLAP_CHARS
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _chunk_python_by_ast(content: str, file_path: str) -> List[Document]:
    """
    Parse Python source and extract top-level functions and classes as chunks.

    Falls back to generic chunking if the source cannot be parsed or if it
    contains no top-level function or class definitions.

    Args:
        content:   Raw Python source code.
        file_path: Source file path for metadata.

    Returns:
        List of Documents, one per top-level function/class (or fallback
        chunks). Methods and nested functions are not emitted separately;
        they are part of their enclosing definition's chunk.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers sources containing null bytes on Python versions
        # that do not report them as SyntaxError.
        logger.warning("AST parse failed for %s, using fallback chunker.", file_path)
        return _chunk_generic(content, file_path)

    lines = content.splitlines(keepends=True)
    documents: List[Document] = []

    # Iterate the module body directly. ast.walk() also yields nested
    # definitions, and every def/class node is an ast.stmt, so the previous
    # isinstance(node, ast.stmt) guard never filtered anything and methods
    # were indexed twice (inside their class chunk and on their own).
    for node in tree.body:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue

        # lineno is 1-based and end_lineno is inclusive, which maps onto the
        # half-open slice lines[lineno - 1:end_lineno].
        start = node.lineno - 1
        end = node.end_lineno
        chunk_text = "".join(lines[start:end])

        kind = "class" if isinstance(node, ast.ClassDef) else "function"
        documents.append(Document(
            page_content=chunk_text,
            metadata={
                "file_path": file_path,
                "symbol_name": node.name,
                "symbol_type": kind,
                "start_line": node.lineno,
                "end_line": node.end_lineno,
            }
        ))

    if not documents:
        # File has no top-level definitions (e.g. a script): use the fallback.
        return _chunk_generic(content, file_path)

    return documents
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _chunk_generic(content: str, file_path: str, extension: str = "") -> List[Document]:
    """
    Split arbitrary text into overlapping chunks with a recursive splitter.

    Used for non-Python files and for Python files that cannot be chunked by
    their AST.

    Args:
        content:   Raw file content.
        file_path: Source file path for metadata.
        extension: File extension hint (currently unused).

    Returns:
        One Document per chunk, in order, each tagged with its ``chunk_index``.
    """
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        chunk_size=CHUNK_SIZE_CHARS,
        chunk_overlap=CHUNK_OVERLAP_CHARS,
    )

    chunk_docs: List[Document] = []
    for position, piece in enumerate(splitter.split_text(content)):
        chunk_meta = {
            "file_path": file_path,
            "symbol_name": "",
            "symbol_type": "chunk",
            "chunk_index": position,
        }
        chunk_docs.append(Document(page_content=piece, metadata=chunk_meta))
    return chunk_docs
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def chunk_documents(raw_docs: List[Dict]) -> List[Document]:
    """
    Route every loaded file to the chunker that suits its extension.

    Files whose content is empty or whitespace-only are skipped. ``.py`` files
    go through the AST chunker; everything else uses the generic splitter.

    Args:
        raw_docs: Dicts with ``content`` and ``file_path`` keys and an
                  optional ``extension`` key.

    Returns:
        Flat list of Document objects, in input order.
    """
    collected: List[Document] = []

    for entry in raw_docs:
        text = entry["content"]
        path = entry["file_path"]
        suffix = entry.get("extension", "")

        if text.strip():
            if suffix == ".py":
                collected.extend(_chunk_python_by_ast(text, path))
            else:
                collected.extend(_chunk_generic(text, path, suffix))

    logger.info(f"Produced {len(collected)} chunks from {len(raw_docs)} files.")
    return collected
|
ingestion/indexer.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
indexer.py — Embeds chunks and persists them in ChromaDB.
|
| 3 |
+
|
| 4 |
+
Uses HuggingFace all-MiniLM-L6-v2 (free, 384-dim).
|
| 5 |
+
ChromaDB is stored locally so embeddings are never recomputed
|
| 6 |
+
unless the collection is explicitly cleared.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from typing import List
|
| 11 |
+
|
| 12 |
+
from langchain_core.documents import Document
|
| 13 |
+
from langchain_chroma import Chroma
|
| 14 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 15 |
+
|
| 16 |
+
from config import VECTOR_DB_DIR, EMBEDDING_MODEL, EMBEDDING_DEVICE, CHROMA_COLLECTION_NAME
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
# Module-level singleton so the embedding model is loaded only once per process.
|
| 21 |
+
_embedding_model: HuggingFaceEmbeddings | None = None
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_embedding_model() -> HuggingFaceEmbeddings:
    """
    Return the process-wide embedding model, creating it on first use.

    The instance is cached in the module-level ``_embedding_model`` so the
    model is constructed at most once per process.

    Returns:
        HuggingFaceEmbeddings configured from EMBEDDING_MODEL and
        EMBEDDING_DEVICE, with normalised embeddings.
    """
    global _embedding_model
    if _embedding_model is not None:
        return _embedding_model

    logger.info(f"Loading embedding model: {EMBEDDING_MODEL}")
    _embedding_model = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={"device": EMBEDDING_DEVICE},
        encode_kwargs={"normalize_embeddings": True},
    )
    return _embedding_model
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def get_vectorstore() -> Chroma:
    """
    Build a Chroma handle for the configured collection.

    The handle uses the shared embedding model and the on-disk directory
    given by VECTOR_DB_DIR.

    Returns:
        Chroma instance backed by the local vector_db directory.
    """
    store_options = {
        "collection_name": CHROMA_COLLECTION_NAME,
        "embedding_function": get_embedding_model(),
        "persist_directory": str(VECTOR_DB_DIR),
    }
    return Chroma(**store_options)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def index_documents(chunks: List[Document]) -> Chroma:
    """
    Replace the contents of the Chroma collection with the given chunks.

    The existing collection is dropped first, then rebuilt from ``chunks``,
    so a re-index never mixes old and new documents.

    Args:
        chunks: LangChain Documents produced by the chunker.

    Returns:
        The freshly populated Chroma vector store.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if not chunks:
        raise ValueError("No chunks to index.")

    embedder = get_embedding_model()
    storage_dir = str(VECTOR_DB_DIR)

    # Open the current collection only so that it can be dropped.
    stale_store = Chroma(
        collection_name=CHROMA_COLLECTION_NAME,
        embedding_function=embedder,
        persist_directory=storage_dir,
    )
    stale_store.delete_collection()

    # Rebuild the collection from scratch with the new chunks.
    fresh_store = Chroma.from_documents(
        documents=chunks,
        embedding=embedder,
        collection_name=CHROMA_COLLECTION_NAME,
        persist_directory=storage_dir,
    )

    logger.info(f"Indexed {len(chunks)} chunks into Chroma collection '{CHROMA_COLLECTION_NAME}'.")
    return fresh_store
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def is_index_populated() -> bool:
    """
    Check whether the Chroma collection contains any documents.

    This is a best-effort probe: any failure while opening or counting the
    collection is logged and reported as "not populated" instead of being
    raised to the caller.

    Returns:
        True if at least one document is stored, False otherwise (including
        when the collection could not be inspected).
    """
    try:
        return get_vectorstore()._collection.count() > 0
    except Exception:
        # Keep the original "never raise" contract, but leave a trace so a
        # broken or unreadable store is not silently mistaken for an empty one.
        logger.warning(
            "Could not inspect the Chroma collection; treating the index as empty.",
            exc_info=True,
        )
        return False
|
ingestion/loader.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
loader.py — Handles ZIP extraction and file loading.
|
| 3 |
+
|
| 4 |
+
Responsibilities:
|
| 5 |
+
- Extract uploaded ZIP archives
|
| 6 |
+
- Filter files by allowed extensions
|
| 7 |
+
- Read file contents safely
|
| 8 |
+
- Return a list of raw document dicts
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import zipfile
|
| 12 |
+
import os
|
| 13 |
+
import logging
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import List, Dict
|
| 16 |
+
|
| 17 |
+
from config import ALLOWED_EXTENSIONS, MAX_FILE_SIZE_MB, UPLOAD_DIR
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def extract_zip(zip_path: str) -> Path:
    """
    Extract a ZIP archive into ``UPLOAD_DIR / <archive stem>``.

    The target directory is named after the archive, so uploading an archive
    with the same name again reuses the same directory. Any previous contents
    are removed first; otherwise files deleted from the newer archive would
    linger on disk and be picked up again by ``load_files``.

    Args:
        zip_path: Path to the uploaded .zip file.

    Returns:
        Path to the extraction directory.
    """
    import shutil  # local import: only this function needs it

    archive = Path(zip_path)
    extract_dir = UPLOAD_DIR / archive.stem

    # Only ever delete a real, direct child of UPLOAD_DIR. Odd stems such as
    # "." or ".." resolve elsewhere and are deliberately left untouched.
    is_own_subdir = (
        extract_dir.is_dir()
        and extract_dir.resolve().parent == Path(UPLOAD_DIR).resolve()
    )
    if is_own_subdir:
        shutil.rmtree(extract_dir)

    extract_dir.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive, "r") as zf:
        zf.extractall(extract_dir)

    logger.info(f"Extracted ZIP to: {extract_dir}")
    return extract_dir
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def load_files(extract_dir: Path) -> List[Dict]:
    """
    Walk the extraction directory and load allowed source files.

    Files are visited in sorted order so the returned list is the same on
    every run. A file that cannot be inspected or read is logged and skipped;
    it never aborts the whole load.

    Each returned dict contains:
        - content (str): raw file text (undecodable bytes are replaced)
        - file_path (str): path relative to ``extract_dir``
        - extension (str): lower-cased file extension

    Args:
        extract_dir: Directory containing extracted files.

    Returns:
        List of raw document dicts.
    """
    documents: List[Dict] = []
    max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024

    for root, dirs, files in os.walk(extract_dir):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for filename in sorted(files):
            full_path = Path(root) / filename
            ext = full_path.suffix.lower()

            if ext not in ALLOWED_EXTENSIONS:
                continue

            try:
                # stat() is inside the try so an unreadable entry is skipped
                # like any other read failure instead of crashing the walk.
                if full_path.stat().st_size > max_bytes:
                    logger.warning(f"Skipping large file: {full_path}")
                    continue
                content = full_path.read_text(encoding="utf-8", errors="replace")
            except OSError as e:
                logger.warning(f"Failed to read {full_path}: {e}")
                continue

            documents.append({
                "content": content,
                "file_path": str(full_path.relative_to(extract_dir)),
                "extension": ext,
            })

    logger.info(f"Loaded {len(documents)} files from {extract_dir}")
    return documents
|
llm/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# llm package
|
llm/generator.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
generator.py — LLM-based answer generation from retrieved context.
|
| 3 |
+
|
| 4 |
+
Uses litellm so the model can be swapped by changing config.LLM_MODEL.
|
| 5 |
+
The prompt is designed to:
|
| 6 |
+
- Ground the answer strictly in retrieved context
|
| 7 |
+
- Reference source files by name
|
| 8 |
+
- Decline gracefully when context is insufficient
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
from typing import List, Tuple
|
| 13 |
+
|
| 14 |
+
from langchain_core.documents import Document
|
| 15 |
+
import litellm
|
| 16 |
+
|
| 17 |
+
from config import LLM_MODEL, LLM_MAX_TOKENS, LLM_TEMPERATURE, OPENAI_API_KEY
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
_SYSTEM_PROMPT = """You are DevDocs AI, an expert assistant that answers questions about codebases.
|
| 22 |
+
|
| 23 |
+
Rules:
|
| 24 |
+
1. Answer ONLY using the provided code context. Do NOT hallucinate.
|
| 25 |
+
2. If the context is insufficient, say so clearly.
|
| 26 |
+
3. Always cite the source file(s) at the end of your answer under a "Sources:" heading.
|
| 27 |
+
4. Be concise and precise. Use code snippets when helpful.
|
| 28 |
+
5. Format code blocks with triple backticks and the appropriate language tag.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _build_context_block(docs: List[Document]) -> str:
    """
    Render retrieved documents as one numbered, fenced context string.

    Each document becomes a header line (index, file path and, when present,
    the symbol type and name) followed by its stripped content inside a
    fenced block. Entries are separated by a blank line.

    Args:
        docs: Retrieved LangChain Documents.

    Returns:
        Formatted context string (empty when ``docs`` is empty).
    """

    def _render(position, doc):
        info = doc.metadata
        symbol = info.get("symbol_name", "")
        title = f"[{position}] File: {info.get('file_path', 'unknown')}"
        if symbol:
            # Symbol details are only shown when the chunk carries a name.
            title += f" | {info.get('symbol_type', 'chunk')}: {symbol}"
        return f"{title}\n```\n{doc.page_content.strip()}\n```"

    return "\n\n".join(_render(n, doc) for n, doc in enumerate(docs, 1))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def generate_answer(
    query: str,
    docs: List[Document],
) -> Tuple[str, List[str]]:
    """
    Generate a grounded answer from retrieved documents.

    Args:
        query: The user's natural language question.
        docs: Retrieved Document chunks (context).

    Returns:
        Tuple of (answer_text, source_file_list). The source list holds each
        non-empty ``file_path`` once, in the order the chunks were retrieved.
        When no API key is configured, a warning message and an empty list
        are returned without calling the LLM.

    Raises:
        RuntimeError: If the LLM call fails or returns no content.
    """
    if not OPENAI_API_KEY:
        return (
            "⚠️ No OpenAI API key configured. Set the OPENAI_API_KEY environment variable.",
            [],
        )

    context_block = _build_context_block(docs)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the list
    # is stable across runs (a set is not). Chunks without a path are skipped.
    paths = (doc.metadata.get("file_path", "") for doc in docs)
    source_files = list(dict.fromkeys(path for path in paths if path))

    user_message = (
        f"Question: {query}\n\n"
        f"Context (retrieved code):\n{context_block}"
    )

    try:
        response = litellm.completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            max_tokens=LLM_MAX_TOKENS,
            temperature=LLM_TEMPERATURE,
        )
        content = response.choices[0].message.content
    except Exception as e:
        logger.error(f"LLM generation failed: {e}")
        raise RuntimeError(f"LLM generation failed: {e}") from e

    # A response can carry no text; report that clearly instead of failing
    # on ``None.strip()``.
    if content is None:
        logger.error("LLM generation failed: model returned no content")
        raise RuntimeError("LLM generation failed: model returned no content")

    answer = content.strip()
    logger.info(f"Generated answer ({len(answer)} chars) for: '{query[:60]}'")
    return answer, source_files
|
requirements.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ββ Core LangChain (split into focused packages as of v0.2+) ββββββββββββββββββ
|
| 2 |
+
langchain>=0.3.0
|
| 3 |
+
langchain-core>=0.3.0
|
| 4 |
+
langchain-text-splitters>=0.3.0
|
| 5 |
+
langchain-community>=0.3.0
|
| 6 |
+
langchain-chroma>=0.1.4
|
| 7 |
+
|
| 8 |
+
# ββ Embeddings (FREE, local) ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
langchain-huggingface>=0.1.0 # replaces langchain_community.embeddings.HuggingFaceEmbeddings
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Core
|
| 13 |
+
gradio>=4.36.0
|
| 14 |
+
langchain>=0.2.0
|
| 15 |
+
langchain-community>=0.2.0
|
| 16 |
+
chromadb>=0.5.0
|
| 17 |
+
sentence-transformers>=2.7.0
|
| 18 |
+
litellm>=1.40.0
|
| 19 |
+
pydantic>=2.0.0
|
| 20 |
+
|
| 21 |
+
# File handling
|
| 22 |
+
python-dotenv>=1.0.0
|
| 23 |
+
|
| 24 |
+
# Optional: faster tokenisation for chunking
|
| 25 |
+
tiktoken>=0.7.0
|
retrieval/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# retrieval package
|
retrieval/query_rewriter.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
query_rewriter.py — Lightweight query reformulation before retrieval.
|
| 3 |
+
|
| 4 |
+
Two modes:
|
| 5 |
+
1. Rule-based (free): simple heuristic expansions (default, zero cost).
|
| 6 |
+
2. LLM-based (optional): one cheap LLM call to reformulate the query.
|
| 7 |
+
|
| 8 |
+
The LLM path is only invoked when explicitly requested to keep costs minimal.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import re
|
| 12 |
+
import logging
|
| 13 |
+
|
| 14 |
+
import litellm
|
| 15 |
+
|
| 16 |
+
from config import LLM_MODEL, LLM_TEMPERATURE, OPENAI_API_KEY
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
# Heuristic keyword expansions (extend as needed).
|
| 21 |
+
_EXPANSIONS = {
|
| 22 |
+
r"\bauth\b": "authentication authorization",
|
| 23 |
+
r"\bdb\b": "database",
|
| 24 |
+
r"\bapi\b": "API endpoint route handler",
|
| 25 |
+
r"\bconfig\b": "configuration settings",
|
| 26 |
+
r"\berror\b": "error exception handling",
|
| 27 |
+
r"\btest\b": "unit test test case",
|
| 28 |
+
r"\bdeploy\b": "deployment CI CD pipeline",
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def rule_based_rewrite(query: str) -> str:
    """
    Apply simple regex-based expansions to common abbreviations.

    Each pattern in ``_EXPANSIONS`` is matched case-insensitively and replaced
    by its expansion. Tokens that are part of a file name, path or dotted
    identifier (``config.py``, ``utils/db.py``, ``db.connect``) are left
    untouched, because expanding them would destroy the exact name the user
    is searching for.

    Args:
        query: Original user query.

    Returns:
        The expanded query string (unchanged when nothing matched).
    """
    rewritten = query
    for pattern, expansion in _EXPANSIONS.items():
        # Skip tokens preceded by ".", "/" or "\", or followed by "/", "\" or
        # ".<word char>". A sentence-ending period ("the db.") still expands.
        guarded = rf"(?<![./\\]){pattern}(?![/\\]|\.\w)"
        rewritten = re.sub(guarded, expansion, rewritten, flags=re.IGNORECASE)
    if rewritten != query:
        logger.debug(f"Rule-based rewrite: '{query}' → '{rewritten}'")
    return rewritten
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def llm_rewrite(query: str) -> str:
    """
    Use a cheap LLM call to reformulate the query for better retrieval.

    This is optional and costs ~1 LLM call per query. The rule-based rewrite
    is used instead whenever no API key is configured, the LLM call fails, or
    the LLM returns an empty reply.

    Args:
        query: Original user query.

    Returns:
        Reformulated query optimised for semantic code search.
    """
    if not OPENAI_API_KEY:
        logger.warning("No API key set; falling back to rule-based rewrite.")
        return rule_based_rewrite(query)

    system_prompt = (
        "You are a search query optimizer for code repositories. "
        "Rewrite the user's question into a concise, keyword-rich query "
        "that will best match relevant code chunks. "
        "Output ONLY the rewritten query — no explanation."
    )
    try:
        response = litellm.completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
            max_tokens=80,
            temperature=LLM_TEMPERATURE,
            api_key=OPENAI_API_KEY,
        )
        rewritten = (response.choices[0].message.content or "").strip()
    except Exception as e:
        logger.warning(f"LLM rewrite failed ({e}); falling back to rule-based.")
        return rule_based_rewrite(query)

    # An empty rewrite would send a blank query to the vector store.
    if not rewritten:
        logger.warning("LLM rewrite returned an empty query; falling back to rule-based.")
        return rule_based_rewrite(query)

    logger.info(f"LLM rewrite: '{query}' → '{rewritten}'")
    return rewritten
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def rewrite_query(query: str, use_llm: bool = False) -> str:
    """
    Rewrite a user query with the selected strategy.

    Args:
        query: Raw user question.
        use_llm: If True, use the LLM rewrite (costs 1 LLM call);
            otherwise use the rule-based rewrite.

    Returns:
        Rewritten query string.
    """
    strategy = llm_rewrite if use_llm else rule_based_rewrite
    return strategy(query)
|
retrieval/retriever.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
retriever.py — Configurable retrieval over the Chroma vector store.
|
| 3 |
+
|
| 4 |
+
Supports:
|
| 5 |
+
- Similarity search (cosine distance ranking)
|
| 6 |
+
- MMR (Maximum Marginal Relevance) for diversity-aware retrieval
|
| 7 |
+
|
| 8 |
+
Returns LangChain Documents with scores where applicable.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
from typing import List, Tuple
|
| 13 |
+
|
| 14 |
+
from langchain_core.documents import Document
|
| 15 |
+
from langchain_community.vectorstores import Chroma
|
| 16 |
+
|
| 17 |
+
from config import DEFAULT_TOP_K, MMR_FETCH_K, MMR_LAMBDA_MULT
|
| 18 |
+
from ingestion.indexer import get_vectorstore
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def retrieve(
    query: str,
    search_type: str = "similarity",
    top_k: int = DEFAULT_TOP_K,
) -> Tuple[List[Document], List[float]]:
    """
    Retrieve the most relevant document chunks for a query.

    Args:
        query: Natural language question from the user.
        search_type: "mmr" for Maximum Marginal Relevance; any other value
            selects plain similarity search.
        top_k: Number of chunks to return.

    Returns:
        Tuple of (documents, scores).
        Scores are cosine similarities clamped to [0, 1] for similarity
        search; a list of zeros for MMR (no per-document score is returned
        by the MMR search call).

    Raises:
        RuntimeError: If the vector store is empty.
    """
    vectorstore = get_vectorstore()

    if vectorstore._collection.count() == 0:
        raise RuntimeError("Vector store is empty. Please index a repository first.")

    if search_type == "mmr":
        docs = vectorstore.max_marginal_relevance_search(
            query=query,
            k=top_k,
            fetch_k=max(MMR_FETCH_K, top_k * 4),
            lambda_mult=MMR_LAMBDA_MULT,
        )
        scores = [0.0] * len(docs)
    else:
        results = vectorstore.similarity_search_with_score(query=query, k=top_k)
        docs = [doc for doc, _ in results]
        # The collection is created without a custom distance setting, so
        # Chroma reports squared L2 distance, and the indexer normalises
        # embeddings to unit length. For unit vectors
        #     ||a - b||^2 = 2 - 2 * cos(a, b)   =>   cos = 1 - d / 2.
        # The previous "1 - d" mapped every hit with cosine <= 0.5 to 0.0.
        # NOTE(review): revisit if the collection's distance space or the
        # normalisation setting in ingestion/indexer.py ever changes.
        scores = [
            min(1.0, max(0.0, 1.0 - distance / 2.0))
            for _, distance in results
        ]

    logger.info(f"[{search_type.upper()}] Retrieved {len(docs)} chunks for: '{query[:60]}'")
    return docs, scores
|
utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# utils package
|
utils/helpers.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
helpers.py — Shared utility functions used across the project.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import List
|
| 9 |
+
|
| 10 |
+
from langchain_core.documents import Document
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def setup_logging(level: int = logging.INFO) -> None:
    """
    Configure the root logger to write formatted records to stdout.

    Args:
        level: Minimum log level for the root logger (defaults to INFO).
    """
    stdout_handler = logging.StreamHandler(sys.stdout)
    options = {
        "level": level,
        "format": "%(asctime)s | %(levelname)-8s | %(name)s β %(message)s",
        "datefmt": "%H:%M:%S",
        "handlers": [stdout_handler],
    }
    logging.basicConfig(**options)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def format_chunks_for_display(docs: List[Document], scores: List[float]) -> str:
    """
    Render retrieved chunks as readable text for the Gradio UI.

    Each chunk gets a header line (rank, file path, optional symbol and its
    score) followed by the stripped chunk content. Chunks are separated by a
    blank line. Documents and scores are paired positionally.

    Args:
        docs: Retrieved LangChain Documents.
        scores: Corresponding similarity scores.

    Returns:
        Formatted multi-line string.
    """
    # NOTE(review): the decorative glyphs in the header strings look
    # mis-encoded in this copy of the file; confirm them against the repo.
    rendered = []
    rank = 0
    for doc, score in zip(docs, scores):
        rank += 1
        info = doc.metadata
        file_path = info.get("file_path", "unknown")
        symbol = info.get("symbol_name", "")
        symbol_type = info.get("symbol_type", "chunk")

        # Non-positive scores are shown as "N/A (MMR)".
        if score > 0:
            score_text = f"{score:.3f}"
        else:
            score_text = "N/A (MMR)"

        pieces = [f"βββ [{rank}] {file_path}"]
        if symbol:
            pieces.append(f" βΊ {symbol_type}:{symbol}")
        pieces.append(f" (score: {score_text}) βββ")
        title = "".join(pieces)

        rendered.append(f"{title}\n{doc.page_content.strip()}")

    return "\n\n".join(rendered)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def format_metrics_for_display(retrieval_metrics, answer_scores) -> str:
    """
    Format all evaluation metrics into a readable dashboard string.

    The panel is a fixed-width box. Every row is padded programmatically, so
    the right-hand border stays aligned regardless of how wide the individual
    values are.

    Args:
        retrieval_metrics: Object exposing ``top_k``, ``recall_at_k``, ``mrr``,
            ``ndcg`` and ``num_relevant`` (the RetrievalMetrics model).
        answer_scores: Object exposing ``accuracy``, ``completeness``,
            ``relevance``, ``groundedness``, ``overall`` and ``reasoning``
            (the AnswerQualityScores model).

    Returns:
        Formatted metrics string, one panel row per line.
    """
    inner_width = 38  # characters between the two vertical borders
    top = "╔" + "═" * inner_width + "╗"
    divider = "╠" + "═" * inner_width + "╣"
    bottom = "╚" + "═" * inner_width + "╝"

    def _row(text):
        # Left-align the text and pad it out to the right border.
        return f"║{text:<{inner_width}}║"

    def _field(label, value):
        # Labels share one column width so the colons line up.
        return _row(f"  {label:<16}: {value}")

    k = retrieval_metrics.top_k
    lines = [
        top,
        _row("EVALUATION METRICS PANEL".center(inner_width)),
        divider,
        _row(" RETRIEVAL METRICS"),
        _field(f"Recall@{k:<2}", f"{retrieval_metrics.recall_at_k:.4f}"),
        _field("MRR", f"{retrieval_metrics.mrr:.4f}"),
        _field(f"nDCG@{k:<2}", f"{retrieval_metrics.ndcg:.4f}"),
        _field("Relevant chunks", f"{retrieval_metrics.num_relevant}/{k}"),
        divider,
        _row(" ANSWER QUALITY (LLM Judge)"),
        _field("Accuracy", f"{answer_scores.accuracy}/5"),
        _field("Completeness", f"{answer_scores.completeness}/5"),
        _field("Relevance", f"{answer_scores.relevance}/5"),
        _field("Groundedness", f"{answer_scores.groundedness}/5"),
        _field("Overall Score", f"{answer_scores.overall:.2f}/5.00"),
        divider,
        # The reasoning excerpt keeps its original 38-character budget and
        # therefore has no right-hand border.
        f"║ Reasoning: {answer_scores.reasoning[:38]:<38}",
        bottom,
    ]
    return "\n".join(lines)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def save_temp_file(file_bytes: bytes, filename: str) -> Path:
    """
    Save raw bytes to the uploads directory.

    Only the final path component of ``filename`` is used, so a name such as
    ``"../../app.py"`` cannot place the file outside the uploads directory.
    The uploads directory is created if it does not exist yet.

    Args:
        file_bytes: Raw file content.
        filename: Target filename (any directory part is ignored).

    Returns:
        Path to the saved file.

    Raises:
        ValueError: If ``filename`` has no usable final component.
    """
    from config import UPLOAD_DIR

    safe_name = Path(filename).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload filename: {filename!r}")

    upload_dir = Path(UPLOAD_DIR)
    upload_dir.mkdir(parents=True, exist_ok=True)

    dest = upload_dir / safe_name
    dest.write_bytes(file_bytes)
    return dest
|