Spaces:
Sleeping
Sleeping
Upload 29 files
Browse files- .gitignore +3 -0
- README.md +206 -19
- agents/__init__.py +0 -0
- agents/explainer.py +44 -0
- agents/parser.py +46 -0
- agents/router.py +50 -0
- agents/solver.py +137 -0
- agents/verifier.py +63 -0
- data/__init__.py +0 -0
- data/memory.json +374 -0
- knowledge/docs/algebra.txt +30 -0
- knowledge/docs/calculus.txt +33 -0
- knowledge/docs/linear_algebra.txt +35 -0
- knowledge/docs/probability.txt +33 -0
- rag/__init__.py +0 -0
- rag/knowledge_base.py +104 -0
- rag/retriever.py +24 -0
- requirements.txt +19 -3
- streamlit_app.py +364 -0
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
- utils/__pycache__/audio.cpython-310.pyc +0 -0
- utils/__pycache__/hitl.cpython-310.pyc +0 -0
- utils/__pycache__/memory.cpython-310.pyc +0 -0
- utils/__pycache__/ocr.cpython-310.pyc +0 -0
- utils/audio.py +132 -0
- utils/hitl.py +93 -0
- utils/memory.py +124 -0
- utils/ocr.py +33 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
.env
|
| 3 |
+
__pycache__/
|
README.md
CHANGED
|
@@ -1,19 +1,206 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Math Mentor - Multimodal AI Math Problem Solver
|
| 2 |
+
|
| 3 |
+
Reliable AI application for solving JEE-style math problems with RAG, Multi-Agent System, and Human-in-the-Loop.
|
| 4 |
+
|
| 5 |
+
## Architecture
|
| 6 |
+
|
| 7 |
+
```mermaid
|
| 8 |
+
graph TD
|
| 9 |
+
A[User Input] --> B{Input Type}
|
| 10 |
+
B -->|Text| C[Parser Agent]
|
| 11 |
+
B -->|Image| D[OCR Processor] --> E{Confidence Check}
|
| 12 |
+
B -->|Audio| F[Audio Processor] --> G{Confidence Check}
|
| 13 |
+
|
| 14 |
+
E -->|Low < 0.7| HITL1[HITL Trigger]
|
| 15 |
+
E -->|High ≥ 0.7| C
|
| 16 |
+
G -->|Low < 0.6| HITL1
|
| 17 |
+
G -->|High ≥ 0.6| C
|
| 18 |
+
|
| 19 |
+
C --> H{Parser Check}
|
| 20 |
+
H -->|Needs Clarification| HITL2[HITL Trigger]
|
| 21 |
+
H -->|Clear| I[Router Agent]
|
| 22 |
+
|
| 23 |
+
I --> J[Memory Search]
|
| 24 |
+
J --> K[Retriever: RAG + Similar Problems]
|
| 25 |
+
K --> L[Solver Agent + Python Tool]
|
| 26 |
+
L --> M[Verifier Agent]
|
| 27 |
+
|
| 28 |
+
M --> N{Verification}
|
| 29 |
+
N -->|Confidence < 0.7| HITL3[HITL Trigger]
|
| 30 |
+
N -->|Confidence ≥ 0.7| O[Explainer Agent]
|
| 31 |
+
|
| 32 |
+
O --> P[User Interface]
|
| 33 |
+
P --> Q[User Feedback]
|
| 34 |
+
Q --> R[Memory Store]
|
| 35 |
+
R --> S[Learning Patterns]
|
| 36 |
+
S -.->|Reuse| K
|
| 37 |
+
S -.->|Apply Corrections| D
|
| 38 |
+
S -.->|Apply Corrections| F
|
| 39 |
+
|
| 40 |
+
HITL1 --> T[Human Review]
|
| 41 |
+
HITL2 --> T
|
| 42 |
+
HITL3 --> T
|
| 43 |
+
T --> U[Edit/Approve/Reject]
|
| 44 |
+
U --> R
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
## Features
|
| 48 |
+
|
| 49 |
+
✅ **All Mandatory Requirements Implemented**
|
| 50 |
+
|
| 51 |
+
### 1. Multimodal Input & Parsing
|
| 52 |
+
- **Image Input**: OCR with EasyOCR, confidence scoring, user editing
|
| 53 |
+
- **Audio Input**: Whisper ASR, math-specific phrase handling, confirmation
|
| 54 |
+
- **Text Input**: Direct typing
|
| 55 |
+
- **Low confidence triggers HITL**
|
| 56 |
+
|
| 57 |
+
### 2. Parser Agent
|
| 58 |
+
- Cleans OCR/ASR output
|
| 59 |
+
- Identifies ambiguity and missing information
|
| 60 |
+
- Structured JSON output with topic classification
|
| 61 |
+
- Triggers HITL when needed
|
| 62 |
+
|
| 63 |
+
### 3. RAG Pipeline
|
| 64 |
+
- 4 curated knowledge documents (algebra, probability, calculus, linear algebra)
|
| 65 |
+
- ChromaDB vector store with sentence-transformers
|
| 66 |
+
- Top-k retrieval with source attribution
|
| 67 |
+
- **No hallucinated citations**
|
| 68 |
+
|
| 69 |
+
### 4. Multi-Agent System (5+ Agents)
|
| 70 |
+
1. **Parser Agent**: Raw input → structured problem
|
| 71 |
+
2. **Router Agent**: Problem classification and strategy
|
| 72 |
+
3. **Solver Agent**: RAG + Python calculator tool
|
| 73 |
+
4. **Verifier Agent**: Correctness, units, edge cases
|
| 74 |
+
5. **Explainer Agent**: Student-friendly explanations
|
| 75 |
+
- **All agents use Gemini 2.0 Flash-Lite** (via `google-generativeai`)
|
| 76 |
+
|
| 77 |
+
### 5. Application UI (Streamlit)
|
| 78 |
+
- Input mode selector
|
| 79 |
+
- Extraction preview with confidence scores
|
| 80 |
+
- Agent trace visualization
|
| 81 |
+
- Retrieved context display
|
| 82 |
+
- Confidence indicators
|
| 83 |
+
- Feedback buttons (✅ correct / ❌ incorrect)
|
| 84 |
+
|
| 85 |
+
### 6. Human-in-the-Loop (HITL)
|
| 86 |
+
**Triggers when:**
|
| 87 |
+
- OCR confidence < 70%
|
| 88 |
+
- Audio confidence < 60%
|
| 89 |
+
- Parser detects ambiguity
|
| 90 |
+
- Verifier confidence < 70%
|
| 91 |
+
- User explicitly requests review
|
| 92 |
+
|
| 93 |
+
**HITL Flow:**
|
| 94 |
+
- Shows severity levels (🔴 high / 🟡 medium)
|
| 95 |
+
- Instructions for user action
|
| 96 |
+
- Records approvals/edits/rejections
|
| 97 |
+
- Stores as learning signals
|
| 98 |
+
|
| 99 |
+
### 7. Memory & Self-Learning
|
| 100 |
+
**Stores:**
|
| 101 |
+
- Original input (all modalities)
|
| 102 |
+
- Parsed question structure
|
| 103 |
+
- Retrieved context
|
| 104 |
+
- Solution and verification
|
| 105 |
+
- User feedback
|
| 106 |
+
|
| 107 |
+
**Learning Features:**
|
| 108 |
+
- Retrieves similar solved problems
|
| 109 |
+
- Reuses successful solution patterns
|
| 110 |
+
- Applies learned OCR/audio corrections
|
| 111 |
+
- Tracks accuracy by topic
|
| 112 |
+
- Identifies most successful strategies
|
| 113 |
+
- **No model retraining - pattern reuse**
|
| 114 |
+
|
| 115 |
+
### 8. Tools Integration
|
| 116 |
+
- **Python Calculator**: Safe mathematical expression evaluation
|
| 117 |
+
- **RAG Retrieval**: Knowledge base search
|
| 118 |
+
- **OCR/ASR Correction**: Learned pattern application
|
| 119 |
+
|
| 120 |
+
## Setup
|
| 121 |
+
|
| 122 |
+
### 1. Clone Repository
|
| 123 |
+
```bash
|
| 124 |
+
git clone https://github.com/krushna27/Math-Mentor---Multimodal-AI-Math-Problem-Solver.git
|
| 125 |
+
cd Math-Mentor---Multimodal-AI-Math-Problem-Solver
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
### 2. Install Dependencies
|
| 129 |
+
```bash
|
| 130 |
+
pip install -r requirements.txt
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### 3. Configure Environment
|
| 134 |
+
```bash
|
| 135 |
+
cp .env
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
Edit `.env` and add your API keys:
|
| 139 |
+
```
|
| 140 |
+
GEMINI_API_KEY=your_key
|
| 141 |
+
OPENAI_API_KEY=your_key
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### 4. Run Application
|
| 145 |
+
```bash
|
| 146 |
+
streamlit run app.py
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
Download: https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip
|
| 150 |
+
|
| 151 |
+
Extract to C:\ffmpeg
|
| 152 |
+
|
| 153 |
+
Add C:\ffmpeg\bin to Windows PATH
|
| 154 |
+
|
| 155 |
+
Restart terminal/IDE
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
## Usage
|
| 159 |
+
|
| 160 |
+
1. Select input mode (Text/Image/Audio)
|
| 161 |
+
2. Provide your math problem
|
| 162 |
+
3. Review extracted/transcribed text
|
| 163 |
+
4. Click "Solve Problem"
|
| 164 |
+
5. View agent trace, solution, and explanation
|
| 165 |
+
6. Provide feedback
|
| 166 |
+
|
| 167 |
+
## Project Structure
|
| 168 |
+
|
| 169 |
+
```
|
| 170 |
+
math-mentor/
|
| 171 |
+
├── app.py # Main Streamlit app
|
| 172 |
+
├── agents/ # Multi-agent system
|
| 173 |
+
│ ├── parser.py
|
| 174 |
+
│ ├── router.py
|
| 175 |
+
│ ├── solver.py
|
| 176 |
+
│ ├── verifier.py
|
| 177 |
+
│ └── explainer.py
|
| 178 |
+
├── rag/ # RAG pipeline
|
| 179 |
+
│ ├── knowledge_base.py
|
| 180 |
+
│ └── retriever.py
|
| 181 |
+
├── utils/ # Input processors
|
| 182 |
+
│ ├── ocr.py
|
| 183 |
+
│ ├── audio.py
|
| 184 |
+
│ └── memory.py
|
| 185 |
+
├── knowledge/docs/ # Knowledge base
|
| 186 |
+
└── data/ # Memory storage
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
## Deployment
|
| 190 |
+
|
| 191 |
+
### Streamlit Cloud
|
| 192 |
+
1. Push to GitHub
|
| 193 |
+
2. Go to share.streamlit.io
|
| 194 |
+
3. Deploy from repository
|
| 195 |
+
4. Add secrets in Settings
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
## Technologies
|
| 199 |
+
|
| 200 |
+
- **LLM**: Gemini 2.0 Flash-Lite (Google)
|
| 201 |
+
- **OCR**: EasyOCR
|
| 202 |
+
- **ASR**: OpenAI Whisper
|
| 203 |
+
- **Embeddings**: sentence-transformers
|
| 204 |
+
- **Vector DB**: ChromaDB
|
| 205 |
+
- **UI**: Streamlit
|
| 206 |
+
|
agents/__init__.py
ADDED
|
File without changes
|
agents/explainer.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
from typing import Dict
|
| 4 |
+
from google.generativeai import configure
|
| 5 |
+
import google.generativeai as genai
|
| 6 |
+
|
| 7 |
+
class ExplainerAgent:
    """Agent that rewrites a solver result as a friendly, step-by-step explanation."""

    def __init__(self):
        # The API key is read from the GEMINI_API_KEY environment variable.
        configure(api_key=os.getenv('GEMINI_API_KEY'))
        self.model = genai.GenerativeModel('models/gemini-2.0-flash-lite')

    def explain(self, parsed_problem: Dict, solution: Dict, verification: Dict) -> Dict:
        """Ask the model for a tutor-style explanation of ``solution``.

        Args:
            parsed_problem: Mapping whose ``problem_text`` entry is the problem statement.
            solution: Mapping whose ``solution`` entry is the worked solution text.
            verification: Mapping whose ``is_correct`` entry says whether the
                verifier accepted the solution (treated as False when absent).

        Returns:
            A dict with the keys ``explanation``, ``tone`` and ``includes_warnings``.
            A warning paragraph is appended to the explanation when the
            solution was not marked correct.
        """
        statement = parsed_problem.get('problem_text', '')
        worked_solution = solution.get('solution', '')
        verified_ok = verification.get('is_correct', False)

        tutor_prompt = f"""You are a friendly math tutor. Explain this solution in a student-friendly way.

Problem: {statement}

Solution:
{worked_solution}

Create a clear, step-by-step explanation that:
1. Breaks down the approach
2. Explains why each step works
3. Highlights key concepts
4. Points out common mistakes to avoid

Make it conversational and encouraging.

"""

        reply = self.model.generate_content(tutor_prompt)
        text = reply.text

        # One flag drives both the appended note and the returned indicator.
        has_warning = not verified_ok
        if has_warning:
            text = text + "\n\n⚠️ Note: The verifier has some concerns about this solution. Please review carefully."

        return dict(explanation=text, tone='friendly', includes_warnings=has_warning)
|
agents/parser.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict
|
| 5 |
+
# from google.generativeai import configure
|
| 6 |
+
import google.generativeai as genai
|
| 7 |
+
|
| 8 |
+
class ParserAgent:
    """Agent that turns raw text/OCR/ASR input into a structured problem dict."""

    def __init__(self):
        # The API key is read from the GEMINI_API_KEY environment variable.
        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        self.model = genai.GenerativeModel('models/gemini-2.0-flash-lite')

    def parse(self, raw_text: str, input_type: str = 'text') -> Dict:
        """Ask the model to structure ``raw_text`` and return the parsed dict.

        Args:
            raw_text: The problem exactly as typed, OCR'd or transcribed.
            input_type: Label for the input modality, interpolated into the prompt.

        Returns:
            A dict that always contains ``problem_text``, ``topic``,
            ``variables``, ``constraints`` and ``needs_clarification``.
            When the model reply is unusable (no text, invalid JSON, or JSON
            that is not an object) a fallback dict is returned with
            ``topic='unknown'`` and ``needs_clarification=True``.
        """
        prompt = f"""You are a math problem parser. Convert the following {input_type} input into a structured format.

Input: {raw_text}

Analyze and provide a JSON response with:
- problem_text: cleaned problem statement
- topic: one of [algebra, probability, calculus, linear_algebra]
- variables: list of variables mentioned
- constraints: list of constraints or conditions
- needs_clarification: true/false if anything is ambiguous

Respond with ONLY valid JSON, no other text."""

        response = self.model.generate_content(prompt)

        try:
            # NOTE(review): google-generativeai documents that the ``text``
            # accessor raises ValueError when the response has no text part
            # (e.g. a blocked reply) -- confirm against the installed version.
            response_text = response.text.strip()
        except ValueError:
            return self._fallback(raw_text)

        try:
            parsed = json.loads(self._strip_code_fence(response_text))
        except json.JSONDecodeError:
            return self._fallback(raw_text)

        # Valid JSON that is not an object (list, string, number) would crash
        # every consumer that calls ``.get`` on the result.
        if not isinstance(parsed, dict):
            return self._fallback(raw_text)

        # Fill in anything the model left out so consumers see a stable shape.
        parsed.setdefault('problem_text', raw_text)
        parsed.setdefault('topic', 'unknown')
        parsed.setdefault('variables', [])
        parsed.setdefault('constraints', [])
        parsed.setdefault('needs_clarification', False)
        return parsed

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the payload of a leading Markdown code fence, or ``text`` unchanged."""
        if not text.startswith('```'):
            return text
        inner = text[3:].split('```')[0]
        # Drop a "json" language tag regardless of its letter case.
        if inner[:4].lower() == 'json':
            inner = inner[4:]
        return inner.strip()

    @staticmethod
    def _fallback(raw_text: str) -> Dict:
        """Build the result used when the model reply cannot be parsed."""
        return {
            'problem_text': raw_text,
            'topic': 'unknown',
            'variables': [],
            'constraints': [],
            'needs_clarification': True
        }
|
agents/router.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
from typing import Dict
|
| 4 |
+
from google.generativeai import configure
|
| 5 |
+
import google.generativeai as genai
|
| 6 |
+
|
| 7 |
+
class RouterAgent:
    """Agent that decides whether a parsed problem can be solved and with which strategy."""

    def __init__(self):
        # The API key is read from the GEMINI_API_KEY environment variable.
        configure(api_key=os.getenv('GEMINI_API_KEY'))
        self.model = genai.GenerativeModel('models/gemini-2.0-flash-lite')

    def route(self, parsed_problem: Dict) -> Dict:
        """Choose the next pipeline action for ``parsed_problem``.

        Args:
            parsed_problem: Mapping with optional ``topic`` and
                ``needs_clarification`` entries.

        Returns:
            Either a ``request_clarification`` dict (``requires_hitl`` True,
            with a ``reason``) or a ``solve`` dict carrying the chosen
            ``strategy`` and the normalised ``topic``.
        """
        # Normalise first so "Algebra", " linear algebra " or a null topic are
        # handled the same way as the canonical spellings.
        topic = self._normalize_topic(parsed_problem.get('topic', 'unknown'))
        needs_clarification = parsed_problem.get('needs_clarification', False)

        if needs_clarification:
            return {
                'action': 'request_clarification',
                'requires_hitl': True,
                'reason': 'Problem statement is ambiguous or incomplete'
            }

        if topic == 'unknown':
            return {
                'action': 'request_clarification',
                'requires_hitl': True,
                'reason': 'Cannot determine problem topic'
            }

        strategy = self._determine_strategy(parsed_problem)

        return {
            'action': 'solve',
            'requires_hitl': False,
            'strategy': strategy,
            'topic': topic
        }

    def _determine_strategy(self, problem: Dict) -> str:
        """Map the problem's topic to a strategy name, defaulting to a general one."""
        topic = self._normalize_topic(problem.get('topic', ''))

        strategies = {
            'algebra': 'algebraic_manipulation',
            'probability': 'counting_and_probability',
            'calculus': 'differentiation_integration',
            'linear_algebra': 'matrix_operations'
        }

        return strategies.get(topic, 'general_problem_solving')

    @staticmethod
    def _normalize_topic(topic) -> str:
        """Return ``topic`` lower-cased with spaces/hyphens as underscores.

        Anything that is not a non-blank string (``None``, ``""``, numbers)
        is reported as ``'unknown'``.
        """
        if not isinstance(topic, str) or not topic.strip():
            return 'unknown'
        return topic.strip().lower().replace(' ', '_').replace('-', '_')
|
agents/solver.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Dict, List
|
| 3 |
+
import re
|
| 4 |
+
import ast
|
| 5 |
+
import operator
|
| 6 |
+
from google.generativeai import configure
|
| 7 |
+
import google.generativeai as genai
|
| 8 |
+
|
| 9 |
+
class SolverAgent:
    """Agent that asks the model for a worked solution and evaluates its CALCULATE lines."""

    # AST operator node types the calculator is allowed to apply.
    operators = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    # Upper bound on the estimated bit length of an integer power, so that
    # model-written input such as 9**9**9**9 cannot hang the process.
    _MAX_POW_BITS = 10_000

    def __init__(self):
        # The API key is read from the GEMINI_API_KEY environment variable.
        configure(api_key=os.getenv('GEMINI_API_KEY'))
        self.model = genai.GenerativeModel('models/gemini-2.0-flash-lite')
        # Per-instance copy, kept so ``instance.operators`` stays an instance attribute.
        self.operators = dict(type(self).operators)

    def solve(self, parsed_problem: Dict, context: Dict, strategy: str) -> Dict:
        """Generate a step-by-step solution and annotate it with calculator results.

        Args:
            parsed_problem: Mapping with ``problem_text`` and ``topic`` entries.
            context: Mapping with optional ``knowledge_base`` and
                ``similar_problems`` lists used as prompt context.
            strategy: Strategy name interpolated into the prompt.

        Returns:
            A dict with ``solution`` (model text plus result lines), ``steps``,
            ``context_used`` (number of context items) and
            ``calculations_performed`` (count of ``CALCULATE:`` markers).
        """
        problem_text = parsed_problem.get('problem_text', '')
        topic = parsed_problem.get('topic', '')

        kb_context = context.get('knowledge_base', [])
        similar_problems = context.get('similar_problems', [])

        context_text = self._format_context(kb_context, similar_problems)

        prompt = f"""You are a math problem solver with access to a Python calculator tool.

Problem: {problem_text}
Topic: {topic}
Strategy: {strategy}

Relevant Knowledge:
{context_text}

Solve step-by-step. When you need to calculate something, write it as:
CALCULATE: <expression>
Example: CALCULATE: (2 + 3) * 4

Provide:
1. Understanding of the problem
2. Step-by-step solution (use CALCULATE for computations)
3. Final answer

Format each step clearly."""

        response = self.model.generate_content(prompt)
        solution = response.text

        solution_with_calcs = self._execute_calculations(solution)

        return {
            'solution': solution_with_calcs,
            'steps': self._extract_steps(solution_with_calcs),
            'context_used': len(kb_context) + len(similar_problems),
            'calculations_performed': solution.count('CALCULATE:')
        }

    def _execute_calculations(self, solution: str) -> str:
        """Append a result (or error) line after every line containing ``CALCULATE:``."""
        result_lines = []

        for line in solution.split('\n'):
            result_lines.append(line)
            if 'CALCULATE:' in line:
                # Drop Markdown decoration such as `2 + 3` or **CALCULATE:** 2 + 3.
                expr = line.split('CALCULATE:')[1].strip().strip('`*').strip()
                try:
                    result = self._safe_eval(expr)
                    result_lines.append(f" → Result: {result}")
                except Exception as e:
                    # Best effort: a failed calculation is reported inline, not raised.
                    result_lines.append(f" → Calculation error: {str(e)}")

        return '\n'.join(result_lines)

    def _safe_eval(self, expr: str):
        """Evaluate an arithmetic expression without ``eval``.

        Supports numeric literals, the operators in ``self.operators``,
        the names ``pi`` and ``e``, and calls to ``sqrt``, ``abs`` and ``pow``.
        ``^`` is read as exponentiation and ``√`` as ``sqrt``.

        Raises:
            ValueError: If the expression cannot be parsed or uses anything
                outside the supported subset; the original error is chained.
        """
        try:
            expr = expr.replace('^', '**')
            # "√16" must become "sqrt(16)"; a plain replace would yield the
            # undefined name "sqrt16".
            expr = re.sub(r'√\s*(\d+(?:\.\d+)?)', r'sqrt(\1)', expr)
            expr = expr.replace('√', 'sqrt')

            allowed_names = {
                'sqrt': lambda x: x ** 0.5,
                'abs': abs,
                'pow': self._bounded_pow,
                'pi': 3.141592653589793,
                'e': 2.718281828459045
            }

            tree = ast.parse(expr, mode='eval')
            return self._eval_node(tree.body, allowed_names)
        except Exception as e:
            raise ValueError(f"Cannot evaluate: {expr}") from e

    def _eval_node(self, node, allowed_names: Dict):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Constant):
            # ast.Constant also covers strings, bytes, None and booleans;
            # only genuine numbers are acceptable here.
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError(f"Unsupported constant: {value!r}")
            return value
        if isinstance(node, ast.BinOp):
            left = self._eval_node(node.left, allowed_names)
            right = self._eval_node(node.right, allowed_names)
            if isinstance(node.op, ast.Pow):
                return self._bounded_pow(left, right)
            return self.operators[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp):
            operand = self._eval_node(node.operand, allowed_names)
            return self.operators[type(node.op)](operand)
        if isinstance(node, ast.Call):
            if not isinstance(node.func, ast.Name) or node.keywords:
                raise ValueError("Only plain calls to whitelisted functions are supported")
            func = allowed_names[node.func.id]
            if not callable(func):
                raise ValueError(f"{node.func.id} is not a function")
            args = [self._eval_node(arg, allowed_names) for arg in node.args]
            return func(*args)
        if isinstance(node, ast.Name):
            return allowed_names[node.id]
        raise ValueError(f"Unsupported operation: {type(node)}")

    @classmethod
    def _bounded_pow(cls, base, exponent, modulus=None):
        """``pow`` that refuses integer powers whose result would be enormous."""
        if modulus is not None:
            return pow(base, exponent, modulus)
        if (isinstance(base, int) and isinstance(exponent, int) and exponent > 0
                and base.bit_length() * exponent > cls._MAX_POW_BITS):
            raise ValueError("Exponentiation result is too large")
        return operator.pow(base, exponent)

    def _format_context(self, kb_results: List[Dict], similar: List[Dict]) -> str:
        """Render knowledge-base hits and past solutions as prompt text."""
        parts = [f"[KB {i}] {result['content']}" for i, result in enumerate(kb_results, 1)]

        for i, prob in enumerate(similar, 1):
            if 'solution' in prob:
                # Only the first 200 characters of a past solution are quoted.
                past_solution = prob.get('solution', '')[:200]
                parts.append(f"[Similar {i}] Previous approach: {past_solution}...")

        return '\n\n'.join(parts) if parts else 'No additional context available'

    def _extract_steps(self, solution: str) -> List[str]:
        """Collect lines that look like list items: numbered, bulleted, or starting with 'Step'."""
        steps = []
        for line in solution.split('\n'):
            line = line.strip()
            if line and (line[0].isdigit() or line.startswith(('-', '•', 'Step'))):
                steps.append(line)
        return steps
|
agents/verifier.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
from typing import Dict
|
| 4 |
+
import json
|
| 5 |
+
from google.generativeai import configure
|
| 6 |
+
import google.generativeai as genai
|
| 7 |
+
|
| 8 |
+
class VerifierAgent:
    """Agent that asks the model to check a solution and reports a confidence verdict."""

    def __init__(self):
        # The API key is read from the GEMINI_API_KEY environment variable.
        configure(api_key=os.getenv('GEMINI_API_KEY'))
        self.model = genai.GenerativeModel('models/gemini-2.0-flash-lite')

    def verify(self, parsed_problem: Dict, solution: Dict) -> Dict:
        """Verify ``solution`` against ``parsed_problem``.

        Args:
            parsed_problem: Mapping whose ``problem_text`` entry is the problem statement.
            solution: Mapping whose ``solution`` entry is the worked solution text.

        Returns:
            The model's verdict dict. ``confidence`` is always a float in
            [0, 1] and ``needs_review`` is forced to True when it is below
            0.7. If the reply is unusable (no text, invalid JSON, or JSON
            that is not an object) an "inconclusive" verdict with
            ``needs_review=True`` is returned instead.
        """
        problem_text = parsed_problem.get('problem_text', '')
        solution_text = solution.get('solution', '')

        prompt = f"""You are a math solution verifier. Check the correctness of this solution.

Problem: {problem_text}

Solution:
{solution_text}

Verify:
1. Mathematical correctness
2. Logical flow
3. Units and domains
4. Edge cases handled

Respond with JSON:
{{
"is_correct": true/false,
"confidence": 0.0-1.0,
"issues": ["list of issues if any"],
"needs_review": true/false,
"feedback": "brief feedback"
}}

Respond with ONLY valid JSON."""

        response = self.model.generate_content(prompt)

        try:
            # NOTE(review): google-generativeai documents that the ``text``
            # accessor raises ValueError when the response has no text part
            # (e.g. a blocked reply) -- confirm against the installed version.
            response_text = response.text.strip()
        except ValueError:
            return self._inconclusive()

        try:
            verification = json.loads(self._strip_code_fence(response_text))
        except json.JSONDecodeError:
            return self._inconclusive()

        # A JSON list/string/number cannot carry a verdict.
        if not isinstance(verification, dict):
            return self._inconclusive()

        # The model may send null, a string such as "0.8", or omit the field;
        # comparing those with 0.7 directly would raise TypeError.
        confidence = self._coerce_confidence(verification.get('confidence', 0))
        verification['confidence'] = confidence

        if confidence < 0.7:
            verification['needs_review'] = True
        else:
            verification.setdefault('needs_review', False)

        return verification

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Convert ``value`` to a float in [0, 1]; unusable values become 0.0."""
        try:
            score = float(value)
        except (TypeError, ValueError):
            return 0.0
        if score != score:  # NaN never compares equal to itself
            return 0.0
        return max(0.0, min(1.0, score))

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the payload of a leading Markdown code fence, or ``text`` unchanged."""
        if not text.startswith('```'):
            return text
        inner = text[3:].split('```')[0]
        # Drop a "json" language tag regardless of its letter case.
        if inner[:4].lower() == 'json':
            inner = inner[4:]
        return inner.strip()

    @staticmethod
    def _inconclusive() -> Dict:
        """Build the verdict used when the model reply cannot be interpreted."""
        return {
            'is_correct': False,
            'confidence': 0.5,
            'issues': ['Unable to verify solution'],
            'needs_review': True,
            'feedback': 'Verification inconclusive'
        }
|
data/__init__.py
ADDED
|
File without changes
|
data/memory.json
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"input_type": "Text",
|
| 4 |
+
"original_text": "3x+7=22",
|
| 5 |
+
"parsed_question": {
|
| 6 |
+
"problem_text": "3x + 7 = 22",
|
| 7 |
+
"topic": "algebra",
|
| 8 |
+
"variables": [
|
| 9 |
+
"x"
|
| 10 |
+
],
|
| 11 |
+
"constraints": [],
|
| 12 |
+
"needs_clarification": false
|
| 13 |
+
},
|
| 14 |
+
"solution": "1. **Understanding the problem:**\nThe problem is to find the value of the variable 'x' that makes the equation \"3x + 7 = 22\" true. This is a linear equation with one variable, and we need to isolate 'x' to find its value.\n\n2. **Step-by-step solution:**\nWe will use algebraic manipulation to solve for 'x'. The goal is to get 'x' by itself on one side of the equation.\n\n* **Step 1: Subtract 7 from both sides of the equation.**\n The equation is: 3x + 7 = 22\n To isolate the term with 'x' (which is 3x), we need to remove the '+7'. We do this by performing the inverse operation, which is subtraction. We must subtract 7 from both sides of the equation to maintain equality.\n 3x + 7 - 7 = 22 - 7\n This simplifies to:\n 3x = 15\n\n* **Step 2: Divide both sides of the equation by 3.**\n Now we have: 3x = 15\n To isolate 'x', we need to undo the multiplication by 3. We do this by performing the inverse operation, which is division. We must divide both sides of the equation by 3.\n 3x / 3 = 15 / 3\n This simplifies to:\n x = 5\n\n3. **Final Answer:**\nThe value of 'x' that satisfies the equation 3x + 7 = 22 is 5.\n\nWe can check our answer by substituting x = 5 back into the original equation:\n3(5) + 7 = 15 + 7 = 22.\nSince 22 = 22, our solution is correct.",
|
| 15 |
+
"verification": {
|
| 16 |
+
"is_correct": true,
|
| 17 |
+
"confidence": 1.0,
|
| 18 |
+
"issues": [],
|
| 19 |
+
"needs_review": false,
|
| 20 |
+
"feedback": "The solution correctly identifies the steps required to solve the linear equation and accurately performs the algebraic manipulations. The verification step confirms the correctness of the solution."
|
| 21 |
+
},
|
| 22 |
+
"user_feedback": "correct",
|
| 23 |
+
"timestamp": "2026-01-17T18:35:20.965944",
|
| 24 |
+
"id": 0
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"input_type": "Text",
|
| 28 |
+
"original_text": "a=(2,\u22121,3),b=(1,4,2)",
|
| 29 |
+
"parsed_question": {
|
| 30 |
+
"problem_text": "a = (2, -1, 3), b = (1, 4, 2)",
|
| 31 |
+
"topic": "linear_algebra",
|
| 32 |
+
"variables": [
|
| 33 |
+
"a",
|
| 34 |
+
"b"
|
| 35 |
+
],
|
| 36 |
+
"constraints": [],
|
| 37 |
+
"needs_clarification": false
|
| 38 |
+
},
|
| 39 |
+
"solution": "### 1. Understanding of the Problem\n\nThe problem provides two vectors, $\\mathbf{a} = (2, -1, 3)$ and $\\mathbf{b} = (1, 4, 2)$. The topic is linear algebra and the strategy is matrix operations. While the problem doesn't explicitly state what operation to perform, in the context of linear algebra with two vectors, common operations include vector addition, subtraction, scalar multiplication, dot product, and cross product. Given the instruction to use \"matrix operations\", it implies we might need to represent these vectors as matrices or perform operations that can be viewed through a matrix lens. Without a specific question, I will assume the most common vector operation asked in conjunction with linear algebra and matrix operations is to find the dot product of the two vectors. The dot product of two vectors can be computed using matrix multiplication if the vectors are represented as row or column matrices.\n\n### 2. Step-by-Step Solution\n\nTo calculate the dot product of vectors $\\mathbf{a}$ and $\\mathbf{b}$ using matrix operations, we can represent them as column matrices and perform matrix multiplication.\n\n**Step 1: Represent the vectors as column matrices.**\nLet $\\mathbf{a}$ be represented by a column matrix $A$ and $\\mathbf{b}$ be represented by a column matrix $B$.\n$A = \\begin{pmatrix} 2 \\\\ -1 \\\\ 3 \\end{pmatrix}$\n$B = \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix}$\n\n**Step 2: Calculate the dot product using matrix multiplication.**\nThe dot product of $\\mathbf{a}$ and $\\mathbf{b}$ can be calculated as $A^T B$, where $A^T$ is the transpose of matrix $A$. 
The transpose of a column matrix is a row matrix.\n$A^T = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix}$\n\nNow, perform the matrix multiplication:\n$A^T B = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix} \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix}$\n\nTo multiply these matrices, we multiply each element of the first matrix (the row vector) by the corresponding element of the second matrix (the column vector) and then sum the results.\n\n$(2 \\times 1) + (-1 \\times 4) + (3 \\times 2)$\n\n**Step 3: Perform the arithmetic operations.**\n$2 \\times 1 = 2$\n$-1 \\times 4 = -4$\n$3 \\times 2 = 6$\n\nNow, sum these values:\n$2 + (-4) + 6 = 2 - 4 + 6 = -2 + 6 = 4$\n\nAlternatively, we could represent $\\mathbf{a}$ as a row matrix and $\\mathbf{b}$ as a column matrix, or $\\mathbf{a}$ as a column matrix and $\\mathbf{b}$ as a row matrix for a different matrix operation that results in the dot product. The standard way to obtain a scalar (the dot product) from two vectors using matrix multiplication is to multiply a row vector by a column vector.\n\nLet's consider representing $\\mathbf{a}$ as a row vector and $\\mathbf{b}$ as a column vector.\n$\\mathbf{a}^T = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix}$\n$\\mathbf{b} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix}$\nDot product = $\\mathbf{a}^T \\mathbf{b} = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix} \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix} = (2 \\times 1) + (-1 \\times 4) + (3 \\times 2) = 2 - 4 + 6 = 4$.\n\nOr, representing $\\mathbf{a}$ as a column vector and $\\mathbf{b}$ as a row vector (transpose of $\\mathbf{b}$), and then multiplying $\\mathbf{a}$ by $\\mathbf{b}^T$:\n$\\mathbf{a} = \\begin{pmatrix} 2 \\\\ -1 \\\\ 3 \\end{pmatrix}$\n$\\mathbf{b}^T = \\begin{pmatrix} 1 & 4 & 2 \\end{pmatrix}$\nDot product = $\\mathbf{a} \\mathbf{b}^T = \\begin{pmatrix} 2 \\\\ -1 \\\\ 3 \\end{pmatrix} \\begin{pmatrix} 1 & 4 & 2 \\end{pmatrix}$. This would result in a $3 \\times 3$ matrix, not a scalar dot product. 
The dot product is a scalar quantity.\n\nThe correct matrix operation to obtain the dot product is the multiplication of a row vector with a column vector.\n\nLet $\\mathbf{a} = (a_1, a_2, a_3)$ and $\\mathbf{b} = (b_1, b_2, b_3)$.\nThe dot product is $\\mathbf{a} \\cdot \\mathbf{b} = a_1 b_1 + a_2 b_2 + a_3 b_3$.\n\nUsing matrix notation:\nRepresent $\\mathbf{a}$ as a row matrix $A_{row} = \\begin{pmatrix} a_1 & a_2 & a_3 \\end{pmatrix}$ and $\\mathbf{b}$ as a column matrix $B_{col} = \\begin{pmatrix} b_1 \\\\ b_2 \\\\ b_3 \\end{pmatrix}$.\nThen $\\mathbf{a} \\cdot \\mathbf{b} = A_{row} B_{col} = \\begin{pmatrix} a_1 & a_2 & a_3 \\end{pmatrix} \\begin{pmatrix} b_1 \\\\ b_2 \\\\ b_3 \\end{pmatrix} = a_1 b_1 + a_2 b_2 + a_3 b_3$.\n\nSubstituting the given values:\n$A_{row} = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix}$\n$B_{col} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix}$\n\nDot product = $\\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix} \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix} = (2)(1) + (-1)(4) + (3)(2) = 2 - 4 + 6 = 4$.\n\n### 3. Final Answer\n\nThe dot product of vectors $\\mathbf{a}$ and $\\mathbf{b}$ can be computed using matrix operations as follows:\nLet $\\mathbf{a}$ be represented by the row matrix $A_{row} = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix}$ and $\\mathbf{b}$ be represented by the column matrix $B_{col} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix}$.\n\nThe dot product is then calculated by the matrix multiplication $A_{row} B_{col}$:\n$A_{row} B_{col} = \\begin{pmatrix} 2 & -1 & 3 \\end{pmatrix} \\begin{pmatrix} 1 \\\\ 4 \\\\ 2 \\end{pmatrix} = (2 \\times 1) + (-1 \\times 4) + (3 \\times 2) = 2 - 4 + 6 = 4$.\n\nThe final answer is $\\boxed{4}$.",
|
| 40 |
+
"verification": {
|
| 41 |
+
"is_correct": true,
|
| 42 |
+
"confidence": 1.0,
|
| 43 |
+
"issues": [],
|
| 44 |
+
"needs_review": false,
|
| 45 |
+
"feedback": "The solution correctly identifies the likely intended operation (dot product) given the problem statement and context. It clearly explains the matrix representation of the vectors and demonstrates the matrix multiplication process to compute the dot product. The arithmetic is also correct."
|
| 46 |
+
},
|
| 47 |
+
"user_feedback": "correct",
|
| 48 |
+
"timestamp": "2026-01-17T20:41:17.697928",
|
| 49 |
+
"id": 1
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"input_type": "Image",
|
| 53 |
+
"original_text": "x**2 +5x+1=0",
|
| 54 |
+
"parsed_question": {
|
| 55 |
+
"problem_text": "x^2 + 5x + 1 = 0",
|
| 56 |
+
"topic": "algebra",
|
| 57 |
+
"variables": [
|
| 58 |
+
"x"
|
| 59 |
+
],
|
| 60 |
+
"constraints": [],
|
| 61 |
+
"needs_clarification": false
|
| 62 |
+
},
|
| 63 |
+
"routing": {
|
| 64 |
+
"action": "solve",
|
| 65 |
+
"requires_hitl": false,
|
| 66 |
+
"strategy": "algebraic_manipulation",
|
| 67 |
+
"topic": "algebra"
|
| 68 |
+
},
|
| 69 |
+
"solution": "1. **Understanding the problem:**\nThe problem asks us to solve the quadratic equation $x^2 + 5x + 1 = 0$ for the variable $x$. This is an equation of the form $ax^2 + bx + c = 0$.\n\n2. **Step-by-step solution:**\nWe will use the quadratic formula to solve this equation, as it is a standard method for finding the roots of quadratic equations. The quadratic formula is given by:\n$x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$\n\nIn our equation, $x^2 + 5x + 1 = 0$, we have:\n$a = 1$\n$b = 5$\n$c = 1$\n\nFirst, let's calculate the discriminant, $D = b^2 - 4ac$:\nCALCULATE: $5^2 - 4 * 1 * 1$\n \u2192 Calculation error: Cannot evaluate: $5**2 - 4 * 1 * 1$\n$D = 25 - 4$\n$D = 21$\n\nSince the discriminant $D = 21$ is greater than 0, there will be two distinct real roots.\n\nNow, we can substitute the values of $a$, $b$, and $D$ into the quadratic formula:\n$x = \\frac{-5 \\pm \\sqrt{21}}{2 * 1}$\n$x = \\frac{-5 \\pm \\sqrt{21}}{2}$\n\nThis gives us two possible solutions for $x$:\n\nSolution 1:\n$x_1 = \\frac{-5 + \\sqrt{21}}{2}$\n\nSolution 2:\n$x_2 = \\frac{-5 - \\sqrt{21}}{2}$\n\n3. **Final Answer:**\nThe solutions to the equation $x^2 + 5x + 1 = 0$ are $x = \\frac{-5 + \\sqrt{21}}{2}$ and $x = \\frac{-5 - \\sqrt{21}}{2}$.",
|
| 70 |
+
"verification": {
|
| 71 |
+
"is_correct": true,
|
| 72 |
+
"confidence": 1.0,
|
| 73 |
+
"issues": [],
|
| 74 |
+
"needs_review": false,
|
| 75 |
+
"feedback": "The solution correctly applies the quadratic formula to solve the given quadratic equation. The calculations for the discriminant and the roots are accurate. The logical flow is clear and easy to follow."
|
| 76 |
+
},
|
| 77 |
+
"user_feedback": "correct",
|
| 78 |
+
"context_used": {
|
| 79 |
+
"knowledge_base": [
|
| 80 |
+
{
|
| 81 |
+
"content": "ALGEBRA FORMULAS AND CONCEPTS\n\nQuadratic Equations:\n- Standard form: ax\u00b2 + bx + c = 0\n- Quadratic formula: x = (-b \u00b1 \u221a(b\u00b2-4ac)) / 2a\n- Discriminant: D = b\u00b2 - 4ac\n - D > 0: two real roots\n - D = 0: one real root\n - D < 0: complex roots\n- Sum of roots: -b/a\n- Product of roots: c/a\n\nPolynomial Identities:\n- (a+b)\u00b2 = a\u00b2 + 2ab + b\u00b2\n- (a-b)\u00b2 = a\u00b2 - 2ab + b\u00b2\n- a\u00b2 - b\u00b2 = (a+b)(a-b)\n- (a+b)\u00b3 = a\u00b3 + 3a\u00b2b + 3ab\u00b2 + b\u00b3\n- (a-b)\u00b3 = a\u00b3 - 3a\u00b2b + 3ab\u00b2 - b\u00b3\n- a\u00b3 + b\u00b3 = (a+b)(a\u00b2 - ab + b\u00b2)\n- a\u00b3 - b\u00b3 = (a-b)(a\u00b2 + ab + b\u00b2)\n",
|
| 82 |
+
"metadata": {
|
| 83 |
+
"source": "algebra.txt",
|
| 84 |
+
"topic": "algebra"
|
| 85 |
+
},
|
| 86 |
+
"distance": 0.3847310543060303
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"content": "Inequalities:\n- |x| < a \u27f9 -a < x < a\n- |x| > a \u27f9 x < -a or x > a\n- AM \u2265 GM: (a+b)/2 \u2265 \u221a(ab) for a,b > 0\n\nCommon Mistakes:\n- Don't forget to check domain restrictions\n- Remember to verify solutions in original equation\n- Check for extraneous solutions after squaring",
|
| 90 |
+
"metadata": {
|
| 91 |
+
"source": "algebra.txt",
|
| 92 |
+
"topic": "algebra"
|
| 93 |
+
},
|
| 94 |
+
"distance": 0.43384620547294617
|
| 95 |
+
}
|
| 96 |
+
],
|
| 97 |
+
"similar_problems": [
|
| 98 |
+
{
|
| 99 |
+
"input_type": "Text",
|
| 100 |
+
"original_text": "3x+7=22",
|
| 101 |
+
"parsed_question": {
|
| 102 |
+
"problem_text": "3x + 7 = 22",
|
| 103 |
+
"topic": "algebra",
|
| 104 |
+
"variables": [
|
| 105 |
+
"x"
|
| 106 |
+
],
|
| 107 |
+
"constraints": [],
|
| 108 |
+
"needs_clarification": false
|
| 109 |
+
},
|
| 110 |
+
"solution": "1. **Understanding the problem:**\nThe problem is to find the value of the variable 'x' that makes the equation \"3x + 7 = 22\" true. This is a linear equation with one variable, and we need to isolate 'x' to find its value.\n\n2. **Step-by-step solution:**\nWe will use algebraic manipulation to solve for 'x'. The goal is to get 'x' by itself on one side of the equation.\n\n* **Step 1: Subtract 7 from both sides of the equation.**\n The equation is: 3x + 7 = 22\n To isolate the term with 'x' (which is 3x), we need to remove the '+7'. We do this by performing the inverse operation, which is subtraction. We must subtract 7 from both sides of the equation to maintain equality.\n 3x + 7 - 7 = 22 - 7\n This simplifies to:\n 3x = 15\n\n* **Step 2: Divide both sides of the equation by 3.**\n Now we have: 3x = 15\n To isolate 'x', we need to undo the multiplication by 3. We do this by performing the inverse operation, which is division. We must divide both sides of the equation by 3.\n 3x / 3 = 15 / 3\n This simplifies to:\n x = 5\n\n3. **Final Answer:**\nThe value of 'x' that satisfies the equation 3x + 7 = 22 is 5.\n\nWe can check our answer by substituting x = 5 back into the original equation:\n3(5) + 7 = 15 + 7 = 22.\nSince 22 = 22, our solution is correct.",
|
| 111 |
+
"verification": {
|
| 112 |
+
"is_correct": true,
|
| 113 |
+
"confidence": 1.0,
|
| 114 |
+
"issues": [],
|
| 115 |
+
"needs_review": false,
|
| 116 |
+
"feedback": "The solution correctly identifies the steps required to solve the linear equation and accurately performs the algebraic manipulations. The verification step confirms the correctness of the solution."
|
| 117 |
+
},
|
| 118 |
+
"user_feedback": "correct",
|
| 119 |
+
"timestamp": "2026-01-17T18:35:20.965944",
|
| 120 |
+
"id": 0,
|
| 121 |
+
"similarity": 0.3333333333333333
|
| 122 |
+
}
|
| 123 |
+
],
|
| 124 |
+
"sources": [
|
| 125 |
+
{
|
| 126 |
+
"source": "algebra.txt",
|
| 127 |
+
"topic": "algebra"
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"source": "algebra.txt",
|
| 131 |
+
"topic": "algebra"
|
| 132 |
+
}
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
"timestamp": "2026-01-18T17:59:00.228068",
|
| 136 |
+
"id": 2
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"input_type": "Audio",
|
| 140 |
+
"original_text": "Find the derivatives of x squared plus 3x.",
|
| 141 |
+
"parsed_question": {
|
| 142 |
+
"problem_text": "Find the derivatives of x squared plus 3x.",
|
| 143 |
+
"topic": "calculus",
|
| 144 |
+
"variables": [
|
| 145 |
+
"x"
|
| 146 |
+
],
|
| 147 |
+
"constraints": [],
|
| 148 |
+
"needs_clarification": false
|
| 149 |
+
},
|
| 150 |
+
"routing": {
|
| 151 |
+
"action": "solve",
|
| 152 |
+
"requires_hitl": false,
|
| 153 |
+
"strategy": "differentiation_integration",
|
| 154 |
+
"topic": "calculus"
|
| 155 |
+
},
|
| 156 |
+
"solution": "**1. Understanding of the Problem**\n\nThe problem asks us to find the derivative of the function $f(x) = x^2 + 3x$. This involves applying the basic rules of differentiation.\n\n**2. Step-by-Step Solution**\n\n* **Step 1: Identify the function.**\n The function we need to differentiate is $f(x) = x^2 + 3x$.\n\n* **Step 2: Apply the sum rule for differentiation.**\n The sum rule states that the derivative of a sum of functions is the sum of their derivatives: $d/dx[f(x) + g(x)] = d/dx[f(x)] + d/dx[g(x)]$.\n In our case, $f(x) = x^2$ and $g(x) = 3x$. So, $d/dx(x^2 + 3x) = d/dx(x^2) + d/dx(3x)$.\n\n* **Step 3: Differentiate the first term, $x^2$.**\n We will use the power rule, which states $d/dx(x^n) = nx^(n-1)$.\n For $x^2$, $n=2$.\n CALCULATE: 2 * x**(2-1)\n \u2192 Calculation error: Cannot evaluate: 2 * x**(2-1)\n This gives us $2x^1$, which simplifies to $2x$.\n\n* **Step 4: Differentiate the second term, $3x$.**\n We can consider this as a constant multiple of $x$. The constant multiple rule states $d/dx(c \\cdot f(x)) = c \\cdot d/dx(f(x))$. Here, $c=3$ and $f(x)=x$.\n The derivative of $x$ is 1 (using the power rule with $n=1$: $1 \\cdot x^(1-1) = 1 \\cdot x^0 = 1$).\n CALCULATE: 3 * 1\n \u2192 Result: 3\n This gives us $3$.\n\n* **Step 5: Combine the derivatives of the terms.**\n Now we add the derivatives from Step 3 and Step 4.\n CALCULATE: 2*x + 3\n \u2192 Calculation error: Cannot evaluate: 2*x + 3\n So, the derivative of $x^2 + 3x$ is $2x + 3$.\n\n**3. Final Answer**\n\nThe derivative of $x^2 + 3x$ is $2x + 3$.",
|
| 157 |
+
"verification": {
|
| 158 |
+
"is_correct": true,
|
| 159 |
+
"confidence": 1.0,
|
| 160 |
+
"issues": [],
|
| 161 |
+
"needs_review": false,
|
| 162 |
+
"feedback": "The solution correctly applies the sum rule and the power rule for differentiation. The steps are logical and the calculations are accurate."
|
| 163 |
+
},
|
| 164 |
+
"user_feedback": "correct",
|
| 165 |
+
"context_used": {
|
| 166 |
+
"knowledge_base": [
|
| 167 |
+
{
|
| 168 |
+
"content": "- d/dx(sin x) = cos x\n- d/dx(cos x) = -sin x\n- d/dx(tan x) = sec\u00b2x\n\nOptimization:\n- Critical points: f'(x) = 0\n- Local max: f'(x) = 0 and f''(x) < 0\n- Local min: f'(x) = 0 and f''(x) > 0\n\nCommon Mistakes:\n- Forgetting chain rule\n- Sign errors in derivatives\n- Not checking endpoints in optimization",
|
| 169 |
+
"metadata": {
|
| 170 |
+
"source": "calculus.txt",
|
| 171 |
+
"topic": "calculus"
|
| 172 |
+
},
|
| 173 |
+
"distance": 0.4156036376953125
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"content": "CALCULUS FORMULAS AND CONCEPTS\n\nLimits:\n- lim(x\u2192a) [f(x) + g(x)] = lim(x\u2192a) f(x) + lim(x\u2192a) g(x)\n- lim(x\u2192a) [f(x) \u00d7 g(x)] = lim(x\u2192a) f(x) \u00d7 lim(x\u2192a) g(x)\n- lim(x\u21920) (sin x)/x = 1\n- lim(x\u2192\u221e) (1 + 1/x)^x = e\n\nL'H\u00f4pital's Rule:\n- For 0/0 or \u221e/\u221e: lim f(x)/g(x) = lim f'(x)/g'(x)\n\nDerivatives:\n- Power rule: d/dx(x^n) = nx^(n-1)\n- Product rule: d/dx(uv) = u'v + uv'\n- Quotient rule: d/dx(u/v) = (u'v - uv')/v\u00b2\n- Chain rule: d/dx[f(g(x))] = f'(g(x)) \u00d7 g'(x)\n\nStandard Derivatives:\n- d/dx(e^x) = e^x\n- d/dx(ln x) = 1/x",
|
| 177 |
+
"metadata": {
|
| 178 |
+
"source": "calculus.txt",
|
| 179 |
+
"topic": "calculus"
|
| 180 |
+
},
|
| 181 |
+
"distance": 0.4319107234477997
|
| 182 |
+
}
|
| 183 |
+
],
|
| 184 |
+
"similar_problems": [],
|
| 185 |
+
"sources": [
|
| 186 |
+
{
|
| 187 |
+
"source": "calculus.txt",
|
| 188 |
+
"topic": "calculus"
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"source": "calculus.txt",
|
| 192 |
+
"topic": "calculus"
|
| 193 |
+
}
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
"timestamp": "2026-01-18T22:25:06.247196",
|
| 197 |
+
"id": 3
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"input_type": "Image",
|
| 201 |
+
"original_text": "x**2 +5x+1=0",
|
| 202 |
+
"parsed_question": {
|
| 203 |
+
"problem_text": "x^2 + 5x + 1 = 0",
|
| 204 |
+
"topic": "algebra",
|
| 205 |
+
"variables": [
|
| 206 |
+
"x"
|
| 207 |
+
],
|
| 208 |
+
"constraints": [],
|
| 209 |
+
"needs_clarification": false
|
| 210 |
+
},
|
| 211 |
+
"routing": {
|
| 212 |
+
"action": "solve",
|
| 213 |
+
"requires_hitl": false,
|
| 214 |
+
"strategy": "algebraic_manipulation",
|
| 215 |
+
"topic": "algebra"
|
| 216 |
+
},
|
| 217 |
+
"solution": "1. **Understanding the problem:**\nThe problem asks us to find the values of $x$ that satisfy the quadratic equation $x^2 + 5x + 1 = 0$. This equation is in the standard quadratic form $ax^2 + bx + c = 0$, where $a=1$, $b=5$, and $c=1$.\n\n2. **Step-by-step solution:**\nWe will use the quadratic formula to solve for $x$. The quadratic formula is given by $x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$.\n\n**Step 1: Identify the coefficients a, b, and c.**\nFrom the equation $x^2 + 5x + 1 = 0$, we have:\n$a = 1$\n$b = 5$\n$c = 1$\n\n**Step 2: Calculate the discriminant ($\\Delta = b^2 - 4ac$).**\nThe discriminant will tell us the nature of the roots.\nCALCULATE: $5^2 - 4 * 1 * 1$\n \u2192 Calculation error: Cannot evaluate: $5**2 - 4 * 1 * 1$\nCALCULATE: $25 - 4$\n \u2192 Calculation error: Cannot evaluate: $25 - 4$\nCALCULATE: $21$\n \u2192 Calculation error: Cannot evaluate: $21$\nSince the discriminant is $21$, which is greater than 0, there will be two distinct real roots.\n\n**Step 3: Apply the quadratic formula.**\nNow, substitute the values of $a$, $b$, and the discriminant into the quadratic formula:\n$x = \\frac{-b \\pm \\sqrt{\\Delta}}{2a}$\n$x = \\frac{-5 \\pm \\sqrt{21}}{2 * 1}$\n$x = \\frac{-5 \\pm \\sqrt{21}}{2}$\n\n**Step 4: Write out the two distinct solutions.**\nThe two solutions are:\n$x_1 = \\frac{-5 + \\sqrt{21}}{2}$\n$x_2 = \\frac{-5 - \\sqrt{21}}{2}$\n\n3. **Final Answer:**\nThe solutions to the equation $x^2 + 5x + 1 = 0$ are $x = \\frac{-5 + \\sqrt{21}}{2}$ and $x = \\frac{-5 - \\sqrt{21}}{2}$.",
|
| 218 |
+
"verification": {
|
| 219 |
+
"is_correct": true,
|
| 220 |
+
"confidence": 1.0,
|
| 221 |
+
"issues": [],
|
| 222 |
+
"needs_review": false,
|
| 223 |
+
"feedback": "The solution correctly applies the quadratic formula to solve the given quadratic equation. The identification of coefficients, calculation of the discriminant, and substitution into the formula are all accurate. The final solutions are presented correctly."
|
| 224 |
+
},
|
| 225 |
+
"user_feedback": "correct",
|
| 226 |
+
"context_used": {
|
| 227 |
+
"knowledge_base": [
|
| 228 |
+
{
|
| 229 |
+
"content": "ALGEBRA FORMULAS AND CONCEPTS\n\nQuadratic Equations:\n- Standard form: ax\u00b2 + bx + c = 0\n- Quadratic formula: x = (-b \u00b1 \u221a(b\u00b2-4ac)) / 2a\n- Discriminant: D = b\u00b2 - 4ac\n - D > 0: two real roots\n - D = 0: one real root\n - D < 0: complex roots\n- Sum of roots: -b/a\n- Product of roots: c/a\n\nPolynomial Identities:\n- (a+b)\u00b2 = a\u00b2 + 2ab + b\u00b2\n- (a-b)\u00b2 = a\u00b2 - 2ab + b\u00b2\n- a\u00b2 - b\u00b2 = (a+b)(a-b)\n- (a+b)\u00b3 = a\u00b3 + 3a\u00b2b + 3ab\u00b2 + b\u00b3\n- (a-b)\u00b3 = a\u00b3 - 3a\u00b2b + 3ab\u00b2 - b\u00b3\n- a\u00b3 + b\u00b3 = (a+b)(a\u00b2 - ab + b\u00b2)\n- a\u00b3 - b\u00b3 = (a-b)(a\u00b2 + ab + b\u00b2)\n",
|
| 230 |
+
"metadata": {
|
| 231 |
+
"source": "algebra.txt",
|
| 232 |
+
"topic": "algebra"
|
| 233 |
+
},
|
| 234 |
+
"distance": 0.38496166467666626
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"content": "Inequalities:\n- |x| < a \u27f9 -a < x < a\n- |x| > a \u27f9 x < -a or x > a\n- AM \u2265 GM: (a+b)/2 \u2265 \u221a(ab) for a,b > 0\n\nCommon Mistakes:\n- Don't forget to check domain restrictions\n- Remember to verify solutions in original equation\n- Check for extraneous solutions after squaring",
|
| 238 |
+
"metadata": {
|
| 239 |
+
"source": "algebra.txt",
|
| 240 |
+
"topic": "algebra"
|
| 241 |
+
},
|
| 242 |
+
"distance": 0.43445509672164917
|
| 243 |
+
}
|
| 244 |
+
],
|
| 245 |
+
"similar_problems": [
|
| 246 |
+
{
|
| 247 |
+
"input_type": "Image",
|
| 248 |
+
"original_text": "x**2 +5x+1=0",
|
| 249 |
+
"parsed_question": {
|
| 250 |
+
"problem_text": "x^2 + 5x + 1 = 0",
|
| 251 |
+
"topic": "algebra",
|
| 252 |
+
"variables": [
|
| 253 |
+
"x"
|
| 254 |
+
],
|
| 255 |
+
"constraints": [],
|
| 256 |
+
"needs_clarification": false
|
| 257 |
+
},
|
| 258 |
+
"routing": {
|
| 259 |
+
"action": "solve",
|
| 260 |
+
"requires_hitl": false,
|
| 261 |
+
"strategy": "algebraic_manipulation",
|
| 262 |
+
"topic": "algebra"
|
| 263 |
+
},
|
| 264 |
+
"solution": "1. **Understanding the problem:**\nThe problem asks us to solve the quadratic equation $x^2 + 5x + 1 = 0$ for the variable $x$. This is an equation of the form $ax^2 + bx + c = 0$.\n\n2. **Step-by-step solution:**\nWe will use the quadratic formula to solve this equation, as it is a standard method for finding the roots of quadratic equations. The quadratic formula is given by:\n$x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$\n\nIn our equation, $x^2 + 5x + 1 = 0$, we have:\n$a = 1$\n$b = 5$\n$c = 1$\n\nFirst, let's calculate the discriminant, $D = b^2 - 4ac$:\nCALCULATE: $5^2 - 4 * 1 * 1$\n \u2192 Calculation error: Cannot evaluate: $5**2 - 4 * 1 * 1$\n$D = 25 - 4$\n$D = 21$\n\nSince the discriminant $D = 21$ is greater than 0, there will be two distinct real roots.\n\nNow, we can substitute the values of $a$, $b$, and $D$ into the quadratic formula:\n$x = \\frac{-5 \\pm \\sqrt{21}}{2 * 1}$\n$x = \\frac{-5 \\pm \\sqrt{21}}{2}$\n\nThis gives us two possible solutions for $x$:\n\nSolution 1:\n$x_1 = \\frac{-5 + \\sqrt{21}}{2}$\n\nSolution 2:\n$x_2 = \\frac{-5 - \\sqrt{21}}{2}$\n\n3. **Final Answer:**\nThe solutions to the equation $x^2 + 5x + 1 = 0$ are $x = \\frac{-5 + \\sqrt{21}}{2}$ and $x = \\frac{-5 - \\sqrt{21}}{2}$.",
|
| 265 |
+
"verification": {
|
| 266 |
+
"is_correct": true,
|
| 267 |
+
"confidence": 1.0,
|
| 268 |
+
"issues": [],
|
| 269 |
+
"needs_review": false,
|
| 270 |
+
"feedback": "The solution correctly applies the quadratic formula to solve the given quadratic equation. The calculations for the discriminant and the roots are accurate. The logical flow is clear and easy to follow."
|
| 271 |
+
},
|
| 272 |
+
"user_feedback": "correct",
|
| 273 |
+
"context_used": {
|
| 274 |
+
"knowledge_base": [
|
| 275 |
+
{
|
| 276 |
+
"content": "ALGEBRA FORMULAS AND CONCEPTS\n\nQuadratic Equations:\n- Standard form: ax\u00b2 + bx + c = 0\n- Quadratic formula: x = (-b \u00b1 \u221a(b\u00b2-4ac)) / 2a\n- Discriminant: D = b\u00b2 - 4ac\n - D > 0: two real roots\n - D = 0: one real root\n - D < 0: complex roots\n- Sum of roots: -b/a\n- Product of roots: c/a\n\nPolynomial Identities:\n- (a+b)\u00b2 = a\u00b2 + 2ab + b\u00b2\n- (a-b)\u00b2 = a\u00b2 - 2ab + b\u00b2\n- a\u00b2 - b\u00b2 = (a+b)(a-b)\n- (a+b)\u00b3 = a\u00b3 + 3a\u00b2b + 3ab\u00b2 + b\u00b3\n- (a-b)\u00b3 = a\u00b3 - 3a\u00b2b + 3ab\u00b2 - b\u00b3\n- a\u00b3 + b\u00b3 = (a+b)(a\u00b2 - ab + b\u00b2)\n- a\u00b3 - b\u00b3 = (a-b)(a\u00b2 + ab + b\u00b2)\n",
|
| 277 |
+
"metadata": {
|
| 278 |
+
"source": "algebra.txt",
|
| 279 |
+
"topic": "algebra"
|
| 280 |
+
},
|
| 281 |
+
"distance": 0.3847310543060303
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"content": "Inequalities:\n- |x| < a \u27f9 -a < x < a\n- |x| > a \u27f9 x < -a or x > a\n- AM \u2265 GM: (a+b)/2 \u2265 \u221a(ab) for a,b > 0\n\nCommon Mistakes:\n- Don't forget to check domain restrictions\n- Remember to verify solutions in original equation\n- Check for extraneous solutions after squaring",
|
| 285 |
+
"metadata": {
|
| 286 |
+
"source": "algebra.txt",
|
| 287 |
+
"topic": "algebra"
|
| 288 |
+
},
|
| 289 |
+
"distance": 0.43384620547294617
|
| 290 |
+
}
|
| 291 |
+
],
|
| 292 |
+
"similar_problems": [
|
| 293 |
+
{
|
| 294 |
+
"input_type": "Text",
|
| 295 |
+
"original_text": "3x+7=22",
|
| 296 |
+
"parsed_question": {
|
| 297 |
+
"problem_text": "3x + 7 = 22",
|
| 298 |
+
"topic": "algebra",
|
| 299 |
+
"variables": [
|
| 300 |
+
"x"
|
| 301 |
+
],
|
| 302 |
+
"constraints": [],
|
| 303 |
+
"needs_clarification": false
|
| 304 |
+
},
|
| 305 |
+
"solution": "1. **Understanding the problem:**\nThe problem is to find the value of the variable 'x' that makes the equation \"3x + 7 = 22\" true. This is a linear equation with one variable, and we need to isolate 'x' to find its value.\n\n2. **Step-by-step solution:**\nWe will use algebraic manipulation to solve for 'x'. The goal is to get 'x' by itself on one side of the equation.\n\n* **Step 1: Subtract 7 from both sides of the equation.**\n The equation is: 3x + 7 = 22\n To isolate the term with 'x' (which is 3x), we need to remove the '+7'. We do this by performing the inverse operation, which is subtraction. We must subtract 7 from both sides of the equation to maintain equality.\n 3x + 7 - 7 = 22 - 7\n This simplifies to:\n 3x = 15\n\n* **Step 2: Divide both sides of the equation by 3.**\n Now we have: 3x = 15\n To isolate 'x', we need to undo the multiplication by 3. We do this by performing the inverse operation, which is division. We must divide both sides of the equation by 3.\n 3x / 3 = 15 / 3\n This simplifies to:\n x = 5\n\n3. **Final Answer:**\nThe value of 'x' that satisfies the equation 3x + 7 = 22 is 5.\n\nWe can check our answer by substituting x = 5 back into the original equation:\n3(5) + 7 = 15 + 7 = 22.\nSince 22 = 22, our solution is correct.",
|
| 306 |
+
"verification": {
|
| 307 |
+
"is_correct": true,
|
| 308 |
+
"confidence": 1.0,
|
| 309 |
+
"issues": [],
|
| 310 |
+
"needs_review": false,
|
| 311 |
+
"feedback": "The solution correctly identifies the steps required to solve the linear equation and accurately performs the algebraic manipulations. The verification step confirms the correctness of the solution."
|
| 312 |
+
},
|
| 313 |
+
"user_feedback": "correct",
|
| 314 |
+
"timestamp": "2026-01-17T18:35:20.965944",
|
| 315 |
+
"id": 0,
|
| 316 |
+
"similarity": 0.3333333333333333
|
| 317 |
+
}
|
| 318 |
+
],
|
| 319 |
+
"sources": [
|
| 320 |
+
{
|
| 321 |
+
"source": "algebra.txt",
|
| 322 |
+
"topic": "algebra"
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"source": "algebra.txt",
|
| 326 |
+
"topic": "algebra"
|
| 327 |
+
}
|
| 328 |
+
]
|
| 329 |
+
},
|
| 330 |
+
"timestamp": "2026-01-18T17:59:00.228068",
|
| 331 |
+
"id": 2,
|
| 332 |
+
"similarity": 1.0
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"input_type": "Text",
|
| 336 |
+
"original_text": "3x+7=22",
|
| 337 |
+
"parsed_question": {
|
| 338 |
+
"problem_text": "3x + 7 = 22",
|
| 339 |
+
"topic": "algebra",
|
| 340 |
+
"variables": [
|
| 341 |
+
"x"
|
| 342 |
+
],
|
| 343 |
+
"constraints": [],
|
| 344 |
+
"needs_clarification": false
|
| 345 |
+
},
|
| 346 |
+
"solution": "1. **Understanding the problem:**\nThe problem is to find the value of the variable 'x' that makes the equation \"3x + 7 = 22\" true. This is a linear equation with one variable, and we need to isolate 'x' to find its value.\n\n2. **Step-by-step solution:**\nWe will use algebraic manipulation to solve for 'x'. The goal is to get 'x' by itself on one side of the equation.\n\n* **Step 1: Subtract 7 from both sides of the equation.**\n The equation is: 3x + 7 = 22\n To isolate the term with 'x' (which is 3x), we need to remove the '+7'. We do this by performing the inverse operation, which is subtraction. We must subtract 7 from both sides of the equation to maintain equality.\n 3x + 7 - 7 = 22 - 7\n This simplifies to:\n 3x = 15\n\n* **Step 2: Divide both sides of the equation by 3.**\n Now we have: 3x = 15\n To isolate 'x', we need to undo the multiplication by 3. We do this by performing the inverse operation, which is division. We must divide both sides of the equation by 3.\n 3x / 3 = 15 / 3\n This simplifies to:\n x = 5\n\n3. **Final Answer:**\nThe value of 'x' that satisfies the equation 3x + 7 = 22 is 5.\n\nWe can check our answer by substituting x = 5 back into the original equation:\n3(5) + 7 = 15 + 7 = 22.\nSince 22 = 22, our solution is correct.",
|
| 347 |
+
"verification": {
|
| 348 |
+
"is_correct": true,
|
| 349 |
+
"confidence": 1.0,
|
| 350 |
+
"issues": [],
|
| 351 |
+
"needs_review": false,
|
| 352 |
+
"feedback": "The solution correctly identifies the steps required to solve the linear equation and accurately performs the algebraic manipulations. The verification step confirms the correctness of the solution."
|
| 353 |
+
},
|
| 354 |
+
"user_feedback": "correct",
|
| 355 |
+
"timestamp": "2026-01-17T18:35:20.965944",
|
| 356 |
+
"id": 0,
|
| 357 |
+
"similarity": 0.3333333333333333
|
| 358 |
+
}
|
| 359 |
+
],
|
| 360 |
+
"sources": [
|
| 361 |
+
{
|
| 362 |
+
"source": "algebra.txt",
|
| 363 |
+
"topic": "algebra"
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"source": "algebra.txt",
|
| 367 |
+
"topic": "algebra"
|
| 368 |
+
}
|
| 369 |
+
]
|
| 370 |
+
},
|
| 371 |
+
"timestamp": "2026-01-18T22:27:29.771395",
|
| 372 |
+
"id": 4
|
| 373 |
+
}
|
| 374 |
+
]
|
knowledge/docs/algebra.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ALGEBRA FORMULAS AND CONCEPTS
|
| 2 |
+
|
| 3 |
+
Quadratic Equations:
|
| 4 |
+
- Standard form: ax² + bx + c = 0
|
| 5 |
+
- Quadratic formula: x = (-b ± √(b² - 4ac)) / (2a)
|
| 6 |
+
- Discriminant: D = b² - 4ac
|
| 7 |
+
- D > 0: two real roots
|
| 8 |
+
- D = 0: one repeated real root
|
| 9 |
+
- D < 0: two complex conjugate roots (no real roots)
|
| 10 |
+
- Sum of roots: -b/a
|
| 11 |
+
- Product of roots: c/a
|
| 12 |
+
|
| 13 |
+
Polynomial Identities:
|
| 14 |
+
- (a+b)² = a² + 2ab + b²
|
| 15 |
+
- (a-b)² = a² - 2ab + b²
|
| 16 |
+
- a² - b² = (a+b)(a-b)
|
| 17 |
+
- (a+b)³ = a³ + 3a²b + 3ab² + b³
|
| 18 |
+
- (a-b)³ = a³ - 3a²b + 3ab² - b³
|
| 19 |
+
- a³ + b³ = (a+b)(a² - ab + b²)
|
| 20 |
+
- a³ - b³ = (a-b)(a² + ab + b²)
|
| 21 |
+
|
| 22 |
+
Inequalities:
|
| 23 |
+
- |x| < a ⟹ -a < x < a
|
| 24 |
+
- |x| > a ⟹ x < -a or x > a
|
| 25 |
+
- AM ≥ GM: (a+b)/2 ≥ √(ab) for a,b > 0
|
| 26 |
+
|
| 27 |
+
Common Mistakes:
|
| 28 |
+
- Don't forget to check domain restrictions
|
| 29 |
+
- Remember to verify solutions in original equation
|
| 30 |
+
- Check for extraneous solutions after squaring
|
knowledge/docs/calculus.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CALCULUS FORMULAS AND CONCEPTS
|
| 2 |
+
|
| 3 |
+
Limits:
|
| 4 |
+
- lim(x→a) [f(x) + g(x)] = lim(x→a) f(x) + lim(x→a) g(x)
|
| 5 |
+
- lim(x→a) [f(x) × g(x)] = lim(x→a) f(x) × lim(x→a) g(x)
|
| 6 |
+
- lim(x→0) (sin x)/x = 1
|
| 7 |
+
- lim(x→∞) (1 + 1/x)^x = e
|
| 8 |
+
|
| 9 |
+
L'Hôpital's Rule:
|
| 10 |
+
- For 0/0 or ∞/∞: lim f(x)/g(x) = lim f'(x)/g'(x), provided the limit on the right exists
|
| 11 |
+
|
| 12 |
+
Derivatives:
|
| 13 |
+
- Power rule: d/dx(x^n) = nx^(n-1)
|
| 14 |
+
- Product rule: d/dx(uv) = u'v + uv'
|
| 15 |
+
- Quotient rule: d/dx(u/v) = (u'v - uv')/v²
|
| 16 |
+
- Chain rule: d/dx[f(g(x))] = f'(g(x)) × g'(x)
|
| 17 |
+
|
| 18 |
+
Standard Derivatives:
|
| 19 |
+
- d/dx(e^x) = e^x
|
| 20 |
+
- d/dx(ln x) = 1/x
|
| 21 |
+
- d/dx(sin x) = cos x
|
| 22 |
+
- d/dx(cos x) = -sin x
|
| 23 |
+
- d/dx(tan x) = sec²x
|
| 24 |
+
|
| 25 |
+
Optimization:
|
| 26 |
+
- Critical points: f'(x) = 0 or f'(x) undefined
|
| 27 |
+
- Local max: f'(x) = 0 and f''(x) < 0
|
| 28 |
+
- Local min: f'(x) = 0 and f''(x) > 0
|
| 29 |
+
|
| 30 |
+
Common Mistakes:
|
| 31 |
+
- Forgetting chain rule
|
| 32 |
+
- Sign errors in derivatives
|
| 33 |
+
- Not checking endpoints in optimization
|
knowledge/docs/linear_algebra.txt
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LINEAR ALGEBRA FORMULAS AND CONCEPTS
|
| 2 |
+
|
| 3 |
+
Matrices:
|
| 4 |
+
- Addition: [A + B]ᵢⱼ = aᵢⱼ + bᵢⱼ
|
| 5 |
+
- Scalar multiplication: [kA]ᵢⱼ = k × aᵢⱼ
|
| 6 |
+
- Matrix multiplication: [AB]ᵢⱼ = Σₖ aᵢₖbₖⱼ
|
| 7 |
+
- Transpose: [Aᵀ]ᵢⱼ = aⱼᵢ
|
| 8 |
+
|
| 9 |
+
Properties:
|
| 10 |
+
- (AB)ᵀ = BᵀAᵀ
|
| 11 |
+
- (AB)C = A(BC)
|
| 12 |
+
- A(B+C) = AB + AC
|
| 13 |
+
- AB ≠ BA in general
|
| 14 |
+
|
| 15 |
+
Determinants:
|
| 16 |
+
- 2×2: det|a b; c d| = ad - bc
|
| 17 |
+
- 3×3: Use cofactor expansion
|
| 18 |
+
- det(AB) = det(A) × det(B)
|
| 19 |
+
- det(Aᵀ) = det(A)
|
| 20 |
+
|
| 21 |
+
Inverse:
|
| 22 |
+
- AA⁻¹ = A⁻¹A = I
|
| 23 |
+
- (AB)⁻¹ = B⁻¹A⁻¹
|
| 24 |
+
- (Aᵀ)⁻¹ = (A⁻¹)ᵀ
|
| 25 |
+
- A⁻¹ exists ⟺ det(A) ≠ 0
|
| 26 |
+
|
| 27 |
+
System of Equations:
|
| 28 |
+
- AX = B (A square) has a unique solution if and only if det(A) ≠ 0
|
| 29 |
+
- X = A⁻¹B
|
| 30 |
+
- Cramer's rule: xᵢ = det(Aᵢ)/det(A), where Aᵢ is A with its i-th column replaced by B
|
| 31 |
+
|
| 32 |
+
Common Mistakes:
|
| 33 |
+
- Matrix multiplication is not commutative
|
| 34 |
+
- Check dimensions before multiplying
|
| 35 |
+
- Determinant of a sum ≠ sum of determinants: det(A + B) ≠ det(A) + det(B) in general (but det(AB) = det(A) × det(B))
|
knowledge/docs/probability.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PROBABILITY FORMULAS AND CONCEPTS
|
| 2 |
+
|
| 3 |
+
Basic Probability:
|
| 4 |
+
- P(A) = favorable outcomes / total outcomes (when all outcomes are equally likely)
|
| 5 |
+
- 0 ≤ P(A) ≤ 1
|
| 6 |
+
- P(A') = 1 - P(A)
|
| 7 |
+
|
| 8 |
+
Addition Rules:
|
| 9 |
+
- P(A ∪ B) = P(A) + P(B) - P(A ∩ B)
|
| 10 |
+
- For mutually exclusive: P(A ∪ B) = P(A) + P(B)
|
| 11 |
+
|
| 12 |
+
Multiplication Rules:
|
| 13 |
+
- P(A ∩ B) = P(A) × P(B|A)
|
| 14 |
+
- For independent: P(A ∩ B) = P(A) × P(B)
|
| 15 |
+
|
| 16 |
+
Conditional Probability:
|
| 17 |
+
- P(A|B) = P(A ∩ B) / P(B)
|
| 18 |
+
- Bayes' Theorem: P(A|B) = P(B|A) × P(A) / P(B)
|
| 19 |
+
|
| 20 |
+
Combinatorics:
|
| 21 |
+
- Permutation: nPr = n! / (n-r)!
|
| 22 |
+
- Combination: nCr = n! / (r!(n-r)!)
|
| 23 |
+
- Circular permutation: (n-1)!
|
| 24 |
+
|
| 25 |
+
Distributions:
|
| 26 |
+
- Binomial: P(X=k) = nCk × p^k × (1-p)^(n-k)
|
| 27 |
+
- Expected value: E(X) = Σ x × P(x)
|
| 28 |
+
- Variance: Var(X) = E(X²) - [E(X)]²
|
| 29 |
+
|
| 30 |
+
Common Mistakes:
|
| 31 |
+
- Confusing P(A|B) with P(B|A)
|
| 32 |
+
- Forgetting to account for replacement
|
| 33 |
+
- Not checking if events are independent
|
rag/__init__.py
ADDED
|
File without changes
|
rag/knowledge_base.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
import openai
|
| 4 |
+
import chromadb
|
| 5 |
+
from chromadb.config import Settings
|
| 6 |
+
from chromadb import EmbeddingFunction
|
| 7 |
+
|
| 8 |
+
class OpenAIEmbedder(EmbeddingFunction):
    """Chroma embedding function that delegates to OpenAI's embeddings API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    """

    def __init__(self):
        # Kept for any code that still relies on the module-level key.
        openai.api_key = os.getenv('OPENAI_API_KEY')
        # The v1 client is created lazily so that constructing the embedder
        # never fails just because the key is not configured yet.
        self._client = None

    def __call__(self, input: List[str]) -> List[List[float]]:
        """Return one embedding vector per text in ``input``.

        Args:
            input: Texts to embed (the parameter name is dictated by Chroma's
                ``EmbeddingFunction`` interface).

        Returns:
            A list of embedding vectors, in the same order as ``input``.
        """
        if hasattr(openai, 'OpenAI'):
            # openai>=1.0: the module-level ``openai.Embedding`` resource was
            # removed; requests go through a client object and responses are
            # typed objects rather than dicts.
            if getattr(self, '_client', None) is None:
                self._client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            response = self._client.embeddings.create(
                input=input,
                model="text-embedding-ada-002",
            )
            return [item.embedding for item in response.data]

        # Legacy openai<1.0 code path.
        response = openai.Embedding.create(input=input, model="text-embedding-ada-002")
        return [item['embedding'] for item in response['data']]
|
| 15 |
+
|
| 16 |
+
class KnowledgeBase:
    """Vector store over the ``.txt`` knowledge documents of one directory.

    On construction the ``math_knowledge`` Chroma collection is dropped (if
    present), recreated with the OpenAI embedder, and filled with chunks of
    every ``.txt`` file found in ``knowledge_dir``.
    """

    def __init__(self, knowledge_dir='knowledge/docs'):
        """Create the Chroma client/collection and index ``knowledge_dir``."""
        self.knowledge_dir = knowledge_dir
        self.embedder = OpenAIEmbedder()

        self.client = chromadb.Client(Settings(
            anonymized_telemetry=False,
            allow_reset=True
        ))

        # Always reset collection to ensure clean state with correct embeddings
        try:
            self.client.delete_collection('math_knowledge')
        except Exception:
            # Best effort: the collection simply may not exist yet. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            pass

        self.collection = self.client.create_collection('math_knowledge', embedding_function=self.embedder)
        self._load_documents()

    def _load_documents(self):
        """Chunk every ``.txt`` file in ``knowledge_dir`` and add it to the collection.

        Each chunk is stored with ``topic`` (file name without extension) and
        ``source`` (file name) metadata under the id ``"<topic>_<chunk index>"``.
        """
        docs = []
        metadatas = []
        ids = []

        for filename in os.listdir(self.knowledge_dir):
            if not filename.endswith('.txt'):
                continue

            filepath = os.path.join(self.knowledge_dir, filename)
            # splitext strips only the extension; str.replace would also
            # remove a '.txt' occurring in the middle of the name.
            topic = os.path.splitext(filename)[0]

            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            for i, chunk in enumerate(self._chunk_document(content)):
                docs.append(chunk)
                metadatas.append({
                    'topic': topic,
                    'source': filename
                })
                ids.append(f"{topic}_{i}")

        if docs:
            self.collection.add(
                documents=docs,
                metadatas=metadatas,
                ids=ids
            )

    def _chunk_document(self, content: str, chunk_size: int = 500) -> List[str]:
        """Split ``content`` into line-aligned chunks of roughly ``chunk_size`` characters.

        Lines are never split: a chunk is closed as soon as adding the next
        line would push its character count (newlines not counted) above
        ``chunk_size``, so a single over-long line becomes its own chunk.
        Whitespace-only chunks carry nothing to retrieve and are dropped, so
        an empty document yields an empty list.
        """
        chunks = []
        current_chunk = []
        current_size = 0

        for line in content.split('\n'):
            line_size = len(line)
            if current_size + line_size > chunk_size and current_chunk:
                chunks.append('\n'.join(current_chunk))
                current_chunk = [line]
                current_size = line_size
            else:
                current_chunk.append(line)
                current_size += line_size

        if current_chunk:
            chunks.append('\n'.join(current_chunk))

        return [chunk for chunk in chunks if chunk.strip()]

    def search(self, query: str, topic: str = None, k: int = 3) -> List[Dict]:
        """Return up to ``k`` chunks most similar to ``query``.

        Args:
            query: Free-text query to embed and match.
            topic: When truthy, restrict results to chunks whose ``topic``
                metadata equals this value; ``None`` or ``""`` searches all.
            k: Maximum number of results requested from the collection.

        Returns:
            A list of dicts with ``content``, ``metadata`` and ``distance``
            keys; ``distance`` is 0 when the backend returned no distances.
        """
        where = {"topic": topic} if topic else None

        results = self.collection.query(
            query_texts=[query],
            n_results=k,
            where=where
        )

        retrieved = []
        if results['documents']:
            # The key may be absent or present-but-None; treat both the same.
            distances = results.get('distances')
            distance_row = distances[0] if distances else None
            for i, doc in enumerate(results['documents'][0]):
                retrieved.append({
                    'content': doc,
                    'metadata': results['metadatas'][0][i],
                    'distance': distance_row[i] if distance_row else 0
                })

        return retrieved
|
rag/retriever.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
from rag.knowledge_base import KnowledgeBase
|
| 3 |
+
from utils.memory import MemorySystem
|
| 4 |
+
|
| 5 |
+
class Retriever:
    """Collects solving context from the knowledge base and the problem memory."""

    def __init__(self):
        self.kb = KnowledgeBase()
        self.memory = MemorySystem()

    def retrieve_context(self, problem: Dict, k: int = 3) -> Dict:
        """Look up reference material and previously seen problems for ``problem``.

        Args:
            problem: Parsed problem; ``problem_text`` and ``topic`` are read,
                both defaulting to an empty string when missing.
            k: Number of knowledge-base chunks to request.

        Returns:
            Dict with ``knowledge_base`` (search hits), ``similar_problems``
            (at most 2 entries requested from memory) and ``sources`` (the
            metadata of every knowledge-base hit).
        """
        text = problem.get('problem_text', '')
        subject = problem.get('topic', '')

        kb_hits = self.kb.search(text, subject, k=k)
        past_problems = self.memory.search_similar(text, subject, limit=2)

        return {
            'knowledge_base': kb_hits,
            'similar_problems': past_problems,
            'sources': [hit['metadata'] for hit in kb_hits],
        }
|
requirements.txt
CHANGED
|
@@ -1,3 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.31.0
|
| 2 |
+
google-generativeai==0.8.3
|
| 3 |
+
openai==1.12.0
|
| 4 |
+
langchain==0.1.9
|
| 5 |
+
langchain-community==0.0.24
|
| 6 |
+
chromadb==0.4.22
|
| 7 |
+
Pillow==10.2.0
|
| 8 |
+
easyocr==1.7.0
|
| 9 |
+
openai-whisper==20231117
|
| 10 |
+
torch==2.2.0
|
| 11 |
+
torchvision==0.17.0
|
| 12 |
+
torchaudio==2.2.0
|
| 13 |
+
python-dotenv==1.0.1
|
| 14 |
+
pydantic==2.6.1
|
| 15 |
+
numpy==1.26.4
|
| 16 |
+
pandas==2.2.0
|
| 17 |
+
faiss-cpu==1.7.4
|
| 18 |
+
opencv-python-headless==4.8.1.78
|
| 19 |
+
soundfile==0.12.1
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from PIL import Image
|
| 8 |
+
import os
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
|
| 11 |
+
# Compatibility fix for Pillow 10.0.0+ where ANTIALIAS was removed
|
| 12 |
+
if not hasattr(Image, 'ANTIALIAS'):
|
| 13 |
+
Image.ANTIALIAS = Image.LANCZOS
|
| 14 |
+
|
| 15 |
+
from utils.ocr import OCRProcessor
|
| 16 |
+
from utils.audio import AudioProcessor
|
| 17 |
+
from utils.memory import MemorySystem
|
| 18 |
+
from utils.hitl import HITLSystem
|
| 19 |
+
from agents.parser import ParserAgent
|
| 20 |
+
from agents.router import RouterAgent
|
| 21 |
+
from agents.solver import SolverAgent
|
| 22 |
+
from agents.verifier import VerifierAgent
|
| 23 |
+
from agents.explainer import ExplainerAgent
|
| 24 |
+
from rag.retriever import Retriever
|
| 25 |
+
|
| 26 |
+
load_dotenv()

st.set_page_config(page_title="Math Mentor", page_icon="📐", layout="wide")

# Each component is stored in st.session_state under a fixed key and is only
# constructed when that key is missing, in the order listed here.
_SESSION_COMPONENTS = (
    ('memory', MemorySystem),
    ('hitl', HITLSystem),
    ('ocr', OCRProcessor),
    ('audio', AudioProcessor),
    ('parser', ParserAgent),
    ('router', RouterAgent),
    ('solver', SolverAgent),
    ('verifier', VerifierAgent),
    ('explainer', ExplainerAgent),
    ('retriever', Retriever),
)
for _state_key, _factory in _SESSION_COMPONENTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _factory()
|
| 50 |
+
|
| 51 |
+
def _show_confidence(metric_label, score, flagged):
    """Render a confidence metric next to a low/high-confidence badge."""
    metric_col, badge_col = st.columns(2)
    with metric_col:
        st.metric(metric_label, f"{score:.2%}")
    with badge_col:
        if flagged:
            st.error("⚠️ Low Confidence")
        else:
            st.success("✅ High Confidence")


st.title("📐 Math Mentor - AI Problem Solver")
st.markdown("Upload an image, record audio, or type your math problem")

col1, col2 = st.columns([2, 1])

with col1:
    input_mode = st.radio("Input Mode", ["Text", "Image", "Audio"], horizontal=True)

    # Defaults used when the selected mode has produced no input yet.
    extracted_text = ""
    needs_review = False
    confidence = 1.0
    ocr_confidence = 1.0
    audio_confidence = 1.0

    if input_mode == "Image":
        uploaded_file = st.file_uploader("Upload image", type=['png', 'jpg', 'jpeg'])
        if uploaded_file:
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image", use_column_width=True)

            with st.spinner("Extracting text from image..."):
                result = st.session_state.ocr.extract_text(image)
            extracted_text = result['text']
            ocr_confidence = result['confidence']
            needs_review = result['needs_review']

            _show_confidence("OCR Confidence", ocr_confidence, needs_review)

            # The user can correct the OCR output before solving.
            extracted_text = st.text_area("Extracted Text (edit if needed):", value=extracted_text, height=150)

    elif input_mode == "Audio":
        audio_file = st.file_uploader("Upload audio file", type=['wav', 'mp3', 'm4a'])
        if audio_file:
            st.audio(audio_file)

            with st.spinner("Transcribing audio..."):
                result = st.session_state.audio.transcribe(audio_file)
            extracted_text = result['text']
            audio_confidence = result['confidence']
            needs_review = result['needs_review']

            _show_confidence("Transcription Confidence", audio_confidence, needs_review)

            # The user can correct the transcription before solving.
            extracted_text = st.text_area("Transcription (edit if needed):", value=extracted_text, height=150)

    elif input_mode == "Text":
        extracted_text = st.text_area("Extracted Text (edit if needed):", value=extracted_text, height=150, key="edited_text")

    # Both actions stay disabled until there is some text to work on.
    no_text = not extracted_text
    solve_col, recheck_col = st.columns(2)
    with solve_col:
        solve_button = st.button("🚀 Solve Problem", type="primary", disabled=no_text, use_container_width=True)
    with recheck_col:
        recheck_button = st.button("🔍 Request Re-check", disabled=no_text, use_container_width=True)

with col2:
    st.subheader("🔄 Agent Trace")
    trace_container = st.container()
|
| 122 |
+
|
| 123 |
+
if solve_button and extracted_text:
    mode_key = input_mode.lower()

    # extracted_text is the value returned by the mode's text area, so it
    # already contains any user edits. The problem is therefore parsed exactly
    # once, after the learned corrections have been applied (the earlier extra
    # parse of st.session_state["edited_text"] was immediately overwritten).
    extracted_text = st.session_state.memory.apply_learned_corrections(extracted_text, mode_key)

    with trace_container:
        trace = []

        st.write("🔍 **Parser Agent**: Analyzing problem...")
        parsed = st.session_state.parser.parse(extracted_text, mode_key)
        trace.append({"agent": "Parser", "output": parsed})
        with st.expander("Parser Output", expanded=False):
            st.json(parsed)

        # Text that differs from the raw OCR/transcription output is treated
        # as reviewed, so the low-confidence gate below is not triggered.
        # NOTE(review): extracted_text has already been through
        # apply_learned_corrections here, so an automatic correction alone
        # also counts as "reviewed" - confirm this is intended.
        if input_mode == "Image" and extracted_text != result['text']:
            ocr_confidence = 1.0
        if input_mode == "Audio" and extracted_text != result['text']:
            audio_confidence = 1.0

        hitl_check = st.session_state.hitl.should_trigger_hitl(
            ocr_confidence=ocr_confidence,
            audio_confidence=audio_confidence,
            parser_needs_clarification=parsed.get('needs_clarification', False),
            explicit_request=False
        )

        if hitl_check['should_trigger']:
            st.error(st.session_state.hitl.get_hitl_instructions(hitl_check))
            st.session_state.hitl_triggered = True
            st.stop()

        st.write("🧭 **Router Agent**: Determining strategy...")
        routing = st.session_state.router.route(parsed)
        trace.append({"agent": "Router", "output": routing})
        with st.expander("Router Output", expanded=False):
            st.json(routing)

        if routing.get('requires_hitl'):
            st.error(f"❗ HITL Required: {routing.get('reason')}")
            st.info("Please clarify your problem or edit the extracted text.")
            st.stop()

        st.write("🔎 **Retriever**: Fetching relevant context...")
        context = st.session_state.retriever.retrieve_context(parsed)
        trace.append({"agent": "Retriever", "sources": len(context['knowledge_base'])})
        st.write(f"📚 Retrieved {len(context['knowledge_base'])} knowledge chunks + {len(context['similar_problems'])} similar problems")

        st.write("💡 **Solver Agent**: Solving problem...")
        solution = st.session_state.solver.solve(parsed, context, routing['strategy'])
        trace.append({"agent": "Solver", "steps": len(solution['steps'])})
        if solution.get('calculations_performed', 0) > 0:
            st.write(f"🧮 Performed {solution['calculations_performed']} calculations")

        st.write("✅ **Verifier Agent**: Checking solution...")
        verification = st.session_state.verifier.verify(parsed, solution)
        trace.append({"agent": "Verifier", "output": verification})

        # A low verifier confidence only warns; the solution is still shown.
        verifier_hitl = st.session_state.hitl.should_trigger_hitl(
            verifier_confidence=verification.get('confidence', 1.0)
        )

        if verifier_hitl['should_trigger']:
            st.warning("⚠️ Verifier has concerns. Solution generated but needs review.")

        with st.expander("Verifier Output", expanded=False):
            st.json(verification)

        st.write("📚 **Explainer Agent**: Creating explanation...")
        explanation = st.session_state.explainer.explain(parsed, solution, verification)

        # Stored in session state so the results section can read it back.
        st.session_state.current_solution = {
            'input_mode': input_mode,
            'original_text': extracted_text,
            'parsed': parsed,
            'routing': routing,
            'solution': solution,
            'verification': verification,
            'explanation': explanation,
            'context': context,
            'trace': trace,
            'hitl_data': verifier_hitl
        }

if recheck_button and extracted_text:
    hitl_explicit = st.session_state.hitl.should_trigger_hitl(explicit_request=True)
    st.warning(st.session_state.hitl.get_hitl_instructions(hitl_explicit))
|
| 211 |
+
|
| 212 |
+
def _feedback_record(current, verdict, comment=None):
    """Build the memory record for the displayed solution.

    ``user_comment`` is only included when ``comment`` is not None.
    """
    record = {
        'input_type': current['input_mode'],
        'original_text': current['original_text'],
        'parsed_question': current['parsed'],
        'routing': current['routing'],
        'solution': current['solution']['solution'],
        'verification': current['verification'],
        'user_feedback': verdict,
    }
    if comment is not None:
        record['user_comment'] = comment
    record['context_used'] = current['context']
    return record


if 'current_solution' in st.session_state:
    current = st.session_state.current_solution
    st.markdown("---")

    explanation_tab, context_tab, details_tab, insights_tab = st.tabs(
        ["📖 Explanation", "📊 Retrieved Context", "🔍 Solution Details", "📈 Learning Insights"]
    )

    with explanation_tab:
        conf = current['verification']['confidence']

        confidence_col, verified_col, calc_col = st.columns(3)
        with confidence_col:
            st.metric("Solution Confidence", f"{conf:.2%}")
        with verified_col:
            is_correct = current['verification']['is_correct']
            st.metric("Verified", "✅ Yes" if is_correct else "⚠️ Review Needed")
        with calc_col:
            calc_count = current['solution'].get('calculations_performed', 0)
            st.metric("Calculations", calc_count)

        if conf < 0.7:
            st.warning("⚠️ Low confidence solution. Please verify carefully.")

        st.markdown("### Step-by-Step Explanation")
        st.markdown(current['explanation']['explanation'])

    with context_tab:
        st.subheader("📚 Knowledge Base Sources")
        for i, item in enumerate(current['context']['knowledge_base'], 1):
            with st.expander(f"Source {i}: {item['metadata']['topic']}"):
                st.write(item['content'])

        similar = current['context']['similar_problems']
        if similar:
            st.subheader("🔄 Similar Problems from Memory (Self-Learning)")
            for i, prob in enumerate(similar, 1):
                with st.expander(f"Similar Problem {i} (Similarity: {prob.get('similarity', 0):.2%})"):
                    st.write("**Problem:**", prob.get('parsed_question', {}).get('problem_text', ''))
                    if prob.get('user_feedback') == 'correct':
                        st.success("✅ This was a correct solution")
                    if 'solution' in prob:
                        st.write("**Previous Solution:**")
                        st.write(prob['solution'][:300] + "...")

    with details_tab:
        st.subheader("📝 Full Solution")
        st.write(current['solution']['solution'])

        st.subheader("🔍 Verification Results")
        ver = current['verification']
        st.json(ver)

        if ver.get('issues'):
            st.subheader("⚠️ Issues Found")
            for issue in ver['issues']:
                st.warning(issue)

    with insights_tab:
        insights = st.session_state.memory.get_learning_insights()

        total_col, accuracy_col, strategy_col = st.columns(3)
        with total_col:
            st.metric("Total Problems Solved", insights['total_problems'])
        with accuracy_col:
            st.metric("Overall Accuracy", f"{insights['accuracy']:.1f}%")
        with strategy_col:
            st.metric("Best Strategy", insights['most_successful_strategy'] or "N/A")

        if insights['topics_distribution']:
            st.subheader("📊 Topics Distribution")
            st.bar_chart(insights['topics_distribution'])

        if insights['common_error_topics']:
            st.subheader("⚠️ Topics Needing Improvement")
            for topic in insights['common_error_topics']:
                st.write(f"- {topic}")

    st.markdown("---")
    st.subheader("📝 Provide Feedback (Helps System Learn)")

    correct_col, incorrect_col, retry_col = st.columns(3)

    with correct_col:
        if st.button("✅ Correct Solution", use_container_width=True):
            st.session_state.memory.store(_feedback_record(current, 'correct'))
            st.success("✅ Feedback saved! This solution will help improve future responses.")
            st.balloons()

    with incorrect_col:
        if st.button("❌ Incorrect Solution", use_container_width=True):
            # The comment form below is rendered while this flag is set.
            st.session_state.show_feedback_form = True

    with retry_col:
        if st.button("🔄 Try Again", use_container_width=True):
            del st.session_state.current_solution
            st.rerun()

    if st.session_state.get('show_feedback_form'):
        st.markdown("---")
        feedback_comment = st.text_area("What was wrong? Your feedback helps the system learn:", placeholder="e.g., Wrong formula used, calculation error, missed a constraint...")

        submit_col, cancel_col = st.columns(2)
        with submit_col:
            if st.button("Submit Feedback", type="primary", use_container_width=True):
                st.session_state.memory.store(
                    _feedback_record(st.session_state.current_solution, 'incorrect', feedback_comment)
                )
                st.success("✅ Thank you! This feedback will help the system learn and improve.")
                st.session_state.show_feedback_form = False
                st.rerun()

        with cancel_col:
            if st.button("Cancel", use_container_width=True):
                st.session_state.show_feedback_form = False
                st.rerun()
|
| 341 |
+
|
| 342 |
+
# Sidebar: aggregate statistics reported by the memory system.
sidebar = st.sidebar
sidebar.title("📊 System Statistics")
insights = st.session_state.memory.get_learning_insights()

sidebar.metric("Problems Solved", insights['total_problems'])
if insights['total_problems'] > 0:
    sidebar.metric("Success Rate", f"{insights['accuracy']:.1f}%")

    best_strategy = insights['most_successful_strategy']
    if best_strategy:
        sidebar.metric("Best Strategy", best_strategy)

    sidebar.markdown("### 📚 Topics Learned")
    for topic, count in insights['topics_distribution'].items():
        sidebar.write(f"- {topic}: {count}")

sidebar.markdown("---")
sidebar.info("""
**How This System Learns:**
- Stores all solved problems
- Retrieves similar past solutions
- Learns from your feedback
- Improves OCR/audio corrections
- Identifies successful strategies
""")
|
utils/__init__.py
ADDED
|
File without changes
|
utils/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (158 Bytes). View file
|
|
|
utils/__pycache__/audio.cpython-310.pyc
ADDED
|
Binary file (1.34 kB). View file
|
|
|
utils/__pycache__/hitl.cpython-310.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
utils/__pycache__/memory.cpython-310.pyc
ADDED
|
Binary file (5.06 kB). View file
|
|
|
utils/__pycache__/ocr.cpython-310.pyc
ADDED
|
Binary file (1.09 kB). View file
|
|
|
utils/audio.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import whisper
|
| 2 |
+
# import tempfile
|
| 3 |
+
# import os
|
| 4 |
+
|
| 5 |
+
# class AudioProcessor:
|
| 6 |
+
# def __init__(self):
|
| 7 |
+
# self.model = whisper.load_model("base")
|
| 8 |
+
|
| 9 |
+
# def transcribe(self, audio_file):
|
| 10 |
+
# with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
|
| 11 |
+
# tmp.write(audio_file.read())
|
| 12 |
+
# tmp_path = tmp.name
|
| 13 |
+
|
| 14 |
+
# try:
|
| 15 |
+
# result = self.model.transcribe(tmp_path)
|
| 16 |
+
|
| 17 |
+
# text = result['text']
|
| 18 |
+
# segments = result.get('segments', [])
|
| 19 |
+
|
| 20 |
+
# avg_confidence = 0.0
|
| 21 |
+
# if segments:
|
| 22 |
+
# confidences = [s.get('no_speech_prob', 0) for s in segments]
|
| 23 |
+
# avg_confidence = 1 - (sum(confidences) / len(confidences))
|
| 24 |
+
# else:
|
| 25 |
+
# avg_confidence = 0.8
|
| 26 |
+
|
| 27 |
+
# return {
|
| 28 |
+
# 'text': text,
|
| 29 |
+
# 'confidence': avg_confidence,
|
| 30 |
+
# 'needs_review': avg_confidence < 0.6
|
| 31 |
+
# }
|
| 32 |
+
# finally:
|
| 33 |
+
# os.unlink(tmp_path)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# import whisper
|
| 39 |
+
# import tempfile
|
| 40 |
+
# import os
|
| 41 |
+
# import soundfile as sf
|
| 42 |
+
# import numpy as np
|
| 43 |
+
|
| 44 |
+
# class AudioProcessor:
|
| 45 |
+
# def __init__(self):
|
| 46 |
+
# self.model = whisper.load_model("base")
|
| 47 |
+
|
| 48 |
+
# def transcribe(self, audio_file):
|
| 49 |
+
# # Save uploaded file
|
| 50 |
+
# with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
|
| 51 |
+
# tmp.write(audio_file.read())
|
| 52 |
+
# tmp_path = tmp.name
|
| 53 |
+
|
| 54 |
+
# try:
|
| 55 |
+
# # Transcribe directly
|
| 56 |
+
# result = self.model.transcribe(tmp_path, fp16=False)
|
| 57 |
+
|
| 58 |
+
# text = result['text']
|
| 59 |
+
# segments = result.get('segments', [])
|
| 60 |
+
|
| 61 |
+
# avg_confidence = 0.8
|
| 62 |
+
# if segments:
|
| 63 |
+
# confidences = [1 - s.get('no_speech_prob', 0) for s in segments]
|
| 64 |
+
# avg_confidence = sum(confidences) / len(confidences)
|
| 65 |
+
|
| 66 |
+
# return {
|
| 67 |
+
# 'text': text,
|
| 68 |
+
# 'confidence': avg_confidence,
|
| 69 |
+
# 'needs_review': avg_confidence < 0.6
|
| 70 |
+
# }
|
| 71 |
+
# except Exception as e:
|
| 72 |
+
# # Fallback
|
| 73 |
+
# return {
|
| 74 |
+
# 'text': "Error transcribing audio. Please try again.",
|
| 75 |
+
# 'confidence': 0.0,
|
| 76 |
+
# 'needs_review': True
|
| 77 |
+
# }
|
| 78 |
+
# finally:
|
| 79 |
+
# if os.path.exists(tmp_path):
|
| 80 |
+
# os.unlink(tmp_path)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
import whisper
|
| 85 |
+
import os
|
| 86 |
+
|
| 87 |
+
class AudioProcessor:
    """Speech-to-text helper built on a locally loaded Whisper model.

    If the model cannot be loaded, ``self.model`` is ``None`` and every
    transcription returns an empty, needs-review result instead of raising.
    """

    def __init__(self):
        try:
            self.model = whisper.load_model("tiny")  # Smaller, faster
        except Exception as e:
            # Degrade gracefully, but leave a trace of why audio is disabled.
            print(f"Whisper model load error: {e}")
            self.model = None

    def transcribe(self, audio_file):
        """Transcribe an uploaded audio file to English text.

        Args:
            audio_file: Object exposing ``getvalue()`` that returns the raw
                audio bytes; an optional ``name`` attribute supplies the file
                extension for the temporary copy.

        Returns:
            Dict with ``text`` (stripped transcript), ``confidence`` (fixed
            0.8 on success, 0.0 on failure) and ``needs_review`` (True on
            failure or when the transcript is shorter than 5 characters).
        """
        if self.model is None:
            return {
                'text': "",
                'confidence': 0.0,
                'needs_review': True
            }

        # Function-scope import: this module only imports whisper and os.
        import tempfile

        # A unique temp file per call, so concurrent transcriptions cannot
        # overwrite each other's audio (a fixed "temp_audio.wav" could).
        suffix = os.path.splitext(getattr(audio_file, 'name', '') or '')[1] or '.wav'
        temp_path = None

        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                temp_path = tmp.name
                tmp.write(audio_file.getvalue())

            # Transcribe
            result = self.model.transcribe(temp_path, language="en", fp16=False)

            text = result.get('text', '').strip()

            return {
                'text': text,
                'confidence': 0.8,
                'needs_review': len(text) < 5
            }

        except Exception as e:
            print(f"Transcription error: {e}")
            return {
                'text': "",
                'confidence': 0.0,
                'needs_review': True
            }
        finally:
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is harmless.
                    pass
|
utils/hitl.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Optional
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
class HITLSystem:
    """Decides when a human should review a step and records those decisions."""

    def __init__(self):
        # Every evaluation that fired at least one trigger, oldest first.
        self.hitl_triggers = []

    def should_trigger_hitl(self,
                            ocr_confidence: float = 1.0,
                            audio_confidence: float = 1.0,
                            parser_needs_clarification: bool = False,
                            verifier_confidence: float = 1.0,
                            explicit_request: bool = False) -> Dict:
        """Evaluate all review conditions and report which ones fired.

        Conditions, in evaluation order: OCR confidence below 0.7, audio
        confidence below 0.6, parser asking for clarification, verifier
        confidence below 0.7, and an explicit user request.

        Returns:
            Dict with ``should_trigger``, ``triggers`` (one dict per fired
            condition with ``reason``/``confidence``/``severity``),
            ``timestamp`` (ISO string) and ``primary_reason`` (reason of the
            first trigger, or None). Results that fired are also appended to
            ``self.hitl_triggers``.
        """
        # (condition fired?, reason, reported confidence, severity)
        checks = (
            (ocr_confidence < 0.7, 'Low OCR confidence', ocr_confidence, 'medium'),
            (audio_confidence < 0.6, 'Low audio transcription confidence', audio_confidence, 'medium'),
            (parser_needs_clarification, 'Parser detected ambiguity or incomplete information', 0.5, 'high'),
            (verifier_confidence < 0.7, 'Verifier not confident in solution correctness', verifier_confidence, 'high'),
            (explicit_request, 'User explicitly requested review', 0.0, 'high'),
        )
        triggers = [
            {'reason': reason, 'confidence': score, 'severity': severity}
            for fired, reason, score, severity in checks
            if fired
        ]

        hitl_data = {
            'should_trigger': bool(triggers),
            'triggers': triggers,
            'timestamp': datetime.now().isoformat(),
            'primary_reason': triggers[0]['reason'] if triggers else None
        }

        if triggers:
            self.hitl_triggers.append(hitl_data)

        return hitl_data

    def get_hitl_instructions(self, hitl_data: Dict) -> str:
        """Format the fired triggers as a markdown message for the user.

        Returns an empty string when ``hitl_data['should_trigger']`` is falsy.
        """
        if not hitl_data['should_trigger']:
            return ""

        lines = ["⚠️ **Human Review Required**\n"]

        for trigger in hitl_data['triggers']:
            marker = "🔴" if trigger['severity'] == 'high' else "🟡"
            lines.append(f"{marker} {trigger['reason']}")
            if 'confidence' in trigger:
                lines.append(f" Confidence: {trigger['confidence']:.2%}")

        lines.extend([
            "\n**Action Required:**",
            "1. Review the extracted/parsed content above",
            "2. Edit if necessary",
            "3. Click 'Solve Problem' again, or",
            "4. Use 'Request Re-check' if solution is generated",
        ])

        return '\n'.join(lines)

    def record_hitl_resolution(self, hitl_data: Dict, resolution: Dict):
        """Attach a timestamped ``resolution`` entry to ``hitl_data`` and return it.

        ``action`` defaults to None, ``edited`` and ``approved`` to False.
        ``hitl_data`` is modified in place.
        """
        hitl_data['resolution'] = {
            'action': resolution.get('action'),
            'edited': resolution.get('edited', False),
            'approved': resolution.get('approved', False),
            'timestamp': datetime.now().isoformat()
        }
        return hitl_data
|
utils/memory.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Dict, List, Optional
|
| 8 |
+
from collections import Counter
|
| 9 |
+
|
| 10 |
+
class MemorySystem:
    """JSON-file-backed log of solved problems plus feedback-derived statistics.

    Every stored entry is a dict. The methods here read the optional keys
    ``parsed_question`` (with ``topic`` and ``problem_text``), ``routing``
    (with ``strategy``), ``user_feedback`` (``'correct'`` / ``'incorrect'``)
    and ``user_comment``.
    """

    def __init__(self, memory_file='data/memory.json'):
        """Load (or start) the memory store persisted at ``memory_file``."""
        self.memory_file = memory_file
        # dirname() is '' for a bare filename and os.makedirs('') raises
        # FileNotFoundError, so only create a directory when there is one.
        memory_dir = os.path.dirname(memory_file)
        if memory_dir:
            os.makedirs(memory_dir, exist_ok=True)
        self.memories = self._load_memory()
        self.correction_patterns = self._load_correction_patterns()

    @staticmethod
    def _nested(memory: Dict, section: str, field: str, default):
        """Return ``memory[section][field]``, or ``default`` if absent or None."""
        value = (memory.get(section) or {}).get(field)
        return default if value is None else value

    def _load_memory(self) -> List[Dict]:
        """Read the entry list from disk.

        Returns an empty list when the file does not exist. A file that is
        empty, is not valid JSON, or does not hold a JSON list is moved aside
        to ``<memory_file>.corrupt`` (so nothing is silently overwritten by the
        next save) and an empty list is returned.
        """
        try:
            with open(self.memory_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            return []
        except (json.JSONDecodeError, UnicodeDecodeError):
            data = None

        if isinstance(data, list):
            return data

        os.replace(self.memory_file, f'{self.memory_file}.corrupt')
        return []

    def _save_memory(self):
        """Persist all entries without ever leaving a half-written file.

        Serialisation happens before any file is opened, so an entry that
        cannot be encoded raises ``TypeError`` while the existing file is
        still intact. The data is then written to a sibling temp file and
        swapped in with ``os.replace``.
        """
        payload = json.dumps(self.memories, indent=2)
        tmp_path = f'{self.memory_file}.tmp'
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        os.replace(tmp_path, self.memory_file)

    def _load_correction_patterns(self) -> Dict:
        """Rebuild feedback statistics from ``self.memories``.

        Returns a dict with:
            ``ocr_corrections`` / ``audio_corrections``: replacement maps used
                by ``apply_learned_corrections`` (nothing in this class fills
                them; they start empty).
            ``common_mistakes``: Counter of topics for entries marked
                ``'incorrect'`` that also carry a user comment.
            ``successful_strategies``: Counter of routing strategies for
                entries marked ``'correct'``.
        """
        patterns = {
            'ocr_corrections': {},
            'audio_corrections': {},
            'common_mistakes': Counter(),
            'successful_strategies': Counter()
        }

        for memory in self.memories:
            feedback = memory.get('user_feedback')

            if feedback == 'incorrect' and memory.get('user_comment'):
                topic = self._nested(memory, 'parsed_question', 'topic', 'unknown')
                patterns['common_mistakes'][topic] += 1

            if feedback == 'correct':
                strategy = self._nested(memory, 'routing', 'strategy', 'unknown')
                patterns['successful_strategies'][strategy] += 1

        return patterns

    def store(self, entry: Dict):
        """Append ``entry`` to the store and persist it.

        ``entry`` is modified in place: it receives a ``timestamp`` (ISO-8601
        local time) and an ``id`` equal to its index in the list. If saving
        fails, the entry is removed from the in-memory list again and the
        exception propagates, so memory and disk stay in agreement.
        """
        entry['timestamp'] = datetime.now().isoformat()
        entry['id'] = len(self.memories)
        self.memories.append(entry)
        try:
            self._save_memory()
        except Exception:
            self.memories.pop()
            raise

        self.correction_patterns = self._load_correction_patterns()

    def search_similar(self, problem_text: str, topic: Optional[str] = None, limit: int = 3) -> List[Dict]:
        """Find stored problems whose wording overlaps ``problem_text``.

        Similarity is the fraction of the query's distinct lower-cased,
        whitespace-separated words that also occur in the stored problem
        text. Only entries scoring above 0.3 are kept.

        Args:
            problem_text: Query text.
            topic: If given, only entries with this topic (case-insensitive)
                are considered.
            limit: Maximum number of results.

        Returns:
            Copies of the matching entries with an added ``similarity`` key,
            best match first; equally scored entries keep newest-first order.
        """
        problem_words = set((problem_text or '').lower().split())
        if not problem_words:
            # Nothing to compare against; no entry could exceed the cut-off.
            return []

        wanted_topic = topic.lower() if topic else None
        results = []

        for memory in reversed(self.memories):
            if wanted_topic:
                memory_topic = self._nested(memory, 'parsed_question', 'topic', '')
                if memory_topic.lower() != wanted_topic:
                    continue

            memory_text = self._nested(memory, 'parsed_question', 'problem_text', '')
            memory_words = set(memory_text.lower().split())

            similarity = len(problem_words & memory_words) / len(problem_words)

            if similarity > 0.3:
                results.append({
                    **memory,
                    'similarity': similarity
                })

        # list.sort is stable, so ties stay in the newest-first scan order.
        results.sort(key=lambda x: x['similarity'], reverse=True)
        return results[:max(limit, 0)]

    def get_corrections(self) -> List[Dict]:
        """Return every entry the user marked as ``'incorrect'``."""
        return [m for m in self.memories if m.get('user_feedback') == 'incorrect']

    def get_learning_insights(self) -> Dict:
        """Summarise the store.

        Returns a dict with ``total_problems``, ``accuracy`` (percentage of
        all entries marked ``'correct'``; entries without feedback count
        against it), ``topics_distribution`` (topic -> count),
        ``most_successful_strategy`` (or ``None``) and ``common_error_topics``
        (up to three topics, most frequent first).
        """
        total = len(self.memories)
        if total == 0:
            return {
                'total_problems': 0,
                'accuracy': 0,
                'topics_distribution': {},
                'most_successful_strategy': None,
                'common_error_topics': []
            }

        correct = sum(1 for m in self.memories if m.get('user_feedback') == 'correct')

        topics = Counter(
            self._nested(m, 'parsed_question', 'topic', 'unknown')
            for m in self.memories
        )

        top_strategy = self.correction_patterns['successful_strategies'].most_common(1)
        error_topics = self.correction_patterns['common_mistakes'].most_common(3)

        return {
            'total_problems': total,
            'accuracy': correct / total * 100,
            'topics_distribution': dict(topics),
            'most_successful_strategy': top_strategy[0][0] if top_strategy else None,
            'common_error_topics': [topic for topic, _count in error_topics]
        }

    def apply_learned_corrections(self, text: str, input_type: str) -> str:
        """Apply the ``<input_type>_corrections`` replacement map to ``text``.

        Args:
            text: Text to correct.
            input_type: Prefix selecting the map, e.g. ``'ocr'`` or ``'audio'``.

        Returns:
            ``text`` with every known wrong substring replaced; unchanged when
            no map exists for ``input_type``.
        """
        corrections_key = f'{input_type}_corrections'
        for wrong, correct in self.correction_patterns.get(corrections_key, {}).items():
            # str.replace('', x) would insert x between every character.
            if wrong:
                text = text.replace(wrong, correct)

        return text

    def get_reusable_solution_pattern(self, topic: str) -> Optional[Dict]:
        """Return the newest entry marked ``'correct'`` for ``topic``, or ``None``.

        Topic matching is case-insensitive, consistent with ``search_similar``.
        """
        wanted = topic.lower() if isinstance(topic, str) else topic

        for memory in reversed(self.memories):
            if memory.get('user_feedback') != 'correct':
                continue
            memory_topic = (memory.get('parsed_question') or {}).get('topic')
            if isinstance(memory_topic, str):
                memory_topic = memory_topic.lower()
            if memory_topic == wanted:
                return memory

        return None
|
utils/ocr.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import easyocr
|
| 2 |
+
import numpy as np
|
| 3 |
+
from PIL import Image
|
| 4 |
+
|
| 5 |
+
# Compatibility shim: Pillow 10.0.0 removed Image.ANTIALIAS. Re-create it as an
# alias of Image.LANCZOS so code that still looks up the old name keeps working
# (presumably needed by easyocr -- confirm against the pinned version).
if not hasattr(Image, 'ANTIALIAS'):
    Image.ANTIALIAS = Image.LANCZOS
|
| 8 |
+
|
| 9 |
+
class OCRProcessor:
    """Extract text from images with an English, CPU-only EasyOCR reader."""

    # Mean confidence strictly below this value marks the result for review.
    REVIEW_THRESHOLD = 0.7

    def __init__(self):
        """Create the EasyOCR reader (English only, GPU disabled)."""
        self.reader = easyocr.Reader(['en'], gpu=False)

    def extract_text(self, image):
        """Run OCR on ``image`` and summarise the result.

        Args:
            image: A ``PIL.Image.Image`` (converted to a NumPy array first) or
                any other input that ``easyocr.Reader.readtext`` accepts.

        Returns:
            A dict of built-in types only, so it can be JSON-serialised:
            ``text`` (detected fragments joined by single spaces),
            ``confidence`` (mean fragment confidence as ``float``; ``0.0``
            when nothing was detected) and ``needs_review`` (``bool``, True
            when the confidence is below ``REVIEW_THRESHOLD``).
        """
        if isinstance(image, Image.Image):
            # Palette/bilevel/CMYK etc. do not map to a plain pixel array;
            # normalise them to RGB. RGB, RGBA and L are passed through as-is.
            if image.mode not in ('RGB', 'RGBA', 'L'):
                image = image.convert('RGB')
            image = np.array(image)

        results = self.reader.readtext(image)

        # Each result is unpacked as (bounding box, text, confidence).
        text_parts = [text for _bbox, text, _conf in results]
        confidences = [conf for _bbox, _text, conf in results]

        # np.mean returns np.float64 and comparing it yields np.bool_, which
        # the json module cannot encode; cast both to built-in types.
        avg_confidence = float(np.mean(confidences)) if confidences else 0.0

        return {
            'text': ' '.join(text_parts),
            'confidence': avg_confidence,
            'needs_review': bool(avg_confidence < self.REVIEW_THRESHOLD)
        }
|