Spaces:
Sleeping
Sleeping
Upload 18 files
Browse files- README_SPACES.md +130 -0
- app.py +224 -0
- requirements.txt +30 -0
- src/__init__.py +12 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/config.cpython-312.pyc +0 -0
- src/__pycache__/gradio_interface.cpython-312.pyc +0 -0
- src/__pycache__/knowledge_graph.cpython-312.pyc +0 -0
- src/__pycache__/question_generator.cpython-312.pyc +0 -0
- src/__pycache__/rag_query.cpython-312.pyc +0 -0
- src/__pycache__/vector_store.cpython-312.pyc +0 -0
- src/config.py +79 -0
- src/evaluation.py +226 -0
- src/gradio_interface.py +745 -0
- src/knowledge_graph.py +323 -0
- src/question_generator.py +161 -0
- src/rag_query.py +118 -0
- src/vector_store.py +145 -0
README_SPACES.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CSRC Car Manual RAG System
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# CSRC Car Manual RAG System
|
| 14 |
+
|
| 15 |
+
An intelligent RAG (Retrieval-Augmented Generation) system for querying car manual documents using OpenAI and vector stores.
|
| 16 |
+
|
| 17 |
+
## π Features
|
| 18 |
+
|
| 19 |
+
- **RAG-based Q&A**: Ask questions about car manual content
|
| 20 |
+
- **Vector Store**: Fast and accurate document retrieval
|
| 21 |
+
- **Knowledge Graph**: Visualize document relationships
|
| 22 |
+
- **Personalized Learning**: Adaptive learning paths (optional)
|
| 23 |
+
- **Scenario Contextualization**: Context-aware responses (optional)
|
| 24 |
+
|
| 25 |
+
## π Setup Instructions
|
| 26 |
+
|
| 27 |
+
### 1. Clone or Upload to Hugging Face Spaces
|
| 28 |
+
|
| 29 |
+
- **Option A**: Create a new Space on Hugging Face and upload files
|
| 30 |
+
- **Option B**: Connect your GitHub repository to Spaces
|
| 31 |
+
|
| 32 |
+
### 2. Set Environment Variables (Secrets)
|
| 33 |
+
|
| 34 |
+
Go to **Settings > Secrets** in your Space and add:
|
| 35 |
+
|
| 36 |
+
```
|
| 37 |
+
OPENAI_API_KEY=your-openai-api-key-here
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
β οΈ **Important**: Never commit API keys to the repository. Always use Spaces Secrets.
|
| 41 |
+
|
| 42 |
+
### 3. Upload PDF Files
|
| 43 |
+
|
| 44 |
+
Ensure your PDF files are in the `car_manual/` directory:
|
| 45 |
+
|
| 46 |
+
```
|
| 47 |
+
car_manual/
|
| 48 |
+
βββ Function of Active Distance Assist DISTRONIC.pdf
|
| 49 |
+
βββ Function of Active Lane Change Assist.pdf
|
| 50 |
+
βββ Function of Active Steering Assist.pdf
|
| 51 |
+
βββ Function of Active Stop-and-Go Assist.pdf
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
### 4. Wait for Build
|
| 55 |
+
|
| 56 |
+
Spaces will automatically:
|
| 57 |
+
- Install dependencies from `requirements.txt`
|
| 58 |
+
- Run `app.py`
|
| 59 |
+
- Start the Gradio interface
|
| 60 |
+
|
| 61 |
+
## π Project Structure
|
| 62 |
+
|
| 63 |
+
```
|
| 64 |
+
.
|
| 65 |
+
βββ app.py # Hugging Face Spaces entry point
|
| 66 |
+
βββ main.py # Local development entry point
|
| 67 |
+
βββ requirements.txt # Python dependencies
|
| 68 |
+
βββ src/ # Core modules
|
| 69 |
+
βββ modules/ # Feature modules
|
| 70 |
+
βββ car_manual/ # PDF files directory
|
| 71 |
+
βββ config/ # Configuration files
|
| 72 |
+
βββ output/ # Output directory (auto-created)
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## π§ Configuration
|
| 76 |
+
|
| 77 |
+
### Required
|
| 78 |
+
|
| 79 |
+
- **OPENAI_API_KEY**: Your OpenAI API key (set in Spaces Secrets)
|
| 80 |
+
|
| 81 |
+
### Optional
|
| 82 |
+
|
| 83 |
+
- **PDF Files**: Place in `car_manual/` directory
|
| 84 |
+
- **Vector Store**: Automatically created on first run
|
| 85 |
+
|
| 86 |
+
## π Usage
|
| 87 |
+
|
| 88 |
+
1. Wait for the Space to build (check the logs)
|
| 89 |
+
2. Open the Gradio interface
|
| 90 |
+
3. Enter your question in the input field
|
| 91 |
+
4. Get answers with source citations
|
| 92 |
+
|
| 93 |
+
## π Troubleshooting
|
| 94 |
+
|
| 95 |
+
### Error: OPENAI_API_KEY not found
|
| 96 |
+
|
| 97 |
+
- Go to Settings > Secrets
|
| 98 |
+
- Add `OPENAI_API_KEY` with your actual API key
|
| 99 |
+
- Restart the Space
|
| 100 |
+
|
| 101 |
+
### Error: No PDF files found
|
| 102 |
+
|
| 103 |
+
- Ensure PDF files are in the `car_manual/` directory
|
| 104 |
+
- Check file permissions
|
| 105 |
+
- Verify file names (case-sensitive)
|
| 106 |
+
|
| 107 |
+
### Build Fails
|
| 108 |
+
|
| 109 |
+
- Check the logs for error messages
|
| 110 |
+
- Verify `requirements.txt` is correct
|
| 111 |
+
- Ensure all Python dependencies are compatible
|
| 112 |
+
|
| 113 |
+
## π Notes
|
| 114 |
+
|
| 115 |
+
- Vector store is created automatically on first run
|
| 116 |
+
- Vector store ID is saved in `config/vector_store_config.json`
|
| 117 |
+
- First initialization may take time (uploading PDFs to OpenAI)
|
| 118 |
+
|
| 119 |
+
## π Links
|
| 120 |
+
|
| 121 |
+
- [OpenAI API Keys](https://platform.openai.com/api-keys)
|
| 122 |
+
- [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces)
|
| 123 |
+
|
| 124 |
+
## π License
|
| 125 |
+
|
| 126 |
+
MIT License
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
|
app.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Hugging Face Spaces Entry Point for CSRC Car Manual RAG System
This is the entry point for Hugging Face Spaces deployment

Note: For local development, use main.py instead.
"""
import os
import sys
from pathlib import Path

# Detect if running in Hugging Face Spaces (either env var is set by the platform).
IS_SPACES = os.getenv("SPACE_ID") is not None or os.getenv("HF_SPACE") is not None

# Add the current directory to the Python path so `src`/`modules` resolve in Spaces.
sys.path.insert(0, str(Path(__file__).parent))

from openai import OpenAI
from src.config import Config
from src.vector_store import VectorStoreManager
from src.rag_query import RAGQueryEngine
from src.question_generator import QuestionGenerator
from src.knowledge_graph import KnowledgeGraphGenerator
from src.gradio_interface import GradioInterfaceBuilder

# Optional feature: personalized learning (degrade gracefully if absent).
try:
    from modules.personalized_learning import UserProfilingSystem, LearningPathGenerator, AdaptiveLearningEngine
    PERSONALIZED_LEARNING_AVAILABLE = True
except ImportError:
    PERSONALIZED_LEARNING_AVAILABLE = False
    print("⚠️ Personalized learning modules not available")

# Optional feature: proactive learning.
try:
    from modules.proactive_learning import ProactiveLearningEngine
    PROACTIVE_LEARNING_AVAILABLE = True
except ImportError:
    PROACTIVE_LEARNING_AVAILABLE = False
    print("⚠️ Proactive learning modules not available")

# Optional feature: scenario contextualization.
try:
    from modules.scenario_contextualization.database.scenario_database import ScenarioDatabase
    from modules.scenario_contextualization.integration.feature_extractor import ADASFeatureExtractor
    from modules.scenario_contextualization.retrieval.scenario_retriever import ScenarioRetriever
    from modules.scenario_contextualization.formatting.constructive_formatter import ConstructiveFormatter
    from modules.scenario_contextualization.integration.enhanced_rag_engine import EnhancedRAGEngine
    SCENARIO_CONTEXTUALIZATION_AVAILABLE = True
except ImportError as e:
    SCENARIO_CONTEXTUALIZATION_AVAILABLE = False
    print(f"⚠️ Scenario contextualization modules not available: {e}")
|
| 53 |
+
|
| 54 |
+
def initialize_system(config: Config) -> dict:
    """Initialize the RAG system components.

    Builds the OpenAI client, ensures a vector store exists (creating and
    populating it from the PDFs in ``car_manual/`` on first run), and wires
    up the query engine plus every optional feature module that imported
    successfully.

    Args:
        config: Loaded application configuration.

    Returns:
        Dict of initialized components keyed by role ("client",
        "rag_engine", "knowledge_graph", ...); optional components are
        None when unavailable.

    Raises:
        ValueError: If the API key is missing or no PDF files are found.
        RuntimeError: If vector store creation fails or no file uploads.
    """
    # Defense in depth: Config() normally raises earlier if the key is
    # missing, but fail here with a Spaces-specific message just in case.
    if not config.openai_api_key:
        raise ValueError(
            "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
            "Go to Settings > Secrets and add OPENAI_API_KEY"
        )

    client = OpenAI(api_key=config.openai_api_key)

    # Initialize vector store manager
    vector_store_manager = VectorStoreManager(client)

    # Reuse a previously created vector store when its ID was persisted.
    vector_store_id = config.get_vector_store_id()

    if not vector_store_id:
        print("📦 Creating new vector store...")
        pdf_files = config.get_pdf_files()

        if not pdf_files:
            raise ValueError(f"No PDF files found in {config.car_manual_dir}")

        vector_store_details = vector_store_manager.create_vector_store(config.vector_store_name)
        if not vector_store_details:
            raise RuntimeError("Failed to create vector store")

        vector_store_id = vector_store_details["id"]
        config.save_vector_store_id(vector_store_id, config.vector_store_name)

        # Upload the manuals; abort only if nothing at all was uploaded.
        upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
        if upload_stats["successful_uploads"] == 0:
            raise RuntimeError("Failed to upload any files")
    else:
        print(f"✅ Using existing vector store: {vector_store_id}")

    # Core engines.
    rag_engine = RAGQueryEngine(client, vector_store_id, config.model)
    question_generator = QuestionGenerator(client, rag_engine)
    knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))

    # Optional: personalized learning (best-effort; failures are logged only).
    user_profiling = None
    learning_path_generator = None
    adaptive_engine = None

    if PERSONALIZED_LEARNING_AVAILABLE:
        try:
            user_profiling = UserProfilingSystem()
            learning_path_generator = LearningPathGenerator(user_profiling, config.available_topics)
            adaptive_engine = AdaptiveLearningEngine(user_profiling, learning_path_generator)
            print("✅ Personalized Learning System initialized!")
        except Exception as e:
            print(f"⚠️ Error initializing Personalized Learning System: {e}")

    # Optional: proactive learning (requires user profiling to be up).
    proactive_engine = None
    if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
        try:
            proactive_engine = ProactiveLearningEngine(
                client, rag_engine, user_profiling, adaptive_engine, config.available_topics
            )
            print("✅ Proactive Learning Assistance initialized!")
        except Exception as e:
            print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")

    # Optional: scenario contextualization wrapping the base RAG engine.
    enhanced_rag_engine = None
    if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
        try:
            scenario_database = ScenarioDatabase()
            feature_extractor = ADASFeatureExtractor(use_llm=False, client=client)
            scenario_retriever = ScenarioRetriever(
                scenario_database=scenario_database,
                scenario_vector_store_id=None,
                client=client
            )
            formatter = ConstructiveFormatter()
            enhanced_rag_engine = EnhancedRAGEngine(
                base_rag_engine=rag_engine,
                scenario_retriever=scenario_retriever,
                feature_extractor=feature_extractor,
                formatter=formatter
            )
            print("✅ Scenario Contextualization initialized!")
        except Exception as e:
            print(f"⚠️ Error initializing Scenario Contextualization: {e}")
            import traceback
            traceback.print_exc()

    return {
        "client": client,
        "vector_store_manager": vector_store_manager,
        "rag_engine": rag_engine,
        "question_generator": question_generator,
        "knowledge_graph": knowledge_graph,
        "user_profiling": user_profiling,
        "learning_path_generator": learning_path_generator,
        "adaptive_engine": adaptive_engine,
        "proactive_engine": proactive_engine,
        "enhanced_rag_engine": enhanced_rag_engine,
        "config": config
    }
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def create_app():
    """Create and return the Gradio app for Hugging Face Spaces.

    On any initialization failure a fallback Gradio interface that
    displays the error is returned instead of letting the Space crash.

    Returns:
        A Gradio interface: either the full app or an error screen.
    """
    print("=" * 60)
    print("🚗 CSRC Car Manual RAG System - Hugging Face Spaces")
    print("=" * 60)

    try:
        # Config() validates OPENAI_API_KEY and raises ValueError when it
        # is missing, so it must be INSIDE the try block — otherwise the
        # error interface below could never be shown for the most common
        # failure (no key configured in Spaces Secrets).
        config = Config()
        components = initialize_system(config)
    except Exception as e:
        print(f"❌ Error initializing system: {e}")
        import gradio as gr

        # Create error interface
        error_msg = f"""
# ❌ Initialization Error

**Error:** {str(e)}

**Possible solutions:**
1. Check if OPENAI_API_KEY is set in Spaces Secrets (Settings > Secrets)
2. Ensure PDF files are in the `car_manual/` directory
3. Check the logs for more details
"""

        def error_display():
            return error_msg

        error_interface = gr.Interface(
            fn=error_display,
            inputs=None,
            outputs=gr.Markdown(),
            title="CSRC Car Manual RAG System",
            description="An error occurred during initialization. Please check the logs."
        )
        return error_interface

    # Build the full Gradio interface from the initialized components.
    print("\n🚀 Building Gradio interface...")
    interface_builder = GradioInterfaceBuilder(
        rag_engine=components["rag_engine"],
        question_generator=components["question_generator"],
        knowledge_graph=components["knowledge_graph"],
        config=components["config"],
        user_profiling=components["user_profiling"],
        adaptive_engine=components["adaptive_engine"],
        proactive_engine=components["proactive_engine"]
    )

    demo = interface_builder.create_interface()
    return demo
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
# Create the app for Hugging Face Spaces
|
| 222 |
+
# Spaces will automatically detect Gradio and run this
|
| 223 |
+
demo = create_app()
|
| 224 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CSRC Car Manual RAG System - Requirements
|
| 2 |
+
# Core dependencies
|
| 3 |
+
openai>=1.0.0
|
| 4 |
+
PyPDF2>=3.0.0
|
| 5 |
+
pandas>=2.0.0
|
| 6 |
+
tqdm>=4.65.0
|
| 7 |
+
|
| 8 |
+
# Visualization
|
| 9 |
+
matplotlib>=3.7.0
|
| 10 |
+
networkx>=3.0
|
| 11 |
+
scikit-learn>=1.3.0
|
| 12 |
+
|
| 13 |
+
# Web Interface
|
| 14 |
+
gradio>=4.0.0
|
| 15 |
+
|
| 16 |
+
# Evaluation (optional)
|
| 17 |
+
ragas>=0.3.0
|
| 18 |
+
sentence-transformers>=2.2.0
|
| 19 |
+
nltk>=3.8.0
|
| 20 |
+
|
| 21 |
+
# Utilities
|
| 22 |
+
numpy>=1.24.0
|
| 23 |
+
python-dotenv>=1.0.0
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
src/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CSRC Car Manual RAG System - Modular Package
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
__version__ = "1.0.0"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
src/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (256 Bytes). View file
|
|
|
src/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (4.48 kB). View file
|
|
|
src/__pycache__/gradio_interface.cpython-312.pyc
ADDED
|
Binary file (38.9 kB). View file
|
|
|
src/__pycache__/knowledge_graph.cpython-312.pyc
ADDED
|
Binary file (17.2 kB). View file
|
|
|
src/__pycache__/question_generator.cpython-312.pyc
ADDED
|
Binary file (7.84 kB). View file
|
|
|
src/__pycache__/rag_query.cpython-312.pyc
ADDED
|
Binary file (4.78 kB). View file
|
|
|
src/__pycache__/vector_store.cpython-312.pyc
ADDED
|
Binary file (6.32 kB). View file
|
|
|
src/config.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for the RAG system
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
from typing import Optional, Dict
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
# Load environment variables from .env file if it exists
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Config:
    """Centralized configuration management.

    Resolves project directories relative to this file, ensures they
    exist, validates the OpenAI API key, and persists/loads the vector
    store ID in ``config/vector_store_config.json``.

    Raises:
        ValueError: On construction, when OPENAI_API_KEY is not set.
    """

    def __init__(self):
        self.base_dir = Path(__file__).parent.parent
        self.car_manual_dir = self.base_dir / "car_manual"
        self.output_dir = self.base_dir / "output"
        self.user_data_dir = self.base_dir / "user_data"
        self.config_file = self.base_dir / "config" / "vector_store_config.json"

        # Create necessary directories, including the config file's parent —
        # without it, save_vector_store_id() crashed on a fresh checkout
        # because open(..., 'w') cannot create missing directories.
        self.car_manual_dir.mkdir(exist_ok=True)
        self.output_dir.mkdir(exist_ok=True)
        self.user_data_dir.mkdir(exist_ok=True)
        self.config_file.parent.mkdir(exist_ok=True)

        # OpenAI settings
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError(
                "OPENAI_API_KEY not found!\n\n"
                "Please set your OpenAI API key using one of these methods:\n"
                "1. Create a .env file in the project root with: OPENAI_API_KEY=your-key-here\n"
                "2. Set environment variable: export OPENAI_API_KEY=your-key-here (Linux/Mac) or "
                "$env:OPENAI_API_KEY='your-key-here' (Windows PowerShell)\n"
                "3. Set environment variable: set OPENAI_API_KEY=your-key-here (Windows CMD)\n\n"
                "You can copy .env.example to .env and add your key there."
            )
        self.model = "gpt-4o-mini"
        self.vector_store_name = "mercedes_manual_store"

        # Topics covered by the bundled manuals (used by learning modules).
        self.available_topics = [
            "Function of Active Distance Assist DISTRONIC",
            "Function of Active Lane Change Assist",
            "Function of Active Steering Assist",
            "Function of Active Stop-and-Go Assist"
        ]

    def get_vector_store_id(self) -> Optional[str]:
        """Load the persisted vector store ID, or None if unavailable."""
        if self.config_file.exists():
            try:
                with open(self.config_file, 'r') as f:
                    config = json.load(f)
                return config.get('id')
            except Exception as e:
                # Corrupt/unreadable config is treated as "no store yet".
                print(f"Error loading vector store config: {e}")
        return None

    def save_vector_store_id(self, vector_store_id: str, name: Optional[str] = None):
        """Persist the vector store ID (and display name) to the config file.

        Args:
            vector_store_id: OpenAI vector store ID to remember.
            name: Optional display name; defaults to self.vector_store_name.
        """
        from datetime import datetime, timezone

        config = {
            'id': vector_store_id,
            'name': name or self.vector_store_name,
            # Record the actual save time. The old code read the mtime of
            # the config file itself, which raised FileNotFoundError on the
            # first save (file did not exist yet) and recorded the wrong
            # time on subsequent saves.
            'created_at': datetime.now(timezone.utc).isoformat()
        }
        with open(self.config_file, 'w') as f:
            json.dump(config, f, indent=2)

    def get_pdf_files(self) -> list:
        """Return the PDF file paths found in car_manual/ as strings."""
        return [str(f) for f in self.car_manual_dir.glob("*.pdf")]
|
| 79 |
+
|
src/evaluation.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
RAG Evaluation Module
Comprehensive evaluation system for RAG-based Q&A
"""
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import re
from typing import Dict, List, Optional
from datetime import datetime

# Download required NLTK data (best-effort: offline runs can still import).
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception:
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    pass
| 21 |
+
|
| 22 |
+
class ComprehensiveRAGEvaluator:
|
| 23 |
+
"""Comprehensive evaluation system for RAG-based car manual Q&A"""
|
| 24 |
+
|
| 25 |
+
def __init__(self, rag_system, client):
    # Handles to the RAG system under test and the OpenAI client.
    self.client = client
    self.rag_system = rag_system
    # Lightweight embedding model shared by all similarity-based metrics.
    self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
    # Filled by evaluation runs; consumed by generate_evaluation_report().
    self.evaluation_results = {}
|
| 30 |
+
|
| 31 |
+
def evaluate_answer_quality(self, question: str, generated_answer: str,
                            expected_answer: str, retrieved_contexts: List[str]) -> Dict:
    """
    Comprehensive answer quality evaluation.

    Args:
        question: The question asked
        generated_answer: Answer generated by RAG system
        expected_answer: Expected correct answer
        retrieved_contexts: Contexts retrieved for the answer

    Returns:
        Dictionary of quality metrics
    """
    # Embed the three texts once; reused by the similarity metrics below.
    answer_vec = self.sentence_model.encode([generated_answer])
    expected_vec = self.sentence_model.encode([expected_answer])
    question_vec = self.sentence_model.encode([question])

    return {
        # 1. How close the answer is to the reference answer.
        'semantic_similarity': cosine_similarity(answer_vec, expected_vec)[0][0],
        # 2. How directly the answer addresses the question.
        'answer_relevance': cosine_similarity(question_vec, answer_vec)[0][0],
        # 3. Grounding in the retrieved contexts.
        'faithfulness': self._calculate_faithfulness(generated_answer, retrieved_contexts),
        # 4. Coverage of the expected answer's content words.
        'completeness': self._assess_completeness(question, generated_answer, expected_answer),
        # 5. Appropriate handling of safety-critical topics.
        'safety_appropriateness': self._check_safety_appropriateness(question, generated_answer),
        # 6. Alignment with the retrieved technical content.
        'technical_accuracy': self._assess_technical_accuracy(generated_answer, retrieved_contexts),
        # 7. Readability heuristics.
        'clarity': self._assess_clarity(generated_answer),
        # 8. Presence of actionable guidance for procedural questions.
        'actionability': self._assess_actionability(question, generated_answer),
    }
|
| 74 |
+
|
| 75 |
+
def _calculate_faithfulness(self, answer: str, contexts: List[str]) -> float:
    """Fraction of answer sentences supported by at least one context.

    A sentence counts as supported when its best cosine similarity
    against any retrieved context exceeds 0.7.

    Args:
        answer: Generated answer text.
        contexts: Retrieved context passages.

    Returns:
        Supported-sentence ratio in [0, 1]; 0.0 when there are no
        contexts or no sentences.
    """
    if not contexts:
        return 0.0

    answer_sentences = nltk.sent_tokenize(answer)
    if not answer_sentences:
        return 0.0

    # Encode everything in two batched calls. The previous version
    # re-encoded every context once per sentence — O(S*C) model
    # invocations — while the comparison itself is unchanged.
    sentence_embeddings = self.sentence_model.encode(answer_sentences)
    context_embeddings = self.sentence_model.encode(contexts)

    # similarities[i, j] = cosine(sentence i, context j)
    similarities = cosine_similarity(sentence_embeddings, context_embeddings)
    supported_sentences = int((similarities.max(axis=1) > 0.7).sum())

    return supported_sentences / len(answer_sentences)
|
| 96 |
+
|
| 97 |
+
def _assess_completeness(self, question: str, generated_answer: str, expected_answer: str) -> float:
|
| 98 |
+
"""Assess if the generated answer covers all aspects of the expected answer"""
|
| 99 |
+
expected_words = set(expected_answer.lower().split())
|
| 100 |
+
generated_words = set(generated_answer.lower().split())
|
| 101 |
+
|
| 102 |
+
stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'])
|
| 103 |
+
expected_words -= stop_words
|
| 104 |
+
generated_words -= stop_words
|
| 105 |
+
|
| 106 |
+
if not expected_words:
|
| 107 |
+
return 1.0
|
| 108 |
+
|
| 109 |
+
overlap = len(expected_words.intersection(generated_words))
|
| 110 |
+
return overlap / len(expected_words)
|
| 111 |
+
|
| 112 |
+
def _check_safety_appropriateness(self, question: str, answer: str) -> float:
|
| 113 |
+
"""Check if safety-critical information is handled appropriately"""
|
| 114 |
+
safety_keywords = ['brake', 'airbag', 'emergency', 'warning', 'danger', 'caution', 'safety', 'speed', 'steering']
|
| 115 |
+
|
| 116 |
+
question_lower = question.lower()
|
| 117 |
+
answer_lower = answer.lower()
|
| 118 |
+
|
| 119 |
+
is_safety_related = any(keyword in question_lower for keyword in safety_keywords)
|
| 120 |
+
|
| 121 |
+
if not is_safety_related:
|
| 122 |
+
return 1.0
|
| 123 |
+
|
| 124 |
+
safety_indicators = ['warning', 'caution', 'important', 'ensure', 'never', 'always', 'must']
|
| 125 |
+
has_safety_language = any(indicator in answer_lower for indicator in safety_indicators)
|
| 126 |
+
|
| 127 |
+
return 1.0 if has_safety_language else 0.5
|
| 128 |
+
|
| 129 |
+
def _assess_technical_accuracy(self, answer: str, contexts: List[str]) -> float:
    """Mean cosine similarity between the answer and each retrieved context.

    Falls back to a neutral 0.5 when no contexts were retrieved.
    """
    if not contexts:
        return 0.5

    answer_vec = self.sentence_model.encode([answer])
    context_vecs = self.sentence_model.encode(contexts)

    # First row: answer vs. every context.
    return np.mean(cosine_similarity(answer_vec, context_vecs)[0])
|
| 139 |
+
|
| 140 |
+
def _assess_clarity(self, answer: str) -> float:
    """Heuristic clarity score from sentence length and structure cues."""
    sentences = nltk.sent_tokenize(answer)
    if not sentences:
        return 0.0

    # Shorter sentences read more clearly; score capped at 1.0.
    avg_len = np.mean([len(s.split()) for s in sentences])
    length_score = min(1.0, 15.0 / avg_len) if avg_len > 0 else 0.0

    # Reward explicit step-by-step structure.
    cues = ['step', 'first', 'second', 'then', 'next', 'finally', '1.', '2.']
    lowered = answer.lower()
    structure_score = 1.0 if any(cue in lowered for cue in cues) else 0.7

    return (length_score + structure_score) / 2
|
| 155 |
+
|
| 156 |
+
def _assess_actionability(self, question: str, answer: str) -> float:
|
| 157 |
+
"""Assess if the answer provides actionable information"""
|
| 158 |
+
question_lower = question.lower()
|
| 159 |
+
answer_lower = answer.lower()
|
| 160 |
+
|
| 161 |
+
if 'how to' in question_lower or 'how do' in question_lower:
|
| 162 |
+
action_indicators = ['press', 'turn', 'select', 'push', 'pull', 'set', 'adjust', 'follow', 'ensure']
|
| 163 |
+
has_actions = any(indicator in answer_lower for indicator in action_indicators)
|
| 164 |
+
return 1.0 if has_actions else 0.3
|
| 165 |
+
|
| 166 |
+
return 0.8
|
| 167 |
+
|
| 168 |
+
    def generate_evaluation_report(self) -> str:
        """Generate comprehensive evaluation report

        Aggregates the per-question metric rows stored in
        ``self.evaluation_results`` into overall means, plus grouped
        breakdowns by question type and difficulty, and renders everything
        as a markdown report string.

        Returns:
            The formatted report text, or an instructional message when no
            evaluation results have been collected yet.
        """
        if not self.evaluation_results:
            return "No evaluation results available. Run evaluation first."

        # One row per evaluated question; columns include the eight metric
        # names below plus 'question_type' and 'difficulty' grouping keys.
        df = pd.DataFrame(self.evaluation_results)

        # Overall metrics
        overall_metrics = {
            'semantic_similarity': df['semantic_similarity'].mean(),
            'answer_relevance': df['answer_relevance'].mean(),
            'faithfulness': df['faithfulness'].mean(),
            'completeness': df['completeness'].mean(),
            'safety_appropriateness': df['safety_appropriateness'].mean(),
            'technical_accuracy': df['technical_accuracy'].mean(),
            'clarity': df['clarity'].mean(),
            'actionability': df['actionability'].mean()
        }

        # Performance by question type
        type_performance = df.groupby('question_type')[list(overall_metrics.keys())].mean()

        # Performance by difficulty
        difficulty_performance = df.groupby('difficulty')[list(overall_metrics.keys())].mean()

        # Generate report (markdown with a fixed-width table for overall metrics)
        report = f"""
# RAG System Comprehensive Evaluation Report
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Overall Performance Metrics
{'-' * 40}
{'Metric':<25} {'Score':<10} {'Interpretation':<30}
{'-' * 40}
{'Semantic Similarity':<25} {overall_metrics['semantic_similarity']:.3f} {'Answer matches expected content'}
{'Answer Relevance':<25} {overall_metrics['answer_relevance']:.3f} {'Answer addresses the question'}
{'Faithfulness':<25} {overall_metrics['faithfulness']:.3f} {'Answer is grounded in context'}
{'Completeness':<25} {overall_metrics['completeness']:.3f} {'Answer covers all aspects'}
{'Safety Appropriateness':<25} {overall_metrics['safety_appropriateness']:.3f} {'Safety info handled properly'}
{'Technical Accuracy':<25} {overall_metrics['technical_accuracy']:.3f} {'Technically correct information'}
{'Clarity':<25} {overall_metrics['clarity']:.3f} {'Clear and understandable'}
{'Actionability':<25} {overall_metrics['actionability']:.3f} {'Provides actionable guidance'}
{'-' * 40}

## Performance by Question Type
{type_performance.round(3)}

## Performance by Difficulty Level
{difficulty_performance.round(3)}
"""

        return report
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
|
src/gradio_interface.py
ADDED
|
@@ -0,0 +1,745 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio Interface Module
|
| 3 |
+
Creates the main Gradio web interface for the RAG system
|
| 4 |
+
"""
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from typing import Optional
|
| 7 |
+
from .rag_query import RAGQueryEngine
|
| 8 |
+
from .question_generator import QuestionGenerator
|
| 9 |
+
from .knowledge_graph import KnowledgeGraphGenerator
|
| 10 |
+
from .config import Config
|
| 11 |
+
|
| 12 |
+
# Import cold start onboarding functions if available
try:
    from modules.cold_start_onboarding import check_and_show_onboarding
except ImportError:
    COLD_START_AVAILABLE = False

    def check_and_show_onboarding(user_profiling, user_id):
        """Fallback used when the onboarding module is not installed.

        Mirrors the real helper's contract: False when no profiling system
        exists, otherwise defers to the profile's cold-start check.
        """
        return bool(user_profiling) and user_profiling.is_cold_start(user_id)
else:
    COLD_START_AVAILABLE = True
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class GradioInterfaceBuilder:
|
| 26 |
+
"""Builds the Gradio interface for the RAG system"""
|
| 27 |
+
|
| 28 |
+
    def __init__(self, rag_engine: RAGQueryEngine, question_generator: QuestionGenerator,
                 knowledge_graph: KnowledgeGraphGenerator, config: Config,
                 user_profiling=None, adaptive_engine=None, proactive_engine=None, enhanced_rag_engine=None):
        """Store the engines and configuration used to build the interface.

        Args:
            rag_engine: Core RAG query engine used to answer questions.
            question_generator: Generates multiple-choice test questions.
            knowledge_graph: Produces the knowledge-map visualizations.
            config: Application configuration (output paths, topics, ...).
            user_profiling: Optional profiling system; when set, enables the
                Setup (onboarding) tab and per-user features.
            adaptive_engine: Optional engine; when set, enables the
                Personalized Learning tab.
            proactive_engine: Optional engine providing prompt suggestions
                and follow-up questions in the Q&A tab.
            enhanced_rag_engine: Optional RAG engine with scenario
                contextualization; used in preference to ``rag_engine``.
        """
        self.rag_engine = rag_engine
        self.question_generator = question_generator
        self.knowledge_graph = knowledge_graph
        self.config = config
        self.user_profiling = user_profiling
        self.adaptive_engine = adaptive_engine
        self.proactive_engine = proactive_engine
        self.enhanced_rag_engine = enhanced_rag_engine  # Enhanced RAG engine (scenario feature)
|
| 39 |
+
|
| 40 |
+
    def create_interface(self):
        """Create the main Gradio interface.

        Builds a tabbed Blocks app. Tabs are added conditionally: "Setup"
        only when user profiling is enabled, "Personalized Learning" only
        when an adaptive engine is available.

        Returns:
            The assembled ``gr.Blocks`` demo (not yet launched).
        """
        with gr.Blocks(title="Mercedes E-class ADAS Manual Interface") as demo:
            gr.Markdown("# π Mercedes E-class ADAS Manual Interface")
            gr.Markdown("Ask questions, explore knowledge maps, and test your understanding!")

            # Create tabs with proper order: Setup -> Ask Questions -> Knowledge Map -> Test -> Personalized Learning
            # Ask Questions is set as default selected tab
            # (index 1 when the Setup tab is present, otherwise index 0).
            with gr.Tabs(selected=1 if self.user_profiling else 0) as tabs:
                # Tab 1: Setup (Cold Start/Onboarding) - only shown if user_profiling is available
                if self.user_profiling:
                    with gr.TabItem("Setup"):
                        self._create_onboarding_tab()

                # Tab 2: Ask Questions (Default selected tab)
                with gr.TabItem("Ask Questions"):
                    self._create_qa_tab()

                # Tab 3: Knowledge Map
                with gr.TabItem("Knowledge Map"):
                    self._create_knowledge_map_tab()

                # Tab 4: Test Your Knowledge
                with gr.TabItem("Test Your Knowledge"):
                    self._create_test_tab()

                # Tab 5: Personalized Learning Path (if available)
                if self.adaptive_engine:
                    with gr.TabItem("Personalized Learning"):
                        self._create_learning_path_tab()

        return demo
|
| 72 |
+
|
| 73 |
+
    def _create_qa_tab(self):
        """Create the Q&A tab.

        Lays out the question box, answer/sources panels, optional prompt
        suggestions (proactive engine), optional scenario panel (enhanced
        RAG engine) and follow-up questions, then wires all button events.
        Component creation order matters in Gradio: it defines the layout.
        """
        gr.Markdown("Ask questions about your car's advanced driver assistance systems")

        # User ID input (if user profiling is available)
        user_id_input = None
        if self.user_profiling:
            with gr.Row():
                user_id_input = gr.Textbox(
                    label="User ID",
                    placeholder="Enter your user ID (e.g., default_user)",
                    value="default_user",
                    scale=3
                )
                load_suggestions_btn = gr.Button("π‘ Get Suggestions", variant="secondary", scale=1)

        # Prompt suggestions area (populated only when a proactive engine exists;
        # the None placeholders let the event-wiring section below test for them).
        suggestions_container = gr.Column(visible=False)
        suggestions_display = None
        refresh_suggestions_btn = None
        cancel_suggestions_btn = None
        regenerate_suggestions_btn = None
        if self.proactive_engine:
            with suggestions_container:
                gr.Markdown("### π‘ Suggested Questions for You:")
                suggestions_display = gr.HTML()
                with gr.Row():
                    refresh_suggestions_btn = gr.Button("π Refresh Suggestions", variant="secondary", size="sm")
                    cancel_suggestions_btn = gr.Button("βΉοΈ Stop", variant="stop", size="sm")
                    regenerate_suggestions_btn = gr.Button("π Regenerate", variant="secondary", size="sm")

        with gr.Row():
            query_input = gr.Textbox(
                lines=2,
                placeholder="Enter your question here...",
                label="Your Question"
            )

        with gr.Row():
            submit_btn = gr.Button("Get Answer", variant="primary")
            cancel_answer_btn = gr.Button("βΉοΈ Stop", variant="stop")
            regenerate_answer_btn = gr.Button("π Regenerate", variant="secondary")

        with gr.Column():
            answer_output = gr.Markdown(label="Answer")
            footnotes_output = gr.Markdown(label="Sources")

        # Scenario contextualization area (collapsible)
        scenarios_container = gr.Column(visible=False)
        with scenarios_container:
            scenarios_header = gr.Markdown("### π― Related Scenarios")
            scenarios_display = gr.HTML()

        # Follow-up questions area
        followup_container = gr.Column(visible=False)
        cancel_followup_btn = None
        regenerate_followup_btn = None
        if self.proactive_engine:
            with followup_container:
                gr.Markdown("### π‘ Want to learn more? Try these questions:")
                followup_questions_display = gr.HTML()
                with gr.Row():
                    cancel_followup_btn = gr.Button("βΉοΈ Stop", variant="stop", size="sm")
                    regenerate_followup_btn = gr.Button("π Regenerate", variant="secondary", size="sm")
        else:
            followup_questions_display = gr.HTML()

        def process_query(query, user_id="default_user"):
            """Process query and generate follow-up questions

            Returns a variable-length tuple whose shape must match the
            ``outputs_list`` built in the event-wiring section below:
            (answer, footnotes[, scenarios_visibility, scenarios_html]
            [, followup_visibility, followup_html]).
            """
            # Use enhanced RAG engine if available, otherwise use standard
            if self.enhanced_rag_engine:
                try:
                    enhanced_answer = self.enhanced_rag_engine.query(query, user_id=user_id)
                    answer = enhanced_answer.answer
                    footnotes = enhanced_answer.sources
                    scenarios_html = enhanced_answer.scenarios_html
                    show_scenarios = enhanced_answer.scenario_count > 0
                except Exception as e:
                    # Fail soft: keep the tab usable if the enhanced engine errors.
                    print(f"β οΈ Error in enhanced RAG engine: {e}, falling back to standard")
                    answer, footnotes = self.rag_engine.query(query)
                    scenarios_html = ""
                    show_scenarios = False
            else:
                answer, footnotes = self.rag_engine.query(query)
                scenarios_html = ""
                show_scenarios = False

            # Update user profile with question (best-effort; never block the answer)
            if self.user_profiling and user_id:
                try:
                    self.user_profiling.update_from_question(user_id, query)
                except Exception as e:
                    print(f"Error updating user profile: {e}")

            # Generate follow-up questions
            followup_html = ""
            followup_visible = False
            if self.proactive_engine and user_id:
                try:
                    followup_questions = self.proactive_engine.get_follow_up_questions(
                        user_id, answer, max_questions=5
                    )
                    if followup_questions:
                        followup_visible = True
                        followup_html = "<div style='margin-top: 15px;'>"
                        for i, q_data in enumerate(followup_questions, 1):
                            question = q_data.get("question", "")
                            bloom_level = q_data.get("bloom_level", "")
                            # Escape quotes for JavaScript
                            question_escaped = question.replace("'", "\\'").replace('"', '\\"')
                            followup_html += f"""
                            <div style='margin: 10px 0; padding: 12px; background-color: #f5f5f5; border-radius: 5px; border-left: 3px solid #4CAF50; display: flex; justify-content: space-between; align-items: center;'>
                                <div style='flex: 1;'>
                                    <div style='font-weight: 500; margin-bottom: 4px;'>{question}</div>
                                    <small style='color: #666;'>Bloom Level: {bloom_level.title()}</small>
                                </div>
                                <button onclick="document.querySelector('textarea[label=\\'Your Question\\']').value='{question_escaped}'; this.style.backgroundColor='#4CAF50'; this.style.color='white';"
                                        style='margin-left: 15px; padding: 8px 16px; background-color: #2196F3; color: white; border: none; border-radius: 3px; cursor: pointer; white-space: nowrap;'>
                                    Use
                                </button>
                            </div>
                            """
                        followup_html += "</div>"
                except Exception as e:
                    print(f"Error generating follow-up questions: {e}")

            # Prepare return values
            outputs = [answer, footnotes]

            # Add scenarios output
            if self.enhanced_rag_engine:
                outputs.append(gr.update(visible=show_scenarios))
                outputs.append(scenarios_html if scenarios_html else "")

            # Add follow-up questions output
            if self.proactive_engine:
                outputs.append(gr.update(visible=followup_visible))
                outputs.append(followup_html)

            return tuple(outputs)

        def load_suggestions(user_id="default_user"):
            """Load prompt suggestions

            Returns (container visibility update, suggestions HTML). Hides the
            container on any error or when there is nothing to suggest.
            """
            if not self.proactive_engine or not user_id:
                return gr.update(visible=False), ""

            try:
                suggestions = self.proactive_engine.get_prompt_suggestions(user_id, max_suggestions=5)
                if not suggestions:
                    return gr.update(visible=False), ""

                suggestions_html = "<div style='margin-top: 10px;'>"
                for i, suggestion in enumerate(suggestions, 1):
                    question = suggestion.get("question", "")
                    reason = suggestion.get("reason", "")
                    priority = suggestion.get("priority", "low")
                    # Border color encodes priority; gray fallback for unknown values.
                    priority_color = {"high": "#f44336", "medium": "#ff9800", "low": "#4CAF50"}.get(priority, "#666")

                    # Escape quotes for JavaScript
                    question_escaped = question.replace("'", "\\'").replace('"', '\\"')
                    suggestions_html += f"""
                    <div style='margin: 10px 0; padding: 12px; background-color: #f9f9f9; border-radius: 5px; border-left: 4px solid {priority_color};'>
                        <div style='display: flex; justify-content: space-between; align-items: start;'>
                            <div style='flex: 1;'>
                                <strong style='color: #333;'>{i}. {question}</strong>
                                <br><small style='color: #666;'>{reason}</small>
                            </div>
                            <button onclick="document.querySelector('textarea[label=\\'Your Question\\']').value='{question_escaped}'; this.style.backgroundColor='#4CAF50'; this.style.color='white';"
                                    style='margin-left: 10px; padding: 8px 15px; background-color: #2196F3; color: white; border: none; border-radius: 3px; cursor: pointer; white-space: nowrap;'>
                                Use
                            </button>
                        </div>
                    </div>
                    """
                suggestions_html += "</div>"

                return gr.update(visible=True), suggestions_html
            except Exception as e:
                print(f"Error loading suggestions: {e}")
                return gr.update(visible=False), ""

        # Set up event handlers
        # Branch 1: full wiring when suggestions UI exists (proactive engine
        # plus the user-id textbox created under user_profiling above).
        if self.proactive_engine and user_id_input and suggestions_display:
            # Suggestions event handlers
            suggestions_event = load_suggestions_btn.click(
                load_suggestions,
                inputs=[user_id_input],
                outputs=[suggestions_container, suggestions_display]
            )
            if refresh_suggestions_btn:
                refresh_suggestions_btn.click(
                    load_suggestions,
                    inputs=[user_id_input],
                    outputs=[suggestions_container, suggestions_display]
                )
            if regenerate_suggestions_btn:
                regenerate_suggestions_btn.click(
                    load_suggestions,
                    inputs=[user_id_input],
                    outputs=[suggestions_container, suggestions_display]
                )
            if cancel_suggestions_btn:
                cancel_suggestions_btn.click(fn=None, cancels=suggestions_event)

            # Build outputs list for query (must mirror process_query's return shape)
            outputs_list = [answer_output, footnotes_output]
            if self.enhanced_rag_engine:
                outputs_list.extend([scenarios_container, scenarios_display])
            outputs_list.extend([followup_container, followup_questions_display])

            # Query event handlers
            query_event = submit_btn.click(
                process_query,
                inputs=[query_input, user_id_input],
                outputs=outputs_list
            )
            regenerate_answer_btn.click(
                process_query,
                inputs=[query_input, user_id_input],
                outputs=outputs_list
            )
            if cancel_answer_btn:
                cancel_answer_btn.click(fn=None, cancels=query_event)

            # Follow-up questions event handlers (regenerate only, cancel is handled by query cancel)
            if self.proactive_engine and regenerate_followup_btn:
                def regenerate_followup(query, user_id, answer_text):
                    """Regenerate follow-up questions based on the current answer"""
                    if not self.proactive_engine or not user_id or not answer_text:
                        return gr.update(visible=False), ""

                    try:
                        followup_questions = self.proactive_engine.get_follow_up_questions(
                            user_id, answer_text, max_questions=5
                        )
                        if followup_questions:
                            followup_html = "<div style='margin-top: 15px;'>"
                            for i, q_data in enumerate(followup_questions, 1):
                                question = q_data.get("question", "")
                                bloom_level = q_data.get("bloom_level", "")
                                question_escaped = question.replace("'", "\\'").replace('"', '\\"')
                                followup_html += f"""
                                <div style='margin: 10px 0; padding: 12px; background-color: #f5f5f5; border-radius: 5px; border-left: 3px solid #4CAF50; display: flex; justify-content: space-between; align-items: center;'>
                                    <div style='flex: 1;'>
                                        <div style='font-weight: 500; margin-bottom: 4px;'>{question}</div>
                                        <small style='color: #666;'>Bloom Level: {bloom_level.title()}</small>
                                    </div>
                                    <button onclick="document.querySelector('textarea[label=\\'Your Question\\']').value='{question_escaped}'; this.style.backgroundColor='#4CAF50'; this.style.color='white';"
                                            style='margin-left: 15px; padding: 8px 16px; background-color: #2196F3; color: white; border: none; border-radius: 3px; cursor: pointer; white-space: nowrap;'>
                                        Use
                                    </button>
                                </div>
                                """
                            followup_html += "</div>"
                            return gr.update(visible=True), followup_html
                        else:
                            return gr.update(visible=False), ""
                    except Exception as e:
                        print(f"Error regenerating follow-up questions: {e}")
                        return gr.update(visible=False), ""

                followup_event = regenerate_followup_btn.click(
                    regenerate_followup,
                    inputs=[query_input, user_id_input, answer_output],
                    outputs=[followup_container, followup_questions_display]
                )
                if cancel_followup_btn:
                    cancel_followup_btn.click(fn=None, cancels=followup_event)
        else:
            # Branch 2: minimal wiring (no user-id input available).
            # Build outputs list (must match process_query return values)
            outputs_list = [answer_output, footnotes_output]
            if self.enhanced_rag_engine:
                outputs_list.extend([scenarios_container, scenarios_display])
            if self.proactive_engine:
                outputs_list.extend([followup_container, followup_questions_display])

            query_event = submit_btn.click(
                process_query,
                inputs=[query_input],
                outputs=outputs_list
            )
            regenerate_answer_btn.click(
                process_query,
                inputs=[query_input],
                outputs=outputs_list
            )
            if cancel_answer_btn:
                cancel_answer_btn.click(fn=None, cancels=query_event)
|
| 361 |
+
|
| 362 |
+
def _create_knowledge_map_tab(self):
|
| 363 |
+
"""Create the knowledge map tab"""
|
| 364 |
+
gr.Markdown("## π Car Manual Knowledge Map")
|
| 365 |
+
gr.Markdown("This visualization shows how different concepts in the car manual are related.")
|
| 366 |
+
|
| 367 |
+
knowledge_map_img = gr.Image(
|
| 368 |
+
value=str(self.config.output_dir / "knowledge_graph.png"),
|
| 369 |
+
label="Knowledge Graph"
|
| 370 |
+
)
|
| 371 |
+
|
| 372 |
+
gr.Markdown("## π₯ Document Similarity Heatmap")
|
| 373 |
+
gr.Markdown("This heatmap shows how similar different ADAS features are to each other.")
|
| 374 |
+
|
| 375 |
+
similarity_heatmap_img = gr.Image(
|
| 376 |
+
value=str(self.config.output_dir / "similarity_heatmap.png"),
|
| 377 |
+
label="Similarity Heatmap"
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
with gr.Row():
|
| 381 |
+
refresh_btn = gr.Button("π Refresh Visualizations", variant="secondary")
|
| 382 |
+
|
| 383 |
+
def refresh_images():
|
| 384 |
+
graph_path, heatmap_path = self.knowledge_graph.generate_visualizations()
|
| 385 |
+
return graph_path, heatmap_path
|
| 386 |
+
|
| 387 |
+
refresh_btn.click(
|
| 388 |
+
refresh_images,
|
| 389 |
+
inputs=[],
|
| 390 |
+
outputs=[knowledge_map_img, similarity_heatmap_img]
|
| 391 |
+
)
|
| 392 |
+
|
| 393 |
+
    def _create_test_tab(self):
        """Create the test tab

        Lays out the quiz UI: topic selection, per-question display with
        A-D options, feedback/navigation buttons, and a final summary panel.
        NOTE(review): the event handlers are not wired here (see the
        placeholder comment at the end of the method).
        """
        gr.Markdown("## π Test Your Understanding of Mercedes E-class ADAS")
        gr.Markdown("Select a topic to test your knowledge with multiple-choice questions based on Bloom's taxonomy levels.")

        # Topics are derived from the files registered in the vector store.
        topic_files = self.rag_engine.get_files_from_vector_store()

        with gr.Row():
            # Session state for the quiz flow.
            test_questions = gr.State(None)    # generated questions for the chosen topic
            current_level_idx = gr.State(0)    # index into the Bloom's-taxonomy levels
            selected_topic = gr.State(None)    # topic currently being tested
            test_results = gr.State([])        # per-question outcomes for the summary

            topic_dropdown = gr.Dropdown(
                label="Select a Topic",
                choices=topic_files,
                value=topic_files[0] if topic_files else None,
                interactive=True
            )

            start_test_btn = gr.Button("Start Test", variant="primary")

        # Progress indicator
        with gr.Column(visible=False) as progress_container:
            progress_html = gr.HTML()

        # Test container
        with gr.Column(visible=False) as test_container:
            taxonomy_level = gr.Markdown("Level: Remember")
            level_description = gr.Markdown()
            question_display = gr.Markdown()

            option_radio = gr.Radio(
                choices=["A", "B", "C", "D"],
                label="Select your answer",
                interactive=True
            )

            submit_answer_btn = gr.Button("Submit Answer", variant="primary")
            feedback_display = gr.Markdown(visible=False)
            next_question_btn = gr.Button("Next Question", visible=False)
            show_summary_btn = gr.Button("Show Summary", visible=False)

        # Summary container
        with gr.Column(visible=False) as summary_container:
            summary_topic = gr.Markdown()
            summary_results = gr.HTML()
            summary_recommendation = gr.Markdown()
            restart_btn = gr.Button("Start Another Test")

        # Connect handlers (simplified - full implementation would include all handlers)
        # This is a placeholder structure - full implementation would be quite long
|
| 445 |
+
|
| 446 |
+
def _create_onboarding_tab(self):
|
| 447 |
+
"""Create onboarding tab for cold start"""
|
| 448 |
+
gr.Markdown("## π― Welcome! Let's Get Started")
|
| 449 |
+
gr.Markdown("Complete your profile to get a personalized learning experience.")
|
| 450 |
+
|
| 451 |
+
if not self.user_profiling:
|
| 452 |
+
gr.Markdown("β οΈ Personalized Learning System not initialized.")
|
| 453 |
+
return
|
| 454 |
+
|
| 455 |
+
user_id_input = gr.Textbox(
|
| 456 |
+
label="User ID",
|
| 457 |
+
placeholder="Enter your user ID",
|
| 458 |
+
value="default_user"
|
| 459 |
+
)
|
| 460 |
+
|
| 461 |
+
with gr.Accordion("π Step 1: Background Information", open=True):
|
| 462 |
+
background_input = gr.Radio(
|
| 463 |
+
label="What's your experience with ADAS systems?",
|
| 464 |
+
choices=[
|
| 465 |
+
("Beginner - I'm new to ADAS systems", "beginner"),
|
| 466 |
+
("Intermediate - I know some basics", "intermediate"),
|
| 467 |
+
("Experienced - I have good knowledge", "experienced")
|
| 468 |
+
],
|
| 469 |
+
value="beginner"
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
with gr.Accordion("π¨ Step 2: Learning Preferences", open=True):
|
| 473 |
+
learning_style_input = gr.Radio(
|
| 474 |
+
label="How do you prefer to learn?",
|
| 475 |
+
choices=[
|
| 476 |
+
("Visual - I like diagrams and illustrations", "visual"),
|
| 477 |
+
("Textual - I prefer reading and explanations", "textual"),
|
| 478 |
+
("Practical - I learn by doing", "practical"),
|
| 479 |
+
("Mixed - I like a combination", "mixed")
|
| 480 |
+
],
|
| 481 |
+
value="mixed"
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
learning_pace_input = gr.Radio(
|
| 485 |
+
label="What's your preferred learning pace?",
|
| 486 |
+
choices=[
|
| 487 |
+
("Slow - I like to take my time", "slow"),
|
| 488 |
+
("Medium - Normal pace is fine", "medium"),
|
| 489 |
+
("Fast - I want to learn quickly", "fast")
|
| 490 |
+
],
|
| 491 |
+
value="medium"
|
| 492 |
+
)
|
| 493 |
+
|
| 494 |
+
with gr.Accordion("π― Step 3: Learning Goals", open=True):
|
| 495 |
+
learning_goals_input = gr.CheckboxGroup(
|
| 496 |
+
label="What are your learning goals?",
|
| 497 |
+
choices=[
|
| 498 |
+
"Understand basic ADAS functions",
|
| 499 |
+
"Learn how to operate ADAS features",
|
| 500 |
+
"Master advanced ADAS capabilities",
|
| 501 |
+
"Troubleshoot ADAS issues",
|
| 502 |
+
"Prepare for certification",
|
| 503 |
+
"General knowledge improvement"
|
| 504 |
+
],
|
| 505 |
+
value=["Understand basic ADAS functions"]
|
| 506 |
+
)
|
| 507 |
+
|
| 508 |
+
with gr.Accordion("π Step 4: Initial Knowledge Assessment", open=True):
|
| 509 |
+
gr.Markdown("Rate your familiarity with each topic (0 = No knowledge, 1 = Expert)")
|
| 510 |
+
|
| 511 |
+
knowledge_sliders = {}
|
| 512 |
+
for topic in self.config.available_topics:
|
| 513 |
+
display_name = topic.replace("Function of ", "").replace(" Assist", "")
|
| 514 |
+
knowledge_sliders[topic] = gr.Slider(
|
| 515 |
+
label=display_name,
|
| 516 |
+
minimum=0.0,
|
| 517 |
+
maximum=1.0,
|
| 518 |
+
value=0.0,
|
| 519 |
+
step=0.1
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
submit_btn = gr.Button("Complete Setup", variant="primary")
|
| 523 |
+
output_result = gr.JSON(label="Setup Result")
|
| 524 |
+
|
| 525 |
+
def submit_onboarding(user_id, background, learning_style, learning_pace,
|
| 526 |
+
learning_goals, *knowledge_values):
|
| 527 |
+
"""Submit cold start data"""
|
| 528 |
+
if not self.user_profiling:
|
| 529 |
+
return {"status": "error", "message": "System not initialized"}
|
| 530 |
+
|
| 531 |
+
# Convert knowledge_values tuple to dictionary
|
| 532 |
+
knowledge_survey = {}
|
| 533 |
+
for i, topic in enumerate(self.config.available_topics):
|
| 534 |
+
if i < len(knowledge_values):
|
| 535 |
+
knowledge_survey[topic] = knowledge_values[i]
|
| 536 |
+
else:
|
| 537 |
+
knowledge_survey[topic] = 0.0
|
| 538 |
+
|
| 539 |
+
# Handle tuple values from Radio components
|
| 540 |
+
if isinstance(background, tuple):
|
| 541 |
+
background = background[1] if len(background) > 1 else background[0]
|
| 542 |
+
if isinstance(learning_style, tuple):
|
| 543 |
+
learning_style = learning_style[1] if len(learning_style) > 1 else learning_style[0]
|
| 544 |
+
if isinstance(learning_pace, tuple):
|
| 545 |
+
learning_pace = learning_pace[1] if len(learning_pace) > 1 else learning_pace[0]
|
| 546 |
+
|
| 547 |
+
onboarding_data = {
|
| 548 |
+
'learning_style': learning_style,
|
| 549 |
+
'learning_pace': learning_pace,
|
| 550 |
+
'background_experience': background,
|
| 551 |
+
'learning_goals': learning_goals if learning_goals else [],
|
| 552 |
+
'initial_knowledge_survey': knowledge_survey,
|
| 553 |
+
'initial_assessment_completed': True
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
try:
|
| 557 |
+
profile = self.user_profiling.complete_onboarding(user_id, onboarding_data)
|
| 558 |
+
return {
|
| 559 |
+
"status": "success",
|
| 560 |
+
"message": f"Onboarding completed for {user_id}",
|
| 561 |
+
"profile_summary": self.user_profiling.get_profile_summary(user_id)
|
| 562 |
+
}
|
| 563 |
+
except Exception as e:
|
| 564 |
+
import traceback
|
| 565 |
+
error_details = traceback.format_exc()
|
| 566 |
+
return {"status": "error", "message": f"Error: {str(e)}\nDetails: {error_details}"}
|
| 567 |
+
|
| 568 |
+
inputs = [user_id_input, background_input, learning_style_input,
|
| 569 |
+
learning_pace_input, learning_goals_input] + list(knowledge_sliders.values())
|
| 570 |
+
submit_btn.click(submit_onboarding, inputs=inputs, outputs=output_result)
|
| 571 |
+
|
| 572 |
+
def _create_learning_path_tab(self):
    """Create personalized learning path tab.

    Builds the Gradio widgets for the "Learning Journey" tab: a user-ID
    loader, a (initially hidden) profile summary panel, a focus-area
    selector, and a (initially hidden) learning-path panel, then wires the
    two buttons to closures defined below.  If the adaptive engine or the
    user-profiling service is missing, only a warning banner is rendered.
    """
    gr.Markdown("## πΊοΈ Your Personalized Learning Journey")
    gr.Markdown("Get a customized learning path based on your knowledge profile.")

    # Bail out early when the personalization backends were not wired up.
    if not self.adaptive_engine or not self.user_profiling:
        gr.Markdown("β οΈ Personalized Learning System not initialized.")
        return

    # User ID input
    with gr.Row():
        user_id_input = gr.Textbox(
            label="User ID",
            placeholder="Enter your user ID",
            value="default_user"
        )
        load_profile_btn = gr.Button("Load My Profile", variant="primary")

    # User profile display (revealed by load_user_profile on success).
    with gr.Column(visible=False) as profile_container:
        profile_summary = gr.Markdown()

        with gr.Row():
            with gr.Column():
                gr.Markdown("### π Knowledge Profile")
                knowledge_level_display = gr.JSON()

            with gr.Column():
                gr.Markdown("### π Learning Statistics")
                learning_stats = gr.JSON()

    # Learning path generation controls.
    with gr.Row():
        focus_areas_input = gr.CheckboxGroup(
            label="Focus Areas (Optional)",
            choices=self.config.available_topics,
            value=[],
            interactive=True
        )
        generate_path_btn = gr.Button("Generate Learning Path", variant="primary")

    # Learning path display (revealed by generate_learning_path on success).
    with gr.Column(visible=False) as path_container:
        gr.Markdown("### πΊοΈ Your Learning Path")
        path_progress = gr.HTML()
        path_visualization = gr.HTML()

        with gr.Row():
            with gr.Column():
                current_node_info = gr.Markdown()
                recommendations_display = gr.JSON()

    def check_and_show_onboarding_wrapper(user_id):
        """Check if user needs onboarding.

        Returns True when onboarding is still required; empty user IDs are
        treated as "no onboarding needed" so downstream guards can run.
        """
        if not user_id:
            return False
        return check_and_show_onboarding(self.user_profiling, user_id)

    def load_user_profile(user_id):
        """Load the user profile.

        Returns a 5-tuple matching the click outputs:
        (container visibility update, markdown summary, knowledge dict,
        stats dict, focus-area choices).
        """
        if not self.user_profiling or not user_id:
            return (gr.update(visible=False), "", {}, {}, [])

        # Users who have not completed onboarding only get a prompt to do so.
        if check_and_show_onboarding_wrapper(user_id):
            return (
                gr.update(visible=False),
                f"## β οΈ Onboarding Required\n\nPlease complete onboarding first.",
                {},
                {},
                self.config.available_topics
            )

        # NOTE(review): `profile` is fetched but never used below — confirm
        # whether get_or_create_profile is needed here only for its
        # creation side effect.
        profile = self.user_profiling.get_or_create_profile(user_id)
        summary = self.user_profiling.get_profile_summary(user_id)

        summary_text = f"""
### π€ User Profile: {user_id}

**Learning Style:** {summary['learning_style'].title()}
**Learning Pace:** {summary['learning_pace'].title()}
**Overall Progress:** {summary['overall_progress']:.1%}
**Total Questions:** {summary['total_questions']}
**Total Tests:** {summary['total_tests']}

**Strong Areas:** {', '.join(summary['strong_areas']) if summary['strong_areas'] else 'None'}
**Weak Areas:** {', '.join(summary['weak_areas']) if summary['weak_areas'] else 'None'}
"""

        # Empty knowledge maps get a sentinel entry so the JSON widget is not blank.
        knowledge_data = summary['knowledge_level'] or {"No topics learned yet": 0.0}
        stats_data = {
            "Total Questions": summary['total_questions'],
            "Total Tests": summary['total_tests'],
            "Preferred Topics": summary['preferred_topics'][:5] if summary['preferred_topics'] else [],
            "Overall Progress": f"{summary['overall_progress']:.1%}"
        }

        return (
            gr.update(visible=True),
            summary_text,
            knowledge_data,
            stats_data,
            self.config.available_topics
        )

    def generate_learning_path(user_id, focus_areas):
        """Generate learning paths.

        Returns a 5-tuple matching the click outputs:
        (container visibility update, progress HTML, path HTML,
        current-node markdown, recommendations dict).
        """
        if not self.adaptive_engine or not user_id:
            return (gr.update(visible=False), "β οΈ System not initialized.", "", "", {})

        if check_and_show_onboarding_wrapper(user_id):
            return (gr.update(visible=False), "β οΈ Please complete onboarding first.", "", "", {})

        # Build (or refresh) the adaptive path, optionally narrowed to focus areas.
        path = self.adaptive_engine.create_or_update_path(user_id, focus_areas if focus_areas else None)

        # Progress bar rendered as inline-styled HTML.
        progress_html = f"""
<div style="width:100%; background-color:#f0f0f0; border-radius:5px; overflow:hidden; margin:20px 0;">
    <div style="width:{path.completion_percentage*100}%; background-color:#4CAF50; height:30px; border-radius:5px; display:flex; align-items:center; justify-content:center; color:white; font-weight:bold;">
        {path.completion_percentage*100:.1f}% Complete
    </div>
</div>
<p><strong>Total Nodes:</strong> {len(path.nodes)} | <strong>Completed:</strong> {sum(1 for n in path.nodes if n.status == 'completed')} | <strong>Estimated Time:</strong> {path.estimated_total_time} minutes</p>
"""

        # One status-colored card per node; the current node gets a highlight border.
        path_html = "<div style='margin:20px 0;'><h4>Learning Path:</h4><div style='display:flex; flex-direction:column; gap:10px;'>"
        for i, node in enumerate(path.nodes):
            status_color = {"completed": "#4CAF50", "in_progress": "#2196F3", "pending": "#9E9E9E", "skipped": "#FF9800"}.get(node.status, "#9E9E9E")
            is_current = i == path.current_node_index
            highlight = "border: 3px solid #FF5722; padding: 10px;" if is_current else "padding: 10px;"
            path_html += f"""
<div style='{highlight} background-color:white; border-left: 5px solid {status_color}; border-radius:5px; margin:5px 0;'>
    <div style='display:flex; justify-content:space-between; align-items:center;'>
        <div><strong>{node.topic}</strong> - {node.bloom_level.title()} ({node.content_type})<br><small>Difficulty: {node.difficulty:.2f} | Time: {node.estimated_time} min</small></div>
        <div style='color:{status_color}; font-weight:bold;'>{node.status.title()}</div>
    </div>
</div>
"""
        path_html += "</div></div>"

        # Describe the node the learner should tackle next, if any remain.
        if path.current_node_index < len(path.nodes):
            current_node = path.nodes[path.current_node_index]
            current_node_info_text = f"""
### Current Learning Node

**Topic:** {current_node.topic}
**Bloom Level:** {current_node.bloom_level.title()}
**Content Type:** {current_node.content_type.title()}
**Difficulty:** {current_node.difficulty:.2f}
**Estimated Time:** {current_node.estimated_time} minutes
"""
        else:
            current_node_info_text = "### Learning Path Complete! π"

        recommendations = self.adaptive_engine.get_recommendations(user_id)

        return (
            gr.update(visible=True),
            progress_html,
            path_html,
            current_node_info_text,
            recommendations
        )

    # Wire buttons to the closures above.  Output order must match the
    # tuples returned by each handler.
    load_profile_btn.click(
        load_user_profile,
        inputs=[user_id_input],
        outputs=[profile_container, profile_summary, knowledge_level_display, learning_stats, focus_areas_input]
    )

    generate_path_btn.click(
        generate_learning_path,
        inputs=[user_id_input, focus_areas_input],
        outputs=[path_container, path_progress, path_visualization, current_node_info, recommendations_display]
    )
src/knowledge_graph.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge Graph Visualization Module
|
| 3 |
+
Creates knowledge maps and similarity heatmaps from document relationships
|
| 4 |
+
"""
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import networkx as nx
|
| 7 |
+
import numpy as np
|
| 8 |
+
import os
|
| 9 |
+
import re
|
| 10 |
+
import json
|
| 11 |
+
from typing import Tuple, Optional, Dict, List
|
| 12 |
+
from openai import OpenAI
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class KnowledgeGraphGenerator:
    """Generates knowledge graphs and visualizations.

    Pipeline: list the documents stored in an OpenAI vector store, ask the
    model to describe each one, extract key technical concepts from those
    descriptions, compute pairwise document similarity from shared concepts,
    and render a networkx knowledge map plus a similarity heatmap as PNG
    files under ``output_dir``.
    """

    def __init__(self, client: OpenAI, vector_store_id: str, output_dir: str = "output"):
        """
        Args:
            client: OpenAI client used for Responses API and chat completions.
            vector_store_id: ID of the vector store holding the manual PDFs.
            output_dir: Directory for generated PNGs (created if missing).
        """
        self.client = client
        self.vector_store_id = vector_store_id
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

    def get_files_from_vector_store(self) -> List[str]:
        """Get list of files from vector store.

        Issues a broad file_search query and collects the filenames cited in
        the response annotations.

        Returns:
            Sorted, de-duplicated filenames with the ``.pdf`` suffix removed,
            or an empty list on any error.
        """
        try:
            query = "List all documents in the manual"
            response = self.client.responses.create(
                input=query,
                model="gpt-4o-mini",
                tools=[{
                    "type": "file_search",
                    "vector_store_ids": [self.vector_store_id],
                    "max_num_results": 25
                }]
            )

            file_list = []
            # NOTE(review): assumes output[0] is the tool call and output[1]
            # the assistant message whose first content part carries the file
            # citations — confirm against the Responses API version in use.
            if response and hasattr(response.output[1].content[0], 'annotations'):
                annotations = response.output[1].content[0].annotations
                file_list = list(set([annotation.filename for annotation in annotations]))
                file_list = [f.replace('.pdf', '') for f in file_list]
                file_list.sort()

            return file_list
        except Exception as e:
            print(f"β Error getting files: {str(e)}")
            return []

    def extract_topics_from_content(self, file_list: List[str]) -> Tuple[Dict[str, List[str]], List[str]]:
        """Extract topics from document content using GPT.

        Two stages: (1) fetch a short description of each file via
        file_search; (2) a single chat-completion call maps every file to
        3-5 key concepts, parsed from a JSON object embedded in the reply.
        Filename-derived keywords are used as a fallback at every step.

        Returns:
            Tuple of (file_topics, all_topics): ``file_topics`` maps each
            file name to its concept list; ``all_topics`` is the union of
            all concepts.
        """
        all_topics = set()
        file_topics = {}
        file_descriptions = {}

        print("π Getting content descriptions for each file...")

        # Stage 1: get descriptions for each file.
        for file in file_list:
            try:
                query = f"What is the main purpose and key concepts covered in the document titled '{file}'? Be brief and focused on technical concepts."
                response = self.client.responses.create(
                    input=query,
                    model="gpt-4o-mini",
                    tools=[{
                        "type": "file_search",
                        "vector_store_ids": [self.vector_store_id]
                    }]
                )

                if response and hasattr(response.output[1], 'content'):
                    description = response.output[1].content[0].text
                    file_descriptions[file] = description
                    print(f" β Got description for {file}")
                else:
                    # Placeholder keeps stage 2 input complete even on misses.
                    file_descriptions[file] = f"Information about {file}"
            except Exception as e:
                print(f" β οΈ Error getting description for {file}: {e}")
                file_descriptions[file] = f"Information about {file}"

        # Stage 2: extract topics from the collected descriptions.
        prompt = "Extract key technical concepts (single words or short phrases) from these document descriptions. Focus on functional concepts, components, and technologies.\n\n"

        for file, desc in file_descriptions.items():
            prompt += f"Document: {file}\nDescription: {desc}\n\n"

        prompt += "\nFor each document, list 3-5 key technical concepts. Format as a JSON object where keys are document names and values are arrays of concepts."

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You extract key technical concepts from document descriptions in a structured way."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3
            )

            topics_text = response.choices[0].message.content
            # Greedy {...} match pulls the JSON object out of any prose wrapper.
            json_match = re.search(r'\{.*\}', topics_text, re.DOTALL)

            if json_match:
                try:
                    file_topics = json.loads(json_match.group(0))
                    for topics in file_topics.values():
                        all_topics.update(topics)
                    print(f"β Successfully extracted topics for {len(file_topics)} documents")
                except json.JSONDecodeError:
                    print("β οΈ Error parsing JSON response, using fallback")
                    file_topics = self._create_fallback_topics(file_list)
            else:
                file_topics = self._create_fallback_topics(file_list)
        except Exception as e:
            print(f"β οΈ Error extracting topics: {e}, using fallback")
            file_topics = self._create_fallback_topics(file_list)

        # Ensure all files have topics (the model may omit or empty entries).
        for file in file_list:
            if file not in file_topics or not file_topics[file]:
                words = [word for word in re.findall(r'\b[A-Za-z]{3,}\b', file)
                         if word.lower() not in ['the', 'and', 'for', 'with', 'function', 'of']]
                file_topics[file] = words if words else ["Topic"]

        return file_topics, list(all_topics)

    def _create_fallback_topics(self, file_list: List[str]) -> Dict[str, List[str]]:
        """Create fallback topics from filenames.

        Extracts 3+-letter words from each filename, filtering common
        stop-words; a generic "Topic" is used when nothing survives.
        """
        file_topics = {}
        for file in file_list:
            words = [word for word in re.findall(r'\b[A-Za-z]{3,}\b', file)
                     if word.lower() not in ['the', 'and', 'for', 'with', 'function', 'of']]
            file_topics[file] = words if words else ["Topic"]
        return file_topics

    def analyze_document_relationships(self, file_list: List[str],
                                       file_topics: Dict[str, List[str]]) -> np.ndarray:
        """Analyze relationships between documents based on topics.

        Builds a binary bag-of-topics vector per document and returns the
        n x n cosine-similarity matrix; the diagonal is fixed at 1.0 and
        zero-topic documents keep similarity 0 with everything else.
        """
        n = len(file_list)
        similarity_matrix = np.zeros((n, n))

        # The union of all topics defines the vector dimensions.
        all_topics = set()
        for topics in file_topics.values():
            all_topics.update(topics)
        topic_list = list(all_topics)

        # Create binary vectors for each document.
        topic_vectors = {}
        for file in file_list:
            vector = np.zeros(len(topic_list))
            for i, topic in enumerate(topic_list):
                if topic in file_topics.get(file, []):
                    vector[i] = 1
            topic_vectors[file] = vector

        # Calculate cosine similarity for every ordered pair.
        for i, file1 in enumerate(file_list):
            for j, file2 in enumerate(file_list):
                if i == j:
                    similarity_matrix[i][j] = 1.0
                else:
                    vec1 = topic_vectors.get(file1, np.zeros(len(topic_list)))
                    vec2 = topic_vectors.get(file2, np.zeros(len(topic_list)))

                    dot_product = np.dot(vec1, vec2)
                    norm1 = np.linalg.norm(vec1)
                    norm2 = np.linalg.norm(vec2)

                    # Guard against division by zero for empty topic vectors.
                    if norm1 > 0 and norm2 > 0:
                        similarity_matrix[i][j] = dot_product / (norm1 * norm2)

        return similarity_matrix

    def create_knowledge_graph(self, file_list: List[str], file_topics: Dict[str, List[str]],
                               similarity_matrix: np.ndarray) -> nx.Graph:
        """Create knowledge graph from documents and topics.

        Documents and topics become typed nodes; each document links to its
        topics (fixed weight 3), and document pairs whose similarity exceeds
        0.25 get an edge weighted by 5x the similarity.
        """
        G = nx.Graph()

        # Add document nodes
        for file in file_list:
            G.add_node(file, type='document', size=700)

        # Add topic nodes and document-to-topic connections.
        for file, topics in file_topics.items():
            for topic in topics:
                if topic not in G:
                    G.add_node(topic, type='topic', size=500)
                G.add_edge(file, topic, weight=3)

        # Add edges between similar documents (i < j avoids duplicates).
        for i, file1 in enumerate(file_list):
            for j, file2 in enumerate(file_list):
                if i < j:
                    sim = similarity_matrix[i][j]
                    if sim > 0.25:
                        G.add_edge(file1, file2, weight=sim * 5)

        return G

    def save_knowledge_graph(self, G: nx.Graph) -> str:
        """Save knowledge graph visualization.

        Renders the graph with a Kamada-Kawai layout — blue document nodes,
        green topic nodes, edge width scaled by weight — and writes it to
        ``output_dir/knowledge_graph.png``.

        Returns:
            Path of the saved PNG as a string.
        """
        plt.figure(figsize=(16, 12))

        pos = nx.kamada_kawai_layout(G)

        document_nodes = [n for n, attr in G.nodes(data=True) if attr.get('type') == 'document']
        topic_nodes = [n for n, attr in G.nodes(data=True) if attr.get('type') == 'topic']

        edge_widths = [G[u][v].get('weight', 1) * 0.6 for u, v in G.edges()]

        nx.draw_networkx_nodes(G, pos, nodelist=document_nodes, node_color='#5B9BD5',
                               node_size=800, alpha=0.8)
        nx.draw_networkx_nodes(G, pos, nodelist=topic_nodes, node_color='#70AD47',
                               node_size=600, alpha=0.8)
        nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.7, edge_color='#A5A5A5')

        # Create labels: long document names are stripped of boilerplate
        # prefixes and truncated to keep the plot readable.
        doc_labels = {}
        for node in document_nodes:
            if len(node) > 20:
                shortened = re.sub(r'(?:Function|Operating|Setting|Activating|Deactivating) of ', '', node)
                shortened = re.sub(r' Assist', '', shortened)
                if len(shortened) > 20:
                    shortened = shortened[:18] + '...'
                doc_labels[node] = shortened
            else:
                doc_labels[node] = node

        # Draw document labels with a white backing box for contrast.
        for node, label in doc_labels.items():
            x, y = pos[node]
            plt.text(x, y, label, fontsize=9, fontweight='bold',
                     ha='center', va='center',
                     bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', boxstyle='round,pad=0.3'))

        # Topic labels use a lighter green backing box.
        for node in topic_nodes:
            x, y = pos[node]
            plt.text(x, y, node, fontsize=8, ha='center', va='center',
                     bbox=dict(facecolor='#E8F4E5', alpha=0.9, edgecolor='none', boxstyle='round,pad=0.2'))

        plt.title("System Knowledge Map", fontsize=18)
        plt.axis('off')
        plt.tight_layout()

        output_path = self.output_dir / "knowledge_graph.png"
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.close()

        print(f"β Knowledge graph saved to {output_path}")
        return str(output_path)

    def save_similarity_heatmap(self, matrix: np.ndarray, labels: List[str]) -> str:
        """Save similarity heatmap.

        Plots the document-similarity matrix as a Blues-colormapped image
        with each off-diagonal cell annotated, and writes it to
        ``output_dir/similarity_heatmap.png``.

        Returns:
            Path of the saved PNG as a string.
        """
        plt.figure(figsize=(12, 10))

        plt.imshow(matrix, cmap='Blues')
        plt.colorbar(label='Similarity')

        # Shorten labels the same way as in the knowledge graph.
        shortened_labels = []
        for label in labels:
            if len(label) > 15:
                shortened = re.sub(r'(?:Function|Operating|Setting|Activating|Deactivating) of ', '', label)
                shortened = re.sub(r' Assist', '', shortened)
                if len(shortened) > 15:
                    shortened = shortened[:13] + '...'
                shortened_labels.append(shortened)
            else:
                shortened_labels.append(label)

        plt.xticks(range(len(labels)), shortened_labels, rotation=45, ha='right')
        plt.yticks(range(len(labels)), shortened_labels)

        # Add similarity values; white text where the cell is dark.
        for i in range(len(labels)):
            for j in range(len(labels)):
                if i != j:
                    plt.text(j, i, f'{matrix[i, j]:.2f}',
                             ha="center", va="center",
                             color="white" if matrix[i, j] > 0.5 else "black")

        plt.title("Document Similarity Heatmap", fontsize=16)
        plt.tight_layout()

        output_path = self.output_dir / "similarity_heatmap.png"
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.close()

        print(f"β Similarity heatmap saved to {output_path}")
        return str(output_path)

    def generate_visualizations(self) -> Tuple[Optional[str], Optional[str]]:
        """Generate both knowledge graph and heatmap visualizations.

        Runs the full pipeline end to end.

        Returns:
            (graph_path, heatmap_path), or (None, None) when the vector
            store yields no files.
        """
        print("π Generating knowledge graph visualizations...")

        file_list = self.get_files_from_vector_store()
        if not file_list:
            print("β οΈ No files found. Cannot create knowledge map.")
            return None, None

        print("π Extracting topics from content...")
        file_topics, all_topics = self.extract_topics_from_content(file_list)

        print("π Analyzing document relationships...")
        similarity_matrix = self.analyze_document_relationships(file_list, file_topics)

        print("π¨ Creating knowledge graph...")
        G = self.create_knowledge_graph(file_list, file_topics, similarity_matrix)

        print("πΎ Saving visualizations...")
        graph_path = self.save_knowledge_graph(G)
        heatmap_path = self.save_similarity_heatmap(similarity_matrix, file_list)

        print("β Dynamic visualizations complete!")
        return graph_path, heatmap_path
src/question_generator.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Question Generation Module
|
| 3 |
+
Generates multiple-choice questions based on Bloom's taxonomy
|
| 4 |
+
"""
|
| 5 |
+
import json
|
| 6 |
+
from typing import Dict, List
|
| 7 |
+
from openai import OpenAI
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class QuestionGenerator:
    """Generates educational multiple-choice questions based on Bloom's taxonomy.

    For a given manual topic, content is retrieved through the RAG query
    engine, then a chat model is asked to produce one four-option question
    per Bloom's taxonomy level as structured JSON.  Malformed or missing
    questions are replaced with placeholders so callers always receive a
    complete, well-shaped set of six levels.
    """

    # Canonical Bloom's levels, ordered from lowest to highest cognitive demand.
    _EXPECTED_LEVELS = ["remember", "understand", "apply", "analyze", "evaluate", "create"]

    def __init__(self, client: "OpenAI", rag_query_engine):
        """
        Args:
            client: OpenAI client used for chat-completion calls.
            rag_query_engine: Object exposing ``query(text) -> (answer, sources)``,
                used to pull manual content into the generation prompt.
        """
        self.client = client
        self.rag_query_engine = rag_query_engine

        # Per-level instruction snippets injected into the generation prompt.
        self.blooms_levels = {
            "remember": "generate questions that test basic recall of facts and information",
            "understand": "generate questions that test explanation and interpretation of concepts",
            "apply": "generate questions that test application of knowledge in practical situations",
            "analyze": "generate questions that test analysis of relationships and structure",
            "evaluate": "generate questions that test evaluation and judgment based on criteria",
            "create": "generate questions that test creation of new ideas or solutions"
        }

    def generate_questions(self, topic_file: str) -> Dict[str, Dict]:
        """
        Generate multiple-choice questions for all Bloom's taxonomy levels.

        Args:
            topic_file: Name of the topic file (PDF).

        Returns:
            Dictionary mapping Bloom's level to question data.  Always
            contains all six levels; placeholder/fallback questions are
            substituted when generation or parsing fails.
        """
        topic_clean = topic_file.replace('.pdf', '')

        # Get content about this topic from the manual via RAG.
        file_content_query = f"What are the key points covered in the document '{topic_clean}'?"
        content_response, _ = self.rag_query_engine.query(file_content_query)

        # Build prompt
        prompt = self._build_question_prompt(topic_clean, content_response)

        try:
            # response_format forces the model to emit a parseable JSON object.
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are an expert in creating educational assessment materials for automotive systems."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
                response_format={"type": "json_object"}
            )

            response_text = response.choices[0].message.content

            try:
                questions_data = json.loads(response_text)
                return self._validate_and_format_questions(questions_data, topic_clean)
            except json.JSONDecodeError as e:
                print(f"β οΈ Error parsing JSON: {e}")
                return self._create_fallback_questions(topic_clean)
        except Exception as e:
            print(f"β Error generating questions: {e}")
            return self._create_fallback_questions(topic_clean)

    def _build_question_prompt(self, topic_clean: str, content_response: str) -> str:
        """Build the prompt for question generation.

        Interpolates the topic name, the per-level Bloom instructions, and
        the retrieved manual content into a single instruction block that
        requests a JSON object in the documented schema.
        """
        prompt = f"""You are a tester trying to come up with multiple choice questions from system users based on the input car manuals.
You are trying to make it not tricky, but at the same time not too easy. However, users' understanding of the system is your utmost priority.

Create 1 multiple choice question based on the manual file about '{topic_clean}' for each of the following levels of Bloom's taxonomy:
- Remember: {self.blooms_levels['remember']}
- Understand: {self.blooms_levels['understand']}
- Apply: {self.blooms_levels['apply']}
- Analyze: {self.blooms_levels['analyze']}
- Evaluate: {self.blooms_levels['evaluate']}
- Create: {self.blooms_levels['create']}

Try to find the most important and insightful content for each question. Do note where the right answer in the manual file is located.
Separate the questions and explanations (i.e., only write all the explanations at the end).
Please do not generate questions that give varying numbers as answers. Test users' concepts and understanding of the vehicle system.
Make sure there are no questions with possibility of two correct answers.

Try to have a definitive right answer. Be slow and steady.

Here is the content from the manual:
{content_response}

Output your response as a clean JSON object with these fields for each question:
- level (string): the Bloom's taxonomy level
- question_text (string): the full question text
- options (array): four answer choices as strings
- correct_option_index (integer): index of the correct answer (0-3)
- explanation (string): explanation of why the correct answer is right

Example JSON format:
{{
    "questions": [
        {{
            "level": "remember",
            "question_text": "What does DISTRONIC stand for?",
            "options": ["Distance Control", "Dynamic Intelligent Speed Tronic", "Direct Intelligence Control", "Digital Road Navigation Intelligence Control"],
            "correct_option_index": 1,
            "explanation": "DISTRONIC stands for Dynamic Intelligent Speed Tronic as stated in section 3.2 of the manual."
        }}
    ]
}}
"""
        return prompt

    @staticmethod
    def _is_well_formed(q: Dict) -> bool:
        """Return True if ``q`` has the complete MCQ structure the UI expects.

        Requires a non-empty question text, exactly four string options, and
        an integer answer index in [0, 3].  (bool is excluded explicitly
        because it is a subclass of int.)
        """
        text = q.get("question_text")
        options = q.get("options")
        index = q.get("correct_option_index")
        return (
            isinstance(text, str) and bool(text.strip())
            and isinstance(options, list) and len(options) == 4
            and all(isinstance(opt, str) for opt in options)
            and isinstance(index, int) and not isinstance(index, bool)
            and 0 <= index <= 3
        )

    def _validate_and_format_questions(self, questions_data: Dict, topic_clean: str) -> Dict[str, Dict]:
        """Validate and format questions, ensuring all levels are present.

        Entries that are not dicts, carry an unrecognized level, or fail the
        structural check in :meth:`_is_well_formed` are discarded; any level
        left without a valid question is filled with a placeholder, so the
        result always has exactly the six expected keys.
        """
        question_dict = {}

        for q in questions_data.get("questions", []):
            # Skip entries the model got structurally wrong instead of
            # letting them crash or leak a broken question into the quiz UI.
            if not isinstance(q, dict):
                continue
            level = q.get("level", "")
            if isinstance(level, str) and level.lower() in self._EXPECTED_LEVELS and self._is_well_formed(q):
                question_dict[level.lower()] = q

        # Fill missing (or rejected) levels with placeholder questions.
        for level in self._EXPECTED_LEVELS:
            if level not in question_dict:
                print(f"β οΈ Missing question for level: {level}")
                question_dict[level] = {
                    "level": level,
                    "question_text": f"Question for {level} level could not be generated.",
                    "options": ["Option A", "Option B", "Option C", "Option D"],
                    "correct_option_index": 0,
                    "explanation": "Please try again or select a different topic."
                }

        return question_dict

    def _create_fallback_questions(self, topic_name: str) -> Dict[str, Dict]:
        """Create fallback questions when generation fails entirely.

        Produces one generic, structurally valid question per Bloom's level
        so the UI can still render a complete quiz.
        """
        fallback = {}
        for level in self._EXPECTED_LEVELS:
            fallback[level] = {
                "level": level,
                "question_text": f"What is a key feature of {topic_name}?",
                "options": [
                    f"Option A about {topic_name}",
                    f"Option B about {topic_name}",
                    f"Option C about {topic_name}",
                    f"Option D about {topic_name}"
                ],
                "correct_option_index": 0,
                "explanation": f"This is a fallback question for the {level} level. Please try again or select a different topic."
            }
        return fallback
+
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
|
src/rag_query.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Query Module
|
| 3 |
+
Handles querying the RAG system and extracting answers with sources
|
| 4 |
+
"""
|
| 5 |
+
from typing import Tuple, Optional, List
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class RAGQueryEngine:
    """Handles RAG queries against an OpenAI vector store, with source attribution."""

    def __init__(self, client: OpenAI, vector_store_id: str, model: str = "gpt-4o-mini"):
        """
        Args:
            client: Configured OpenAI client.
            vector_store_id: ID of the vector store to search.
            model: Chat model used to answer queries.
        """
        self.client = client
        self.vector_store_id = vector_store_id
        self.model = model

    def get_response_from_vectorstore(self, query: str):
        """
        Get response from vector store using the OpenAI responses API.

        Args:
            query: User query

        Returns:
            Response object, or None if the call failed or the response
            does not have the expected structure.
        """
        try:
            response = self.client.responses.create(
                input=query,
                model=self.model,
                tools=[{
                    "type": "file_search",
                    "vector_store_ids": [self.vector_store_id],
                }]
            )

            # output[1] is expected to hold the assistant message (output[0]
            # is presumably the file_search tool call — confirm against the
            # responses API). Check the length explicitly so a short output
            # list is reported as invalid instead of raising IndexError.
            if response and len(response.output) > 1 and hasattr(response.output[1], 'content'):
                return response
            print("β οΈ Invalid response structure")
            return None
        except Exception as e:
            print(f"β Error during API call: {e}")
            return None

    def query(self, query: str) -> Tuple[str, str]:
        """
        Query the RAG model and return the answer with source footnotes.

        Args:
            query: User query

        Returns:
            Tuple of (answer_text, footnotes). footnotes is "" when no
            source annotations are available.
        """
        response = self.get_response_from_vectorstore(query)

        if not response:
            return "That question is outside my area of expertise.", ""

        message = response.output[1].content[0]
        answer_text = message.text

        footnotes = ""
        annotations = getattr(message, 'annotations', None)
        if annotations:
            # Deduplicate, then sort so footnote numbering is deterministic
            # (plain set iteration order is arbitrary) and consistent with
            # get_files_from_vector_store().
            source_files = sorted({result.filename for result in annotations})

            footnotes = "\n\nπ **Sources:**\n"
            for i, filename in enumerate(source_files, 1):
                # Strip only a trailing ".pdf" extension; str.replace would
                # also delete ".pdf" occurring mid-name.
                clean_name = filename[:-4] if filename.endswith('.pdf') else filename
                footnotes += f"{i}. {clean_name}\n"

        return answer_text, footnotes

    def get_files_from_vector_store(self) -> List[str]:
        """
        Get the list of files referenced by the vector store.

        Returns:
            Sorted list of filenames; falls back to a default list when the
            query yields no annotations, and [] on error.
        """
        try:
            query = "List all documents about Mercedes E-class ADAS features"
            response = self.get_response_from_vectorstore(query)

            file_list = []
            if response:
                message = response.output[1].content[0]
                annotations = getattr(message, 'annotations', None)
                if annotations:
                    file_list = sorted({annotation.filename for annotation in annotations})

            # Fallback to the known default document set if nothing came back.
            if not file_list:
                file_list = [
                    "Function of Active Distance Assist DISTRONIC.pdf",
                    "Function of Active Lane Change Assist.pdf",
                    "Function of Active Steering Assist.pdf",
                    "Function of Active Stop-and-Go Assist.pdf"
                ]

            return file_list
        except Exception as e:
            print(f"β Error getting files: {str(e)}")
            return []
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
src/vector_store.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Vector Store Management Module
|
| 3 |
+
Handles creation, file upload, and management of OpenAI vector stores
|
| 4 |
+
"""
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
import concurrent.futures
|
| 9 |
+
import os
|
| 10 |
+
from openai import OpenAI
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class VectorStoreManager:
    """Manages OpenAI vector store operations (create, upload, search)."""

    def __init__(self, client: OpenAI):
        """
        Args:
            client: Configured OpenAI client.
        """
        self.client = client

    def create_vector_store(self, store_name: str) -> Optional[Dict]:
        """
        Create a Vector Store on OpenAI's servers.

        Args:
            store_name: Name for the vector store

        Returns:
            Dictionary with vector store details, or None if creation failed.
        """
        try:
            vector_store = self.client.vector_stores.create(name=store_name)
            details = {
                "id": vector_store.id,
                "name": vector_store.name,
                "created_at": vector_store.created_at,
                "file_count": vector_store.file_counts.completed
            }
            print(f"β Vector store created: {details}")
            return details
        except Exception as e:
            print(f"β Error creating vector store: {e}")
            return None

    def upload_single_pdf(self, file_path: str, vector_store_id: str) -> Dict:
        """
        Upload a single PDF file and attach it to the vector store.

        Args:
            file_path: Path to the PDF file
            vector_store_id: ID of the vector store

        Returns:
            Dictionary with upload status:
            {"file": name, "status": "success"} or
            {"file": name, "status": "failed", "error": msg}.
        """
        file_name = os.path.basename(file_path)
        try:
            # Upload the raw file to OpenAI; the "with" block guarantees the
            # local file handle is closed even if the API call raises.
            with open(file_path, 'rb') as f:
                file_response = self.client.files.create(
                    file=f,
                    purpose="assistants"
                )

            # Attach the uploaded file to the vector store (the returned
            # attachment object is not needed beyond success/failure).
            self.client.vector_stores.files.create(
                vector_store_id=vector_store_id,
                file_id=file_response.id
            )
            return {"file": file_name, "status": "success"}
        except Exception as e:
            print(f"β Error uploading {file_name}: {str(e)}")
            return {"file": file_name, "status": "failed", "error": str(e)}

    def upload_pdf_files(self, pdf_files: List[str], vector_store_id: str,
                         max_workers: int = 10) -> Dict:
        """
        Upload multiple PDF files to the vector store in parallel.

        Args:
            pdf_files: List of PDF file paths
            vector_store_id: ID of the vector store
            max_workers: Maximum number of parallel upload workers

        Returns:
            Dictionary with upload statistics:
            total_files, successful_uploads, failed_uploads, errors.
        """
        stats = {
            "total_files": len(pdf_files),
            "successful_uploads": 0,
            "failed_uploads": 0,
            "errors": []
        }

        if not pdf_files:
            print("β οΈ No PDF files to upload")
            return stats

        print(f"π€ Uploading {len(pdf_files)} PDF files in parallel...")

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {
                executor.submit(self.upload_single_pdf, file_path, vector_store_id): file_path
                for file_path in pdf_files
            }

            # Consume results as they finish so the progress bar advances in
            # completion order rather than submission order.
            for future in tqdm(concurrent.futures.as_completed(futures),
                               total=len(pdf_files), desc="Uploading"):
                result = future.result()
                if result["status"] == "success":
                    stats["successful_uploads"] += 1
                else:
                    stats["failed_uploads"] += 1
                    stats["errors"].append(result)

        print(f"β Upload complete: {stats['successful_uploads']}/{stats['total_files']} successful")
        return stats

    def search_vector_store(self, query: str, vector_store_id: str,
                            max_results: int = 10):
        """
        Search the vector store directly.

        Args:
            query: Search query
            vector_store_id: ID of the vector store
            max_results: Maximum number of results
                (NOTE(review): currently accepted but not forwarded to the
                API call — TODO: confirm the search endpoint's limit
                parameter and pass it through.)

        Returns:
            Search results, or None on error.
        """
        try:
            search_results = self.client.vector_stores.search(
                vector_store_id=vector_store_id,
                query=query
            )
            return search_results
        except Exception as e:
            print(f"β Error searching vector store: {e}")
            return None
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
|