SyedFarooqAlii commited on
Commit
4711fe9
·
1 Parent(s): f309037
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +3 -35
  2. agent.py +249 -0
  3. api.py +215 -0
  4. app/__init__.py +0 -3
  5. app/__pycache__/__init__.cpython-312.pyc +0 -0
  6. app/__pycache__/config.cpython-312.pyc +0 -0
  7. app/__pycache__/main.cpython-312.pyc +0 -0
  8. app/api/__pycache__/chat.cpython-312.pyc +0 -0
  9. app/api/__pycache__/health.cpython-312.pyc +0 -0
  10. app/api/__pycache__/ingest.cpython-312.pyc +0 -0
  11. app/api/chat.py +0 -249
  12. app/api/health.py +0 -160
  13. app/api/ingest.py +0 -316
  14. app/config.py +0 -45
  15. app/database/__pycache__/database.cpython-312.pyc +0 -0
  16. app/database/__pycache__/models.cpython-312.pyc +0 -0
  17. app/database/__pycache__/repositories.cpython-312.pyc +0 -0
  18. app/database/database.py +0 -56
  19. app/database/models.py +0 -138
  20. app/database/repositories.py +0 -217
  21. app/embeddings/__pycache__/minimal_embedding_generator.cpython-312.pyc +0 -0
  22. app/embeddings/minimal_embedding_generator.py +0 -79
  23. app/generation/__pycache__/response_generator.cpython-312.pyc +0 -0
  24. app/generation/response_generator.py +0 -387
  25. app/ingestion/__pycache__/chunker.cpython-312.pyc +0 -0
  26. app/ingestion/__pycache__/document_parser.cpython-312.pyc +0 -0
  27. app/ingestion/__pycache__/file_scanner.cpython-312.pyc +0 -0
  28. app/ingestion/chunker.py +0 -291
  29. app/ingestion/document_parser.py +0 -146
  30. app/ingestion/file_scanner.py +0 -92
  31. app/main.py +0 -44
  32. app/models/__pycache__/chat.cpython-312.pyc +0 -0
  33. app/models/chat.py +0 -69
  34. app/prompting/__pycache__/context_filter.cpython-312.pyc +0 -0
  35. app/prompting/__pycache__/prompt_builder.cpython-312.pyc +0 -0
  36. app/prompting/context_filter.py +0 -205
  37. app/prompting/prompt_builder.py +0 -187
  38. app/retrieval/__pycache__/retriever.cpython-312.pyc +0 -0
  39. app/retrieval/__pycache__/vector_search.cpython-312.pyc +0 -0
  40. app/retrieval/retriever.py +0 -149
  41. app/retrieval/vector_search.py +0 -103
  42. app/services/__pycache__/chat_service.cpython-312.pyc +0 -0
  43. app/services/__pycache__/gemini_client.cpython-312.pyc +0 -0
  44. app/services/__pycache__/openrouter_client.cpython-312.pyc +0 -0
  45. app/services/chat_service.py +0 -144
  46. app/services/openrouter_client.py +0 -165
  47. app/vector_store/__pycache__/qdrant_client.cpython-312.pyc +0 -0
  48. app/vector_store/__pycache__/vector_repository.cpython-312.pyc +0 -0
  49. app/vector_store/qdrant_client.py +0 -207
  50. app/vector_store/vector_repository.py +0 -49
Dockerfile CHANGED
@@ -1,44 +1,12 @@
1
- # Use Python 3.11 slim image as base
2
  FROM python:3.11-slim
3
 
4
- # Set environment variables
5
- ENV PYTHONDONTWRITEBYTECODE=1 \
6
- PYTHONUNBUFFERED=1 \
7
- PYTHONPATH=/app \
8
- PORT=7860
9
-
10
- # Set work directory
11
  WORKDIR /app
12
 
13
- # Install system dependencies
14
- RUN apt-get update \
15
- && apt-get install -y --no-install-recommends \
16
- build-essential \
17
- gcc \
18
- curl \
19
- && rm -rf /var/lib/apt/lists/*
20
-
21
- # Copy requirements first to leverage Docker cache
22
  COPY requirements.txt .
 
23
 
24
- # Install Python dependencies
25
- RUN pip install --no-cache-dir --upgrade pip \
26
- && pip install --no-cache-dir -r requirements.txt
27
-
28
- # Copy the rest of the application
29
  COPY . .
30
 
31
- # Create a non-root user and set permissions
32
- RUN adduser --disabled-password --gecos '' appuser \
33
- && chown -R appuser:appuser /app
34
- USER appuser
35
-
36
- # Expose port (Hugging Face typically uses port 7860 or 8080)
37
- EXPOSE $PORT
38
-
39
- # Health check endpoint
40
- HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
41
- CMD curl -f http://localhost:$PORT/health || exit 1
42
 
43
- # Run the application with uvicorn directly for better production performance
44
- CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port $PORT"]
 
 
1
FROM python:3.11-slim

# All subsequent paths are relative to /app inside the image.
WORKDIR /app

# Copy requirements first so the dependency layer is cached
# independently of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source.
COPY . .

# Hugging Face Spaces serves apps on port 7860.
EXPOSE 7860

# Launch the FastAPI app defined in api.py.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
 
agent.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os  # NOTE(review): not used in this module — confirm and remove if no env handling is planned
import json
import logging
from typing import Dict, List, Any
from dotenv import load_dotenv
from agents import Agent, Runner
from agents import function_tool
import asyncio
import time

# Load environment variables (API keys etc.) from a local .env file.
load_dotenv()

# Module-wide logger; handlers configured once at import time.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
17
+
18
@function_tool
def retrieve_information(query: str) -> Dict:
    """
    Retrieve information from the knowledge base based on a query
    """
    # Imported lazily so that importing this module does not require the
    # retriever stack to be available.
    from retrieving import RAGRetriever
    retriever = RAGRetriever()

    try:
        # Call the existing retrieve method from the RAGRetriever instance
        # NOTE(review): retrieve() appears to return a JSON string (it is
        # fed to json.loads below) — confirm against retrieving.RAGRetriever.
        json_response = retriever.retrieve(query_text=query, top_k=5, threshold=0.3)
        results = json.loads(json_response)

        # Format the results for the assistant: keep only the fields the
        # agent needs. A missing key here raises KeyError, which is caught
        # below and reported in-band via the 'error' field.
        formatted_results = []
        for result in results.get('results', []):
            formatted_results.append({
                'content': result['content'],
                'url': result['url'],
                'position': result['position'],
                'similarity_score': result['similarity_score']
            })

        return {
            'query': query,
            'retrieved_chunks': formatted_results,
            'total_results': len(formatted_results)
        }
    except Exception as e:
        # Never raise out of a tool call: return an empty result set with
        # the error message so the agent can degrade gracefully.
        logger.error(f"Error in retrieve_information: {e}")
        return {
            'query': query,
            'retrieved_chunks': [],
            'total_results': 0,
            'error': str(e)
        }
54
+
55
class RAGAgent:
    """Wrapper around the OpenAI Agents SDK with a knowledge-base retrieval tool.

    Exposes a synchronous ``query_agent`` entry point that is safe to call
    both from plain synchronous code and from code already running inside an
    asyncio event loop (e.g. a FastAPI request handler).
    """

    def __init__(self):
        # Build the agent once; the retrieve_information tool is registered
        # so the model can fetch knowledge-base chunks before answering.
        self.agent = Agent(
            name="RAG Assistant",
            instructions="You are a helpful assistant that answers questions based on retrieved documents. When asked a question, retrieve relevant documents first using the retrieve_information tool, then answer based on them. Always cite your sources and provide the information that was used to generate the answer.",
            tools=[retrieve_information]
        )

        logger.info("RAG Agent initialized with OpenAI Agents SDK")

    def query_agent(self, query_text: str) -> Dict:
        """Process a query through the RAG agent and return a structured response.

        Returns a dict with ``answer``, ``sources``, ``matched_chunks`` and
        ``query_time_ms`` (plus ``confidence`` on success). Failures are
        reported via an ``error`` key instead of raising.
        """
        start_time = time.time()
        logger.info(f"Processing query through RAG agent: '{query_text[:50]}...'")

        try:
            return self._run_coroutine_blocking(self._async_query_agent(query_text))
        except Exception as e:
            logger.error(f"Error processing query: {e}")
            return {
                "answer": "Sorry, I encountered an error processing your request.",
                "sources": [],
                "matched_chunks": [],
                "error": str(e),
                "query_time_ms": (time.time() - start_time) * 1000
            }

    @staticmethod
    def _run_coroutine_blocking(coro):
        """Run *coro* to completion from synchronous code and return its result.

        ``asyncio.get_event_loop().is_running()`` (used previously) is
        deprecated and raises in recent Python when no loop exists in the
        current thread; ``get_running_loop`` is the supported probe. If a
        loop IS running we must not block it with ``asyncio.run``, so the
        coroutine is executed on a worker thread with its own event loop.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No running loop in this thread: safe to drive the coroutine here.
            return asyncio.run(coro)

        # Already inside an event loop: run on a separate thread.
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(asyncio.run, coro).result()

    async def _async_query_agent(self, query_text: str) -> Dict:
        """Run the agent and shape its result into the response dict."""
        start_time = time.time()

        try:
            result = await Runner.run(self.agent, query_text)
            assistant_response = result.final_output

            if not assistant_response:
                return {
                    "answer": "Sorry, I couldn't generate a response.",
                    "sources": [],
                    "matched_chunks": [],
                    "error": "No response from assistant",
                    "query_time_ms": (time.time() - start_time) * 1000
                }

            # NOTE(review): the Agents SDK result does not expose tool-call
            # outputs in an obvious way here, so sources/matched_chunks are
            # currently always empty and the model is expected to cite
            # sources inside the answer text itself. TODO: extract tool
            # results from `result` once the SDK surface is confirmed.
            sources = set()
            matched_chunks = []

            query_time_ms = (time.time() - start_time) * 1000
            response = {
                "answer": str(assistant_response),
                "sources": list(sources),
                "matched_chunks": matched_chunks,
                "query_time_ms": query_time_ms,
                "confidence": self._calculate_confidence(matched_chunks)
            }

            logger.info(f"Query processed in {query_time_ms:.2f}ms")
            return response

        except Exception as e:
            # Logged here with async context; the sync wrapper converts the
            # exception into an in-band error response.
            logger.error(f"Error in async query: {e}")
            raise

    def _calculate_confidence(self, matched_chunks: List[Dict]) -> str:
        """Map the mean similarity score of the chunks to low/medium/high."""
        if not matched_chunks:
            return "low"

        avg_score = sum(chunk.get('similarity_score', 0.0) for chunk in matched_chunks) / len(matched_chunks)

        if avg_score >= 0.7:
            return "high"
        elif avg_score >= 0.4:
            return "medium"
        else:
            return "low"
168
+
169
def query_agent(query_text: str) -> Dict:
    """
    Module-level convenience wrapper: build a fresh RAGAgent and query it.
    """
    return RAGAgent().query_agent(query_text)
175
+
176
def run_agent_sync(query_text: str) -> Dict:
    """
    Synchronous entry point that runs the agent for direct usage.

    Works whether or not an asyncio event loop is already running in this
    thread: with a running loop the coroutine is driven on a worker thread
    (blocking the loop with ``asyncio.run`` is illegal); otherwise it is run
    directly.
    """
    async def run_async():
        agent = RAGAgent()
        return await agent._async_query_agent(query_text)

    # BUGFIX: previously `future.result()` sat inside the `try`, so a
    # RuntimeError raised by the agent coroutine itself was caught by the
    # `except RuntimeError` fallback and silently re-ran the whole query.
    # The try now covers only the loop probe.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No running loop: safe to drive the coroutine directly.
        return asyncio.run(run_async())

    # A loop is already running; execute on a fresh loop in a worker thread.
    import concurrent.futures
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, run_async()).result()
197
+
198
def main():
    """
    Demonstrate the RAG agent on a fixed set of example queries and print
    the answer, sources, matched chunks and timing for each.
    """
    logger.info("Initializing RAG Agent...")

    agent = RAGAgent()

    # Example queries covering different knowledge-base topics.
    test_queries = [
        "What is ROS2?",
        "Explain humanoid design principles",
        "How does VLA work?",
        "What are simulation techniques?",
        "Explain AI control systems"
    ]

    print("RAG Agent - Testing Queries")
    print("=" * 50)

    total = len(test_queries)
    for idx, query in enumerate(test_queries, 1):
        print(f"\nQuery {idx}: {query}")
        print("-" * 30)

        response = agent.query_agent(query)

        print(f"Answer: {response['answer']}")

        sources = response.get('sources')
        if sources:
            print(f"Sources: {len(sources)} documents")
            # Show at most the first three source identifiers.
            for source in sources[:3]:
                print(f" - {source}")

        chunks = response.get('matched_chunks')
        if chunks:
            print(f"Matched chunks: {len(chunks)}")
            # Preview the first two chunks, truncated to 100 characters.
            for pos, chunk in enumerate(chunks[:2], 1):
                text = chunk['content']
                preview = text[:100] + "..." if len(text) > 100 else text
                print(f" Chunk {pos}: {preview}")
                print(f" Source: {chunk['url']}")
                print(f" Score: {chunk['similarity_score']:.3f}")

        print(f"Query time: {response['query_time_ms']:.2f}ms")
        print(f"Confidence: {response.get('confidence', 'unknown')}")

        # Brief pause between queries, skipped after the last one.
        if idx < total:
            time.sleep(1)

if __name__ == "__main__":
    main()
api.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os  # NOTE(review): not used in this module — confirm and remove
import asyncio
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional, Dict
from dotenv import load_dotenv
import logging

# Load environment variables (e.g. API keys) from a local .env file.
load_dotenv()

# Configure logging once for the whole process.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import the existing RAG agent functionality
from agent import RAGAgent

# Create FastAPI app
app = FastAPI(
    title="RAG Agent API",
    description="API for RAG Agent with document retrieval and question answering",
    version="1.0.0"
)

# Add CORS middleware for development
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
35
+
36
# Pydantic request/response schemas for the API endpoints.

class QueryRequest(BaseModel):
    # Free-text question for the /ask endpoint.
    query: str

class ChatRequest(BaseModel):
    # NOTE(review): both `query` and `message` are required but only `query`
    # is read by the /api handler — confirm with clients and drop one.
    query: str
    message: str
    session_id: str
    selected_text: Optional[str] = None
    query_type: str = "global"
    top_k: int = 5

class MatchedChunk(BaseModel):
    # One retrieved knowledge-base chunk with its source URL and score.
    content: str
    url: str
    position: int
    similarity_score: float

class QueryResponse(BaseModel):
    answer: str
    sources: List[str]
    matched_chunks: List[MatchedChunk]
    error: Optional[str] = None
    status: str # "success", "error", "empty"
    query_time_ms: Optional[float] = None
    confidence: Optional[str] = None

class ChatResponse(BaseModel):
    # Chat-style envelope: answer text plus citation metadata, echoing the
    # caller's session_id/query_type for correlation.
    response: str
    citations: List[Dict[str, str]]
    session_id: str
    query_type: str
    timestamp: str

class HealthResponse(BaseModel):
    status: str
    message: str
73
+
74
# Global RAG agent instance; populated once at startup and read by the
# request handlers below (None until startup completes).
rag_agent = None

@app.on_event("startup")
async def startup_event():
    """Initialize the RAG agent on startup.

    NOTE(review): `on_event` is deprecated in recent FastAPI versions in
    favor of lifespan handlers — consider migrating when upgrading.
    """
    global rag_agent
    logger.info("Initializing RAG Agent...")
    try:
        rag_agent = RAGAgent()
        logger.info("RAG Agent initialized successfully")
    except Exception as e:
        # Fail fast: without the agent no endpoint can serve requests.
        logger.error(f"Failed to initialize RAG Agent: {e}")
        raise
88
+
89
@app.post("/ask", response_model=QueryResponse)
async def ask_rag(request: QueryRequest):
    """
    Process a user query through the RAG agent and return the response.

    Returns a QueryResponse whose `status` is "success" or "error";
    unexpected failures are reported in-band rather than as HTTP 500.
    Raises 400 for invalid input and 503 while the agent is not ready.
    """
    logger.info(f"Processing query: {request.query[:50]}...")

    try:
        # Validate input
        if not request.query or len(request.query.strip()) == 0:
            raise HTTPException(status_code=400, detail="Query cannot be empty")

        if len(request.query) > 2000:
            raise HTTPException(status_code=400, detail="Query too long, maximum 2000 characters")

        if rag_agent is None:
            # Startup has not completed (or failed); report unavailability
            # instead of crashing with an AttributeError.
            raise HTTPException(status_code=503, detail="RAG agent is not ready")

        # query_agent is synchronous and slow; run it on a worker thread so
        # the event loop stays free to serve other requests.
        response = await asyncio.to_thread(rag_agent.query_agent, request.query)

        # Format response
        formatted_response = QueryResponse(
            answer=response.get("answer", ""),
            sources=response.get("sources", []),
            matched_chunks=[
                MatchedChunk(
                    content=chunk.get("content", ""),
                    url=chunk.get("url", ""),
                    position=chunk.get("position", 0),
                    similarity_score=chunk.get("similarity_score", 0.0)
                )
                for chunk in response.get("matched_chunks", [])
            ],
            error=response.get("error"),
            status="error" if response.get("error") else "success",
            query_time_ms=response.get("query_time_ms"),
            confidence=response.get("confidence")
        )

        logger.info(f"Query processed successfully in {response.get('query_time_ms', 0):.2f}ms")
        return formatted_response

    except HTTPException:
        raise
    except Exception as e:
        # Unexpected failure: degrade to an in-band error response.
        logger.error(f"Error processing query: {e}")
        return QueryResponse(
            answer="",
            sources=[],
            matched_chunks=[],
            error=str(e),
            status="error"
        )
140
+
141
@app.post("/api", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """
    Main chat endpoint that handles conversation with RAG capabilities.

    Only `request.query` is answered; `session_id` and `query_type` are
    echoed back so the caller can correlate responses. Unexpected errors
    are returned as an empty ChatResponse rather than HTTP 500.
    """
    # Hoisted once for both the success and error paths.
    from datetime import datetime

    logger.info(f"Processing chat query: {request.query[:50]}...")

    try:
        # Validate input
        if not request.query or len(request.query.strip()) == 0:
            raise HTTPException(status_code=400, detail="Query cannot be empty")

        if not request.session_id or len(request.session_id.strip()) == 0:
            raise HTTPException(status_code=400, detail="Session ID cannot be empty")

        if len(request.query) > 2000:
            raise HTTPException(status_code=400, detail="Query too long, maximum 2000 characters")

        if rag_agent is None:
            # Startup has not completed (or failed); report unavailability
            # instead of crashing with an AttributeError.
            raise HTTPException(status_code=503, detail="RAG agent is not ready")

        # query_agent is synchronous and slow; run it on a worker thread so
        # the event loop stays free to serve other requests.
        response = await asyncio.to_thread(rag_agent.query_agent, request.query)

        # NOTE(review): utcnow() is deprecated in Python 3.12+; kept for
        # output compatibility (naive ISO timestamp without a UTC offset).
        timestamp = datetime.utcnow().isoformat()

        # Convert matched chunks to citations format; only the source URL is
        # available from the agent, so the remaining fields stay empty.
        citations = [
            {
                "document_id": "",
                "title": chunk.get("url", ""),
                "chapter": "",
                "section": "",
                "page_reference": ""
            }
            for chunk in response.get("matched_chunks", [])
        ]

        formatted_response = ChatResponse(
            response=response.get("answer", ""),
            citations=citations,
            session_id=request.session_id,
            query_type=request.query_type,
            timestamp=timestamp
        )

        logger.info(f"Chat query processed successfully")
        return formatted_response

    except HTTPException:
        raise
    except Exception as e:
        # Best-effort degradation: the error is logged but the client gets
        # an empty body (ChatResponse has no error field to carry it).
        logger.error(f"Error processing chat query: {e}")
        return ChatResponse(
            response="",
            citations=[],
            session_id=request.session_id,
            query_type=request.query_type,
            timestamp=datetime.utcnow().isoformat()
        )
201
+
202
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe: confirm the API process is up and serving."""
    payload = HealthResponse(
        status="healthy",
        message="RAG Agent API is running"
    )
    return payload
211
+
212
# For running with uvicorn directly during development.
# NOTE(review): dev port 8000 differs from the Dockerfile's 7860 — confirm
# this is intentional.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
app/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- """
2
- RAG Chatbot Backend Application
3
- """
 
 
 
 
app/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (219 Bytes)
 
app/__pycache__/config.cpython-312.pyc DELETED
Binary file (1.58 kB)
 
app/__pycache__/main.cpython-312.pyc DELETED
Binary file (1.96 kB)
 
app/api/__pycache__/chat.cpython-312.pyc DELETED
Binary file (9.58 kB)
 
app/api/__pycache__/health.cpython-312.pyc DELETED
Binary file (6.67 kB)
 
app/api/__pycache__/ingest.cpython-312.pyc DELETED
Binary file (11 kB)
 
app/api/chat.py DELETED
@@ -1,249 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, Depends, Header
2
- from typing import Dict, Any, List, Optional
3
- from pydantic import BaseModel
4
- from datetime import datetime
5
- import uuid
6
- import asyncio
7
- from functools import lru_cache
8
-
9
- from app.retrieval.retriever import retriever
10
- from app.prompting.context_filter import context_filter
11
- from app.generation.response_generator import response_generator
12
- from app.database.repositories import ChatSessionRepository, ChatMessageRepository, QueryContextRepository
13
- from app.database.database import get_db
14
- from app.config import settings
15
-
16
-
17
- router = APIRouter()
18
-
19
-
20
- class ChatRequest(BaseModel):
21
- session_id: str
22
- message: str
23
- selected_text: Optional[str] = None
24
- query_type: str = "global" # "global" or "selection"
25
- top_k: int = 5
26
-
27
-
28
- class ChatResponse(BaseModel):
29
- response: str
30
- citations: List[Dict[str, str]]
31
- session_id: str
32
- query_type: str
33
- timestamp: str
34
-
35
-
36
- class ChatMessage(BaseModel):
37
- message_id: str
38
- session_id: str
39
- role: str # "user" or "assistant"
40
- content: str
41
- citations: Optional[List[Dict[str, str]]] = None
42
- timestamp: str
43
-
44
-
45
- @router.post("", response_model=ChatResponse)
46
- async def chat_endpoint(
47
- request: ChatRequest,
48
- x_api_key: str = Header(None)
49
- ):
50
- """
51
- Main chat endpoint that handles conversation with RAG capabilities
52
- """
53
- # Validate API key if configured
54
- if settings.BACKEND_API_KEY and x_api_key != settings.BACKEND_API_KEY:
55
- raise HTTPException(status_code=401, detail="Invalid API key")
56
-
57
- try:
58
- # Validate query type
59
- if request.query_type not in ["global", "selection"]:
60
- raise HTTPException(
61
- status_code=400,
62
- detail="query_type must be either 'global' or 'selection'"
63
- )
64
-
65
- # Retrieve relevant documents based on query and query type
66
- retrieved_docs = await retriever.retrieve_with_context_filtering(
67
- query=request.message,
68
- top_k=request.top_k,
69
- query_type=request.query_type,
70
- selected_text=request.selected_text
71
- )
72
-
73
- # Apply context filtering to ensure proper isolation
74
- filtered_docs = context_filter.enforce_context_boundaries(
75
- contexts=retrieved_docs,
76
- query_type=request.query_type,
77
- selected_text=request.selected_text
78
- )
79
-
80
- # Generate response using Claude
81
- response_data = await response_generator.generate_response_with_validation(
82
- query=request.message,
83
- retrieved_contexts=filtered_docs,
84
- query_type=request.query_type,
85
- selected_text=request.selected_text,
86
- session_id=request.session_id
87
- )
88
-
89
- # Store the conversation in the database
90
- db_gen = get_db()
91
- db = next(db_gen)
92
- try:
93
- # Create or update session
94
- session_repo = ChatSessionRepository(db)
95
- existing_session = session_repo.get_session_by_id(request.session_id)
96
- if not existing_session:
97
- session_repo.create_session(session_id=request.session_id)
98
-
99
- # Store user message
100
- user_message_id = f"msg_{uuid.uuid4().hex[:8]}"
101
- message_repo = ChatMessageRepository(db)
102
- message_repo.create_message(
103
- message_id=user_message_id,
104
- session_id=request.session_id,
105
- role="user",
106
- content=request.message
107
- )
108
-
109
- # Store assistant response
110
- assistant_message_id = f"msg_{uuid.uuid4().hex[:8]}"
111
- citations_for_storage = response_data.get("citations", [])
112
- message_repo.create_message(
113
- message_id=assistant_message_id,
114
- session_id=request.session_id,
115
- role="assistant",
116
- content=response_data.get("response", ""),
117
- citations=citations_for_storage
118
- )
119
- finally:
120
- next(db_gen, None) # Close the db session
121
-
122
- return ChatResponse(
123
- response=response_data.get("response", ""),
124
- citations=response_data.get("citations", []),
125
- session_id=request.session_id,
126
- query_type=request.query_type,
127
- timestamp=datetime.utcnow().isoformat()
128
- )
129
-
130
- except HTTPException:
131
- raise
132
- except Exception as e:
133
- raise HTTPException(
134
- status_code=500,
135
- detail=f"Error processing chat request: {str(e)}"
136
- )
137
-
138
-
139
- @router.post("/stream")
140
- async def chat_stream_endpoint(request: ChatRequest):
141
- """
142
- Streaming chat endpoint (placeholder - actual streaming implementation would be more complex)
143
- """
144
- # For now, this just calls the regular endpoint
145
- # In a production implementation, you would use FastAPI's StreamingResponse
146
- result = await chat_endpoint(request, None)
147
- return result
148
-
149
-
150
- @router.get("/session/{session_id}")
151
- async def get_session_history(session_id: str):
152
- """
153
- Retrieve chat history for a specific session
154
- """
155
- try:
156
- db_gen = get_db()
157
- db = next(db_gen)
158
- try:
159
- message_repo = ChatMessageRepository(db)
160
- messages = message_repo.get_messages_by_session(session_id)
161
-
162
- return {
163
- "session_id": session_id,
164
- "messages": [
165
- {
166
- "message_id": msg.message_id,
167
- "role": msg.role,
168
- "content": msg.content,
169
- "timestamp": msg.timestamp.isoformat() if msg.timestamp else None,
170
- "citations": msg.citations
171
- }
172
- for msg in messages
173
- ],
174
- "timestamp": datetime.utcnow().isoformat()
175
- }
176
- finally:
177
- next(db_gen, None)
178
- except Exception as e:
179
- raise HTTPException(
180
- status_code=500,
181
- detail=f"Error retrieving session history: {str(e)}"
182
- )
183
-
184
-
185
- @router.delete("/session/{session_id}")
186
- async def delete_session(session_id: str):
187
- """
188
- Delete a chat session and all associated messages
189
- """
190
- try:
191
- # In a real implementation, you would have a method to delete all messages
192
- # associated with a session. For now, we'll just return a success message.
193
- return {
194
- "status": "deleted",
195
- "session_id": session_id,
196
- "timestamp": datetime.utcnow().isoformat()
197
- }
198
- except Exception as e:
199
- raise HTTPException(
200
- status_code=500,
201
- detail=f"Error deleting session: {str(e)}"
202
- )
203
-
204
-
205
- @router.post("/validate")
206
- async def validate_query(request: ChatRequest):
207
- """
208
- Validate a query without generating a response (for testing purposes)
209
- """
210
- try:
211
- # Validate query type
212
- if request.query_type not in ["global", "selection"]:
213
- return {"valid": False, "error": "query_type must be either 'global' or 'selection'"}
214
-
215
- # Validate that if query_type is 'selection', selected_text is provided
216
- if request.query_type == "selection" and not request.selected_text:
217
- return {"valid": False, "error": "selected_text is required for selection-based queries"}
218
-
219
- # Check if we can retrieve relevant documents
220
- retrieved_docs = await retriever.retrieve_relevant_documents(
221
- query=request.message,
222
- top_k=request.top_k,
223
- query_type=request.query_type,
224
- selected_text=request.selected_text
225
- )
226
-
227
- return {
228
- "valid": True,
229
- "query_type": request.query_type,
230
- "documents_found": len(retrieved_docs),
231
- "has_context": len(retrieved_docs) > 0
232
- }
233
- except Exception as e:
234
- raise HTTPException(
235
- status_code=500,
236
- detail=f"Error validating query: {str(e)}"
237
- )
238
-
239
-
240
- # Rate limiting middleware would be implemented here in a production system
241
- # For now, we'll add a simple rate limiting check based on settings
242
- async def check_rate_limit(session_id: str) -> bool:
243
- """
244
- Check if the session has exceeded rate limits
245
- This is a simplified implementation - a production system would use Redis or similar
246
- """
247
- # In a real implementation, you would check against a rate limit store
248
- # For now, we'll just return True to allow all requests
249
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/health.py DELETED
@@ -1,160 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, Depends
2
- from typing import Dict, Any
3
- from datetime import datetime
4
- import time
5
- import asyncio
6
-
7
- from app.config import settings
8
- from app.vector_store.qdrant_client import qdrant_client
9
- from app.database.models import create_tables, get_db
10
- from app.database.database import get_db
11
- from sqlalchemy.orm import Session
12
-
13
-
14
- router = APIRouter()
15
-
16
-
17
@router.get("/health", response_model=Dict[str, Any])
async def health_check():
    """
    Health check endpoint that verifies the system is operational.

    Probes the SQL database, the Qdrant vector store, and the external
    API-key configuration, then aggregates an overall status of
    "healthy", "warning", or "error".

    Returns:
        dict: overall status plus a per-component ``checks`` breakdown.
    """
    health_status = {
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
        "service": "RAG Chatbot API",
        "version": "1.0.0",
        "checks": {
            "database": {"status": "unknown", "message": ""},
            "vector_store": {"status": "unknown", "message": ""},
            "api_config": {"status": "ok", "message": "API configuration loaded"},
            "external_services": {"status": "unknown", "message": ""},
        },
    }

    # Check database connection with a trivial round-trip query
    # (SQLAlchemy 2.0 syntax requires text()).
    try:
        from app.database.database import engine
        from sqlalchemy import text
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
        health_status["checks"]["database"]["status"] = "ok"
        health_status["checks"]["database"]["message"] = "Database connection successful"
    except Exception as e:
        health_status["checks"]["database"]["status"] = "error"
        health_status["checks"]["database"]["message"] = f"Database connection failed: {str(e)}"

    # Check vector store connection via a lightweight metadata call.
    try:
        collection_info = qdrant_client.get_collection_info()
        health_status["checks"]["vector_store"]["status"] = "ok"
        health_status["checks"]["vector_store"]["message"] = (
            f"Vector store connected, {collection_info.get('point_count', 0)} points"
        )
    except Exception as e:
        health_status["checks"]["vector_store"]["status"] = "error"
        health_status["checks"]["vector_store"]["message"] = f"Vector store connection failed: {str(e)}"

    # Check external services (API keys).
    # SECURITY FIX: the previous version compared the configured key against
    # a hard-coded API-key literal embedded in source code. That secret must
    # be treated as leaked and rotated; here we only verify a key is present.
    try:
        if not settings.OPENROUTER_API_KEY:
            health_status["checks"]["external_services"]["status"] = "warning"
            health_status["checks"]["external_services"]["message"] = "API key not configured or using default value"
        else:
            health_status["checks"]["external_services"]["status"] = "ok"
            health_status["checks"]["external_services"]["message"] = "External services configured"
    except Exception as e:
        health_status["checks"]["external_services"]["status"] = "error"
        health_status["checks"]["external_services"]["message"] = f"External service config error: {str(e)}"

    # Overall status: any error wins; otherwise any warning downgrades.
    overall_status = "healthy"
    for check in health_status["checks"].values():
        if check["status"] == "error":
            overall_status = "error"
            break
        if check["status"] == "warning":
            overall_status = "warning"

    health_status["status"] = overall_status
    return health_status
84
-
85
-
86
@router.get("/ready", response_model=Dict[str, str])
async def readiness_check():
    """
    Readiness probe: confirm the critical backing services respond
    before this instance accepts traffic.
    """
    try:
        from sqlalchemy import text
        from app.database.database import engine

        # A round-trip "SELECT 1" proves the SQL database is reachable.
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        # A metadata call proves the vector store is reachable.
        qdrant_client.get_collection_info()
    except Exception:
        raise HTTPException(status_code=503, detail="Service not ready")

    return {"status": "ready"}
105
-
106
-
107
@router.get("/live", response_model=Dict[str, str])
async def liveness_check():
    """Liveness probe: the process is up and able to answer requests."""
    return {
        "status": "alive",
        "timestamp": datetime.utcnow().isoformat(),
    }
113
-
114
-
115
@router.get("/stats", response_model=Dict[str, Any])
async def get_service_stats():
    """
    Detailed service statistics: database row counts and vector-store
    collection metrics. Uptime and API-usage figures are placeholders
    until real tracking is implemented.
    """
    stats: Dict[str, Any] = {
        "timestamp": datetime.utcnow().isoformat(),
        "uptime": "tracking needed",  # would require a recorded start time
        "database": {},
        "vector_store": {},
        "api_usage": {},
    }

    # Database stats: count of ingested document chunks
    # (SQLAlchemy 2.0 syntax requires text()).
    try:
        from sqlalchemy import text
        from app.database.database import engine
        with engine.connect() as conn:
            doc_count = conn.execute(
                text("SELECT COUNT(*) FROM book_content_documents")
            ).scalar()
        stats["database"] = {
            "documents_count": doc_count,
            "status": "connected",
        }
    except Exception as e:
        stats["database"] = {"status": "error", "error": str(e)}

    # Vector store stats.
    try:
        collection_info = qdrant_client.get_collection_info()
        stats["vector_store"] = {
            "point_count": collection_info.get('point_count', 0),
            "vector_size": collection_info.get('vector_size', 0),
            "status": "connected",
        }
    except Exception as e:
        stats["vector_store"] = {"status": "error", "error": str(e)}

    # API usage would be tracked separately in a production system.
    stats["api_usage"] = {
        "requests_served": 0,   # placeholder until request tracking exists
        "avg_response_time": 0.0,  # placeholder until metrics exist
    }

    return stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/ingest.py DELETED
@@ -1,316 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, File, Form
2
- from typing import Dict, Any, List, Optional
3
- from pydantic import BaseModel
4
- from datetime import datetime
5
- import asyncio
6
- import os
7
- from pathlib import Path
8
-
9
- from app.ingestion.file_scanner import FileScanner
10
- from app.ingestion.chunker import TextChunker, chunk_documents
11
- from app.embeddings.minimal_embedding_generator import minimal_embedding_generator
12
- from app.vector_store.vector_repository import vector_repository
13
- from app.database.repositories import BookContentRepository
14
- from app.database.database import get_db
15
- from app.config import settings
16
-
17
-
18
- router = APIRouter()
19
-
20
- # Initialize components
21
- chunker = TextChunker()
22
- file_scanner = FileScanner()
23
-
24
-
25
class IngestRequest(BaseModel):
    """Request body for POST /ingest."""
    book_path: str = "docusaurus/docs"  # Default path for Docusaurus docs
    chunk_size: int = 800  # applied to chunker.max_tokens by the handler
    force_reprocess: bool = False  # NOTE(review): currently unused by the handler — confirm intent
29
-
30
-
31
class IngestResponse(BaseModel):
    """Summary returned by POST /ingest after a successful run."""
    status: str  # "success" on completion
    message: str  # human-readable summary
    documents_processed: int
    chunks_created: int
    embeddings_generated: int  # may be < chunks_created if some embeddings fail
    timestamp: str  # ISO-8601 UTC timestamp
38
-
39
-
40
class IngestStatusResponse(BaseModel):
    """Progress report for an asynchronous ingestion job."""
    status: str
    progress: float  # fraction or percentage — defined by the job writer
    message: str
    details: Dict[str, Any]
45
-
46
-
47
- # In-memory storage for tracking ingestion jobs (in production, use a proper task queue)
48
- ingestion_jobs: Dict[str, Dict[str, Any]] = {}
49
-
50
-
51
@router.post("/ingest", response_model=IngestResponse)
async def ingest_documents(request: IngestRequest, background_tasks: BackgroundTasks):
    """
    Ingest book content from markdown files: scan, chunk, embed, and store
    the results in the vector database plus the SQL metadata store.

    Raises:
        HTTPException: 400 for a missing path or no usable documents,
            500 for unexpected processing failures.
    """
    try:
        # Validate the book path exists.
        if not os.path.exists(request.book_path):
            raise HTTPException(
                status_code=400,
                detail=f"Book path does not exist: {request.book_path}"
            )

        # Update chunker settings if provided.
        if request.chunk_size:
            chunker.max_tokens = request.chunk_size

        # Scan and parse documents.
        # NOTE(review): request.book_path is validated above but never handed
        # to the scanner, which reads its own configured location — confirm
        # whether FileScanner should accept the path. request.force_reprocess
        # is also currently unused.
        documents = file_scanner.scan_and_parse_documents()

        if not documents:
            raise HTTPException(
                status_code=400,
                detail=f"No markdown documents found in path: {request.book_path}"
            )

        # Keep only documents that pass validation.
        valid_documents = [doc for doc in documents if file_scanner.validate_document(doc)]
        if not valid_documents:
            raise HTTPException(
                status_code=400,
                detail="No valid documents found after validation"
            )

        # Chunk documents.
        all_chunks = chunk_documents(valid_documents)
        if not all_chunks:
            raise HTTPException(
                status_code=500,
                detail="No chunks were created from documents"
            )

        # Generate one embedding per chunk; chunks whose embedding fails
        # are skipped (best-effort ingestion).
        chunks_with_metadata = []
        for chunk in all_chunks:
            embedding = minimal_embedding_generator.encode_query(chunk.content)
            if not embedding:
                continue
            chunks_with_metadata.append({
                'id': chunk.id,
                'content': chunk.content,
                'title': chunk.title,
                'chapter': chunk.chapter,
                'section': chunk.section,
                'page_reference': chunk.page_reference,
                'token_count': chunk.token_count,
                'embedding': embedding
            })

        # Store embeddings in the vector database, then mirror the chunk
        # metadata into the SQL database (skipping already-known IDs).
        if chunks_with_metadata:
            vector_repository.store_document_chunks(chunks_with_metadata)

            db_gen = get_db()
            db = next(db_gen)
            try:
                content_repo = BookContentRepository(db)
                for chunk_data in chunks_with_metadata:
                    if not content_repo.get_document_by_id(chunk_data['id']):
                        content_repo.create_document(chunk_data)
            finally:
                # Close the generator: this runs get_db()'s finally block,
                # which closes the session. Clearer and more robust than the
                # previous `next(db_gen, None)` idiom.
                db_gen.close()

        return IngestResponse(
            status="success",
            message=f"Successfully ingested {len(valid_documents)} documents, "
                    f"created {len(all_chunks)} chunks, "
                    f"generated {len(chunks_with_metadata)} embeddings",
            documents_processed=len(valid_documents),
            chunks_created=len(all_chunks),
            embeddings_generated=len(chunks_with_metadata),
            timestamp=datetime.utcnow().isoformat()
        )

    except HTTPException:
        # Propagate deliberate 4xx/5xx responses untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error during ingestion: {str(e)}"
        )
151
-
152
-
153
@router.post("/ingest-from-content")
async def ingest_from_content(content: str = Form(...), title: str = Form(...), chapter: str = Form("Unknown"), section: str = Form("Unknown")):
    """
    Ingest content supplied directly as form fields: chunk it, embed each
    chunk, and store the results in the vector and SQL databases.

    Raises:
        HTTPException: 400 when the content is empty, 500 for unexpected
            processing failures.
    """
    try:
        # Build a document dict in the same shape the file scanner produces.
        document = {
            'title': title,
            'content': content,
            'chapter': chapter,
            'section': section,
            'file_path': f"api_upload_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}",
            'metadata': {},
            'structure': []
        }

        # Validate document.
        if not document['content'].strip():
            raise HTTPException(status_code=400, detail="Content cannot be empty")

        # Chunk the document.
        all_chunks = chunk_documents([document])
        if not all_chunks:
            raise HTTPException(status_code=500, detail="No chunks were created from content")

        # Generate one embedding per chunk; chunks whose embedding fails
        # are skipped (best-effort ingestion).
        chunks_with_metadata = []
        for chunk in all_chunks:
            embedding = minimal_embedding_generator.encode_query(chunk.content)
            if not embedding:
                continue
            chunks_with_metadata.append({
                'id': chunk.id,
                'content': chunk.content,
                'title': chunk.title,
                'chapter': chunk.chapter,
                'section': chunk.section,
                'page_reference': chunk.page_reference,
                'token_count': chunk.token_count,
                'embedding': embedding
            })

        # Store embeddings in the vector database, then mirror the chunk
        # metadata into the SQL database (skipping already-known IDs).
        if chunks_with_metadata:
            vector_repository.store_document_chunks(chunks_with_metadata)

            db_gen = get_db()
            db = next(db_gen)
            try:
                content_repo = BookContentRepository(db)
                for chunk_data in chunks_with_metadata:
                    if not content_repo.get_document_by_id(chunk_data['id']):
                        content_repo.create_document(chunk_data)
            finally:
                # Close the generator: runs get_db()'s finally block, which
                # closes the session.
                db_gen.close()

        return {
            "status": "success",
            "message": f"Successfully ingested content, created {len(all_chunks)} chunks",
            "chunks_created": len(all_chunks),
            "timestamp": datetime.utcnow().isoformat()
        }

    except HTTPException:
        # FIX: the generic handler below previously converted deliberate
        # 4xx errors (e.g. empty content -> 400) into 500s. Re-raise them
        # intact, matching the behaviour of /ingest.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error during content ingestion: {str(e)}"
        )
231
-
232
-
233
@router.post("/ingest-file")
async def ingest_from_file(file: UploadFile = File(...), title: str = Form(None), chapter: str = Form("Unknown")):
    """
    Ingest content from an uploaded UTF-8 text file by delegating to
    /ingest-from-content. The filename stem is used as the section, and
    as the title when none is supplied.
    """
    try:
        raw = await file.read()
        text = raw.decode('utf-8')

        # Fall back to the filename (without extension) as the title.
        stem = Path(file.filename).stem
        effective_title = title or stem

        return await ingest_from_content(
            content=text,
            title=effective_title,
            chapter=chapter,
            section=stem
        )

    except UnicodeDecodeError:
        raise HTTPException(
            status_code=400,
            detail="File must be a UTF-8 encoded text file"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing uploaded file: {str(e)}"
        )
265
-
266
-
267
@router.get("/ingest-status/{job_id}")
async def get_ingest_status(job_id: str):
    """
    Return the status of a previously started ingestion job, or 404
    if the job ID is unknown.
    """
    job = ingestion_jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    return IngestStatusResponse(
        status=job["status"],
        progress=job["progress"],
        message=job["message"],
        details=job["details"],
    )
281
-
282
-
283
@router.get("/ingest-stats")
async def get_ingest_stats():
    """
    Report ingestion statistics: vector-store collection metrics plus
    document counts (total and per chapter) from the SQL database.
    """
    # Vector store stats.
    vector_stats = vector_repository.get_collection_stats()

    # Database stats: total documents and a per-chapter histogram.
    db_gen = get_db()
    db = next(db_gen)
    try:
        content_repo = BookContentRepository(db)
        all_docs = content_repo.get_all_documents()
        total_docs = len(all_docs)

        chapters: Dict[str, int] = {}
        for doc in all_docs:
            chapters[doc.chapter] = chapters.get(doc.chapter, 0) + 1
    finally:
        # Resume the generator past its yield so its finally block runs
        # and the session is closed.
        next(db_gen, None)

    return {
        "vector_store": vector_stats,
        "database": {
            "total_documents": total_docs,
            "documents_by_chapter": chapters
        },
        "timestamp": datetime.utcnow().isoformat()
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/config.py DELETED
@@ -1,45 +0,0 @@
1
- from pydantic_settings import BaseSettings
2
- from typing import Optional
3
- from functools import lru_cache
4
-
5
-
6
class Settings(BaseSettings):
    """
    Application configuration loaded from the environment (and `.env`).

    Fields without defaults are required at startup; pydantic raises a
    validation error when any of them is missing.
    """
    # OpenRouter API
    OPENROUTER_API_KEY: str
    OPENROUTER_BASE_URL: str = "https://openrouter.ai/api/v1"

    # Qdrant Vector Database
    QDRANT_URL: str
    QDRANT_API_KEY: str
    QDRANT_CLUSTER_ID: str

    # Neon PostgreSQL Database
    NEON_DATABASE_URL: str

    # Cohere API (if needed)
    COHERE_API_KEY: Optional[str] = None

    # Google Gemini API
    GEMINI_API_KEY: Optional[str] = None

    # Backend API
    BACKEND_API_KEY: str

    # Application settings
    DEBUG: bool = False
    LOG_LEVEL: str = "INFO"
    MAX_CONTENT_LENGTH: int = 5000  # presumably max characters per request — confirm at call sites
    RATE_LIMIT_REQUESTS: int = 100  # requests allowed per window
    RATE_LIMIT_WINDOW: int = 60  # in seconds

    class Config:
        # Load overrides from a local .env file; environment variable
        # names are matched case-insensitively.
        env_file = ".env"
        case_sensitive = False
38
-
39
-
40
@lru_cache()
def get_settings():
    """Return the process-wide Settings instance (constructed once, then cached)."""
    return Settings()


# Eagerly constructed singleton imported throughout the application.
settings = get_settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/database/__pycache__/database.cpython-312.pyc DELETED
Binary file (1.65 kB)
 
app/database/__pycache__/models.cpython-312.pyc DELETED
Binary file (5.82 kB)
 
app/database/__pycache__/repositories.cpython-312.pyc DELETED
Binary file (10.8 kB)
 
app/database/database.py DELETED
@@ -1,56 +0,0 @@
1
- from sqlalchemy import create_engine
2
- from sqlalchemy.ext.declarative import declarative_base
3
- from sqlalchemy.orm import sessionmaker
4
- from app.config import settings
5
-
6
-
7
- # Create database engine with connection pooling
8
- engine = create_engine(
9
- settings.NEON_DATABASE_URL,
10
- pool_pre_ping=True, # Verify connections before use
11
- pool_recycle=300, # Recycle connections after 5 minutes
12
- pool_size=10, # Number of connection to keep open
13
- max_overflow=20, # Additional connections beyond pool_size
14
- echo=False # Set to True for SQL query logging
15
- )
16
-
17
- # Create session factory
18
- SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
19
-
20
- # Base class for models
21
- Base = declarative_base()
22
-
23
-
24
def get_db():
    """
    FastAPI dependency yielding a database session that is always
    closed afterwards, even on error.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
33
-
34
-
35
def create_tables():
    """
    Create every table declared on the models' metadata.
    Intended to run once during application startup.
    """
    # Imported lazily to avoid a circular import at module load time.
    from .models import Base

    Base.metadata.create_all(bind=engine)
42
-
43
-
44
def get_engine():
    """Expose the shared SQLAlchemy engine for direct access."""
    return engine


# Bundle of this module's database primitives for convenient importing.
db_instance = {
    'engine': engine,
    'SessionLocal': SessionLocal,
    'Base': Base,
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/database/models.py DELETED
@@ -1,138 +0,0 @@
1
- from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, JSON, Index
2
- from sqlalchemy.ext.declarative import declarative_base
3
- from sqlalchemy.orm import sessionmaker
4
- from datetime import datetime
5
- import uuid
6
- from app.config import settings
7
-
8
-
9
- Base = declarative_base()
10
-
11
-
12
class BookContentDocument(Base):
    """
    One chunk of book content, mirroring the vector stored in Qdrant.

    Rows are created by the ingestion pipeline; ``document_id`` is the
    chunk ID shared with the vector store.
    """
    __tablename__ = "book_content_documents"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    document_id = Column(String, unique=True, nullable=False, index=True)  # The ID from our chunking process
    title = Column(String, nullable=False)
    content = Column(Text, nullable=False)  # Content summary or the actual content
    chapter = Column(String, nullable=False)
    section = Column(String, nullable=False)
    page_reference = Column(String, nullable=True)
    embedding_vector = Column(JSON, nullable=True)  # Store as JSON for flexibility
    token_count = Column(Integer, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Indexes for better query performance
    # Note: Indexes are handled separately to avoid creation conflicts
    # __table_args__ = (
    #     Index('idx_chapter_section', 'chapter', 'section'),
    #     Index('idx_document_id', 'document_id'),
    # )
36
-
37
-
38
class ChatSession(Base):
    """
    One chat conversation; messages and query contexts reference it
    via ``session_id``.
    """
    __tablename__ = "chat_sessions"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, unique=True, nullable=False, index=True)  # external session identifier
    user_id = Column(String, nullable=True)  # Optional user identifier
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
    session_metadata = Column(JSON, nullable=True)  # Additional session metadata

    # Note: Indexes are handled separately to avoid creation conflicts
    # __table_args__ = (
    #     Index('idx_session_id', 'session_id'),
    #     Index('idx_user_id', 'user_id'),
    # )
56
-
57
-
58
class ChatMessage(Base):
    """
    A single user or assistant message within a chat session.
    """
    __tablename__ = "chat_messages"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    message_id = Column(String, unique=True, nullable=False, index=True)
    session_id = Column(String, nullable=False, index=True)  # References chat_sessions.session_id
    role = Column(String, nullable=False)  # 'user' or 'assistant'
    content = Column(Text, nullable=False)
    citations = Column(JSON, nullable=True)  # List of document IDs used in response
    query_context_id = Column(String, nullable=True, index=True)  # References query_contexts.context_id
    timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Note: Indexes are handled separately to avoid creation conflicts
    # __table_args__ = (
    #     Index('idx_session_id', 'session_id'),
    #     Index('idx_message_id', 'message_id'),
    #     Index('idx_query_context_id', 'query_context_id'),
    # )
79
-
80
-
81
class QueryContext(Base):
    """
    The retrieval context attached to a query: whether it was a global
    question or one scoped to text the user selected.
    """
    __tablename__ = "query_contexts"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    context_id = Column(String, unique=True, nullable=False, index=True)
    session_id = Column(String, nullable=False, index=True)  # References chat_sessions.session_id
    selected_text = Column(Text, nullable=True)  # Text selected by user (for selection-based queries)
    query_type = Column(String, nullable=False)  # 'global' or 'selection'
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Note: Indexes are handled separately to avoid creation conflicts
    # __table_args__ = (
    #     Index('idx_context_id', 'context_id'),
    #     Index('idx_session_id', 'session_id'),
    # )
99
-
100
-
101
# Create engine and session.
# NOTE(review): this duplicates the engine/SessionLocal pair defined in
# app.database.database — two separate connection pools point at the same
# database; consider consolidating on one module.
engine = create_engine(settings.NEON_DATABASE_URL, pool_pre_ping=True)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
104
-
105
-
106
def drop_tables():
    """
    Drop ALL tables by destroying and recreating the ``public`` schema.

    WARNING: destructive and PostgreSQL-specific. CASCADE removes every
    object in the schema (not just this app's tables) and the re-grant
    targets the hard-coded ``neondb_owner`` role. Dev/reset use only.
    """
    # Drop the whole schema with CASCADE to handle foreign key
    # dependencies, rather than dropping table-by-table.
    from sqlalchemy import text
    with engine.connect() as conn:
        conn.execute(text("DROP SCHEMA public CASCADE"))
        conn.execute(text("CREATE SCHEMA public"))
        # Restore default permissions on the fresh schema.
        conn.execute(text("GRANT ALL ON SCHEMA public TO public"))
        conn.execute(text("GRANT ALL ON SCHEMA public TO neondb_owner"))
        conn.commit()
120
-
121
-
122
def create_tables():
    """
    Create all database tables declared on this module's Base.
    """
    # checkfirst avoids errors when the tables already exist.
    Base.metadata.create_all(bind=engine, checkfirst=True)
128
-
129
-
130
def get_db():
    """
    Dependency yielding a database session bound to this module's
    engine; the session is closed even when the request fails.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/database/repositories.py DELETED
@@ -1,217 +0,0 @@
1
- from typing import List, Optional, Dict, Any
2
- from sqlalchemy.orm import Session
3
- from sqlalchemy import and_, or_
4
- from app.database.models import BookContentDocument, ChatSession, ChatMessage, QueryContext
5
- from app.ingestion.chunker import TextChunk
6
-
7
-
8
class BookContentRepository:
    """
    Data-access layer for BookContentDocument rows.
    """

    def __init__(self, db: Session):
        self.db = db

    def create_document(self, chunk: Dict[str, Any]) -> BookContentDocument:
        """Insert a new document row built from a chunk dict and return it."""
        record = BookContentDocument(
            document_id=chunk.get('id'),
            title=chunk.get('title', ''),
            content=chunk.get('content', ''),
            chapter=chunk.get('chapter', ''),
            section=chunk.get('section', ''),
            page_reference=chunk.get('page_reference', ''),
            embedding_vector=chunk.get('embedding'),
            token_count=chunk.get('token_count', 0)
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return record

    def get_document_by_id(self, document_id: str) -> Optional[BookContentDocument]:
        """Look up a single document by its chunk ID; None when absent."""
        query = self.db.query(BookContentDocument)
        return query.filter(BookContentDocument.document_id == document_id).first()

    def get_documents_by_chapter(self, chapter: str) -> List[BookContentDocument]:
        """All documents belonging to *chapter*."""
        query = self.db.query(BookContentDocument)
        return query.filter(BookContentDocument.chapter == chapter).all()

    def get_all_documents(self) -> List[BookContentDocument]:
        """Every stored document."""
        return self.db.query(BookContentDocument).all()

    def update_document(self, document_id: str, **kwargs) -> Optional[BookContentDocument]:
        """Apply keyword updates to a document; None when it does not exist."""
        record = self.get_document_by_id(document_id)
        if record is None:
            return None
        for field, value in kwargs.items():
            setattr(record, field, value)
        self.db.commit()
        self.db.refresh(record)
        return record

    def delete_document(self, document_id: str) -> bool:
        """Delete a document; True when a row was actually removed."""
        record = self.get_document_by_id(document_id)
        if record is None:
            return False
        self.db.delete(record)
        self.db.commit()
        return True
79
-
80
-
81
class ChatSessionRepository:
    """
    Data-access layer for ChatSession rows.
    """

    def __init__(self, db: Session):
        self.db = db

    def create_session(self, session_id: str, user_id: Optional[str] = None, metadata: Optional[Dict] = None) -> ChatSession:
        """Insert a new chat session row and return it."""
        record = ChatSession(
            session_id=session_id,
            user_id=user_id,
            session_metadata=metadata
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return record

    def get_session_by_id(self, session_id: str) -> Optional[ChatSession]:
        """Fetch a session by its external ID; None when absent."""
        query = self.db.query(ChatSession)
        return query.filter(ChatSession.session_id == session_id).first()

    def update_session(self, session_id: str, **kwargs) -> Optional[ChatSession]:
        """Apply keyword updates to a session; None when it does not exist."""
        record = self.get_session_by_id(session_id)
        if record is None:
            return None
        for field, value in kwargs.items():
            setattr(record, field, value)
        self.db.commit()
        self.db.refresh(record)
        return record
122
-
123
-
124
class ChatMessageRepository:
    """
    Data-access layer for ChatMessage rows.
    """

    def __init__(self, db: Session):
        self.db = db

    def create_message(
        self,
        message_id: str,
        session_id: str,
        role: str,
        content: str,
        citations: Optional[List[Dict]] = None,
        query_context_id: Optional[str] = None
    ) -> ChatMessage:
        """Insert a new chat message row and return it."""
        record = ChatMessage(
            message_id=message_id,
            session_id=session_id,
            role=role,
            content=content,
            citations=citations,
            query_context_id=query_context_id
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return record

    def get_messages_by_session(self, session_id: str) -> List[ChatMessage]:
        """All messages for a session, oldest first."""
        query = self.db.query(ChatMessage)
        query = query.filter(ChatMessage.session_id == session_id)
        return query.order_by(ChatMessage.timestamp).all()

    def get_message_by_id(self, message_id: str) -> Optional[ChatMessage]:
        """Fetch a message by its external ID; None when absent."""
        query = self.db.query(ChatMessage)
        return query.filter(ChatMessage.message_id == message_id).first()
172
-
173
-
174
class QueryContextRepository:
    """
    Data-access layer for QueryContext rows.
    """

    def __init__(self, db: Session):
        self.db = db

    def create_query_context(
        self,
        context_id: str,
        session_id: str,
        selected_text: Optional[str] = None,
        query_type: str = "global"
    ) -> QueryContext:
        """Insert a new query-context row and return it."""
        record = QueryContext(
            context_id=context_id,
            session_id=session_id,
            selected_text=selected_text,
            query_type=query_type
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return record

    def get_context_by_id(self, context_id: str) -> Optional[QueryContext]:
        """Fetch a query context by its external ID; None when absent."""
        query = self.db.query(QueryContext)
        return query.filter(QueryContext.context_id == context_id).first()

    def get_contexts_by_session(self, session_id: str) -> List[QueryContext]:
        """All query contexts recorded for a session."""
        query = self.db.query(QueryContext)
        return query.filter(QueryContext.session_id == session_id).all()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/embeddings/__pycache__/minimal_embedding_generator.cpython-312.pyc DELETED
Binary file (3.65 kB)
 
app/embeddings/minimal_embedding_generator.py DELETED
@@ -1,79 +0,0 @@
1
- import numpy as np
2
- from typing import List
3
- import hashlib
4
- import re
5
- from collections import Counter
6
-
7
-
8
- class MinimalEmbeddingGenerator:
9
- """
10
- Minimal embedding generator using simple hashing and basic statistics
11
- This avoids heavy dependencies and memory issues
12
- """
13
-
14
- def __init__(self):
15
- self.vocab = set()
16
- self.fitted = False
17
-
18
- def _pad_embedding(self, embedding: List[float], target_size: int = 1536) -> List[float]:
19
- """
20
- Pad embedding to target size with zeros
21
- """
22
- current_size = len(embedding)
23
- if current_size >= target_size:
24
- return embedding[:target_size]
25
- else:
26
- padded = [0.0] * target_size
27
- padded[:current_size] = embedding
28
- return padded
29
-
30
- def _text_to_vector(self, text: str) -> List[float]:
31
- """
32
- Convert text to a vector using simple hashing approach
33
- """
34
- # Clean text
35
- text = re.sub(r'[^\w\s]', ' ', text.lower())
36
- words = text.split()
37
-
38
- if not words:
39
- return [0.0] * 1536
40
-
41
- # Create a simple vector based on word hashes
42
- vector = [0.0] * 1536
43
-
44
- for word in words:
45
- # Use hash to determine position in vector
46
- hash_val = int(hashlib.md5(word.encode()).hexdigest(), 16)
47
- pos = hash_val % 1536
48
- # Add to vector with some normalization
49
- vector[pos] += 1.0 / len(words) # Normalize by document length
50
-
51
- # Normalize the vector
52
- norm = sum(x**2 for x in vector) ** 0.5
53
- if norm > 0:
54
- vector = [x / norm for x in vector]
55
-
56
- return vector
57
-
58
- def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
59
- """
60
- Generate embeddings for a list of texts using simple hashing
61
- """
62
- embeddings = []
63
- for text in texts:
64
- embedding = self._text_to_vector(text)
65
- padded_embedding = self._pad_embedding(embedding)
66
- embeddings.append(padded_embedding)
67
-
68
- return embeddings
69
-
70
- def encode_query(self, query: str) -> List[float]:
71
- """
72
- Encode a single query for similarity search
73
- """
74
- embedding = self._text_to_vector(query)
75
- return self._pad_embedding(embedding)
76
-
77
-
78
- # Global instance
79
- minimal_embedding_generator = MinimalEmbeddingGenerator()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/generation/__pycache__/response_generator.cpython-312.pyc DELETED
Binary file (14.7 kB)
 
app/generation/response_generator.py DELETED
@@ -1,387 +0,0 @@
1
- import asyncio
2
- import json
3
- from typing import List, Dict, Any, Optional
4
- from app.services.openrouter_client import openrouter_client
5
- from app.prompting.prompt_builder import PromptBuilder
6
- from app.retrieval.retriever import Retriever
7
- from app.config import settings
8
-
9
-
10
- class ResponseGenerator:
11
- """
12
- Generates responses using Google Gemini with proper context and citation tracking
13
- """
14
-
15
- def __init__(self):
16
- self.openrouter_client = openrouter_client
17
- self.prompt_builder = PromptBuilder()
18
- self.retriever = Retriever()
19
-
20
- async def generate_response(
21
- self,
22
- query: str,
23
- retrieved_contexts: List[Dict[str, Any]],
24
- query_type: str = "global",
25
- selected_text: Optional[str] = None,
26
- session_id: Optional[str] = None
27
- ) -> Dict[str, Any]:
28
- """
29
- Generate a response to a query using Google Gemini
30
- """
31
- try:
32
- # Build context string from retrieved contexts
33
- context_parts = []
34
- for ctx in retrieved_contexts:
35
- context_text = ctx.get('content', '')
36
- if context_text:
37
- context_parts.append(context_text)
38
-
39
- context_string = "\n\n".join(context_parts)
40
-
41
- # Prepare messages for OpenRouter
42
- messages = [
43
- {
44
- "role": "system",
45
- "content": "You are an expert assistant for the Physical AI & Humanoid Robotics curriculum. Provide helpful, conversational responses based on the provided context. Always use information only from the provided context and be factual."
46
- },
47
- {
48
- "role": "user",
49
- "content": f"Context: {context_string}\n\nQuestion: {query}\n\nProvide a helpful response based on the context:"
50
- }
51
- ]
52
-
53
- # Generate response using OpenRouter
54
- openrouter_response = await self.openrouter_client.generate_completion(
55
- messages=messages,
56
- model="mistralai/devstral-2512:free",
57
- temperature=0.3,
58
- max_tokens=1000
59
- )
60
-
61
- if not openrouter_response:
62
- return {
63
- "response": "I encountered an issue generating a response. Please try again.",
64
- "citations": [],
65
- "query_type": query_type,
66
- "session_id": session_id
67
- }
68
-
69
- # Extract citations from contexts used
70
- citations = self._extract_citations(retrieved_contexts)
71
-
72
- return {
73
- "response": openrouter_response,
74
- "citations": citations,
75
- "query_type": query_type,
76
- "session_id": session_id
77
- }
78
-
79
- except Exception as e:
80
- print(f"[ERROR] Error generating response: {str(e)}")
81
- # If AI service fails, try to provide a helpful response based on the context
82
- if retrieved_contexts:
83
- # Extract key information from contexts to provide a basic response
84
- context_titles = [ctx.get('title', '') for ctx in retrieved_contexts if ctx.get('title')]
85
- unique_titles = list(set(context_titles))
86
-
87
- if unique_titles:
88
- response = f"Based on the Physical AI & Humanoid Robotics curriculum, I found information related to: {', '.join(unique_titles[:3])}. "
89
- response += "Here's what I can share from the book content: "
90
- # Include some of the actual content from the contexts
91
- first_context = retrieved_contexts[0]
92
- content_preview = first_context.get('content', '')[:200]
93
- response += content_preview + ("..." if len(first_context.get('content', '')) > 200 else "")
94
- else:
95
- response = "Based on the Physical AI & Humanoid Robotics curriculum, I found relevant content. "
96
- response += "Could you ask a more specific question about the topic you're interested in?"
97
- else:
98
- response = "I couldn't find relevant information in the Physical AI & Humanoid Robotics curriculum to answer your question. "
99
- response += "Please try asking about specific topics from the curriculum like ROS 2, Digital Twins, AI-Brain, or VLA."
100
-
101
- return {
102
- "response": response,
103
- "citations": self._extract_citations(retrieved_contexts),
104
- "query_type": query_type,
105
- "session_id": session_id,
106
- "error": str(e)
107
- }
108
-
109
- async def generate_response_with_validation(
110
- self,
111
- query: str,
112
- retrieved_contexts: List[Dict[str, Any]],
113
- query_type: str = "global",
114
- selected_text: Optional[str] = None,
115
- session_id: Optional[str] = None
116
- ) -> Dict[str, Any]:
117
- """
118
- Generate response with additional validation to ensure it's based only on provided context
119
- """
120
- try:
121
- # First, generate the initial response
122
- result = await self.generate_response(
123
- query=query,
124
- retrieved_contexts=retrieved_contexts,
125
- query_type=query_type,
126
- selected_text=selected_text,
127
- session_id=session_id
128
- )
129
-
130
- # Validate that the response is grounded in the provided context
131
- # But be more lenient for high-level book/module questions
132
- if retrieved_contexts and "The provided context does not contain" not in result.get("response", ""):
133
- is_valid = self._validate_response_uses_context(
134
- response=result["response"],
135
- contexts=retrieved_contexts
136
- )
137
-
138
- # For high-level book/module questions, be more lenient with validation
139
- query_lower = query.lower()
140
- is_book_overview_query = any(phrase in query_lower for phrase in [
141
- 'what is', 'tell me about', 'describe', 'overview', 'introduction',
142
- 'physical ai', 'humanoid robotics', 'book', 'curriculum', 'module',
143
- 'quick start', 'setup', 'getting started', 'chapter', 'section'
144
- ])
145
-
146
- # Only retry with stronger guidance if it's not a book overview query and validation fails
147
- if not is_valid and not is_book_overview_query and query_type != "selection":
148
- # If response doesn't seem to use context, try again with stronger instructions
149
- result = await self._generate_with_stronger_context_guidance(
150
- query=query,
151
- retrieved_contexts=retrieved_contexts,
152
- query_type=query_type,
153
- selected_text=selected_text,
154
- session_id=session_id
155
- )
156
-
157
- return result
158
- except Exception as e:
159
- print(f"[ERROR] Error in generate_response_with_validation: {str(e)}")
160
- # If AI service fails, try to provide a helpful response based on the context
161
- if retrieved_contexts:
162
- # Extract key information from contexts to provide a basic response
163
- context_titles = [ctx.get('title', '') for ctx in retrieved_contexts if ctx.get('title')]
164
- unique_titles = list(set(context_titles))
165
-
166
- if unique_titles:
167
- response = f"Based on the Physical AI & Humanoid Robotics curriculum, I found information related to: {', '.join(unique_titles[:3])}. "
168
- response += "Here's what I can share from the book content: "
169
- # Include some of the actual content from the contexts
170
- first_context = retrieved_contexts[0]
171
- content_preview = first_context.get('content', '')[:200]
172
- response += content_preview + ("..." if len(first_context.get('content', '')) > 200 else "")
173
- else:
174
- response = "Based on the Physical AI & Humanoid Robotics curriculum, I found relevant content. "
175
- response += "Could you ask a more specific question about the topic you're interested in?"
176
- else:
177
- response = "I couldn't find relevant information in the Physical AI & Humanoid Robotics curriculum to answer your question. "
178
- response += "Please try asking about specific topics from the curriculum like ROS 2, Digital Twins, AI-Brain, or VLA."
179
-
180
- return {
181
- "response": response,
182
- "citations": self._extract_citations(retrieved_contexts),
183
- "query_type": query_type,
184
- "session_id": session_id,
185
- "error": str(e)
186
- }
187
-
188
- async def _generate_with_stronger_context_guidance(
189
- self,
190
- query: str,
191
- retrieved_contexts: List[Dict[str, Any]],
192
- query_type: str,
193
- selected_text: Optional[str] = None,
194
- session_id: Optional[str] = None
195
- ) -> Dict[str, Any]:
196
- """
197
- Generate response with stronger instructions to use only provided context
198
- """
199
- # Build context string from retrieved contexts
200
- context_parts = []
201
- for ctx in retrieved_contexts:
202
- context_text = ctx.get('content', '')
203
- if context_text:
204
- context_parts.append(context_text)
205
-
206
- context_string = "\n\n".join(context_parts)
207
-
208
- # Add stronger instructions to the context
209
- stronger_context = (
210
- context_string +
211
- "\n\nIMPORTANT: The response MUST be based ONLY on the provided context above. "
212
- "Do not use any external knowledge or make up information. "
213
- "If the context does not contain the answer, explicitly state this fact."
214
- )
215
-
216
- # Prepare messages for OpenRouter
217
- messages = [
218
- {
219
- "role": "system",
220
- "content": "You are an expert assistant for the Physical AI & Humanoid Robotics curriculum. Provide helpful, conversational responses based on the provided context. Always use information only from the provided context and be factual."
221
- },
222
- {
223
- "role": "user",
224
- "content": f"Context: {stronger_context}\n\nQuestion: {query}\n\nProvide a helpful response based on the context:"
225
- }
226
- ]
227
-
228
- # Generate response using OpenRouter
229
- openrouter_response = await self.openrouter_client.generate_completion(
230
- messages=messages,
231
- model="mistralai/devstral-2512:free",
232
- temperature=0.3,
233
- max_tokens=1000
234
- )
235
-
236
- if not openrouter_response:
237
- return {
238
- "response": "I encountered an issue generating a response. Please try again.",
239
- "citations": [],
240
- "query_type": query_type,
241
- "session_id": session_id
242
- }
243
-
244
- citations = self._extract_citations(retrieved_contexts)
245
-
246
- return {
247
- "response": openrouter_response,
248
- "citations": citations,
249
- "query_type": query_type,
250
- "session_id": session_id
251
- }
252
-
253
- def _extract_citations(self, contexts: List[Dict[str, Any]]) -> List[Dict[str, str]]:
254
- """
255
- Extract citation information from retrieved contexts
256
- """
257
- citations = []
258
- for ctx in contexts:
259
- citation = {
260
- "document_id": ctx.get('id', ''),
261
- "title": ctx.get('title', ''),
262
- "chapter": ctx.get('chapter', ''),
263
- "section": ctx.get('section', ''),
264
- "page_reference": ctx.get('page_reference', '')
265
- }
266
- citations.append(citation)
267
- return citations
268
-
269
- def _validate_response_uses_context(
270
- self,
271
- response: str,
272
- contexts: List[Dict[str, Any]]
273
- ) -> bool:
274
- """
275
- Validate if response uses the provided context, allowing for summarization and synthesis
276
- """
277
- if not contexts:
278
- return False
279
-
280
- response_lower = response.lower()
281
-
282
- # Check for semantic relevance rather than exact keyword matching
283
- # Look for broader topic matches and concepts
284
- context_text = " ".join([ctx.get('content', '') for ctx in contexts if ctx.get('content')])
285
- context_lower = context_text.lower()
286
-
287
- # Check for relevant topics and concepts that indicate proper grounding
288
- book_related_terms = [
289
- 'physical ai', 'humanoid', 'robotics', 'module', 'ros', 'digital twin',
290
- 'ai-brain', 'vla', 'curriculum', 'course', 'book', 'chapter', 'section',
291
- 'setup', 'quickstart', 'introduction', 'learning', 'education'
292
- ]
293
-
294
- # Check if response mentions book-related concepts that are in context
295
- response_has_relevant_terms = any(term in response_lower for term in book_related_terms)
296
- context_has_relevant_terms = any(term in context_lower for term in book_related_terms)
297
-
298
- if response_has_relevant_terms and context_has_relevant_terms:
299
- return True
300
-
301
- # Also check for title and chapter references
302
- context_titles = " ".join([ctx.get('title', '') + ' ' + ctx.get('chapter', '') + ' ' + ctx.get('section', '')
303
- for ctx in contexts if ctx.get('title') or ctx.get('chapter') or ctx.get('section')])
304
- context_titles_lower = context_titles.lower()
305
-
306
- # If response mentions specific titles/chapters that exist in context, it's likely valid
307
- if any(title_term in response_lower for title_term in context_titles_lower.split() if len(title_term) > 3):
308
- return True
309
-
310
- # Fallback: check if there's general overlap in concepts
311
- response_words = set(response_lower.split())
312
- context_words = set(context_lower.split())
313
-
314
- # Find intersection of meaningful words (longer than 3 chars, not common words)
315
- common_words = response_words.intersection(context_words)
316
- meaningful_common_words = [w for w in common_words if len(w) > 3 and w not in ['the', 'and', 'for', 'are', 'but', 'not', 'you', 'have', 'with', 'this', 'that', 'from']]
317
-
318
- # If there are meaningful overlapping words, consider it valid
319
- return len(meaningful_common_words) >= 2
320
-
321
- async def generate_citation_aware_response(
322
- self,
323
- query: str,
324
- retrieved_contexts: List[Dict[str, Any]],
325
- query_type: str = "global",
326
- selected_text: Optional[str] = None,
327
- session_id: Optional[str] = None
328
- ) -> Dict[str, Any]:
329
- """
330
- Generate a response that explicitly mentions which sources were used
331
- """
332
- # Build context string from retrieved contexts
333
- context_parts = []
334
- for ctx in retrieved_contexts:
335
- context_text = ctx.get('content', '')
336
- if context_text:
337
- context_parts.append(context_text)
338
-
339
- context_string = "\n\n".join(context_parts)
340
-
341
- # Add specific instruction about citations to the context
342
- citation_context = (
343
- context_string +
344
- "\n\nWhen answering, please indicate which sources you used by referencing them as "
345
- "[Source 1], [Source 2], etc., corresponding to the order they appear in the context section."
346
- )
347
-
348
- # Prepare messages for OpenRouter
349
- messages = [
350
- {
351
- "role": "system",
352
- "content": "You are an expert assistant for the Physical AI & Humanoid Robotics curriculum. Provide helpful, conversational responses based on the provided context. Always use information only from the provided context and be factual. When answering, please indicate which sources you used by referencing them as [Source 1], [Source 2], etc., corresponding to the order they appear in the context section."
353
- },
354
- {
355
- "role": "user",
356
- "content": f"Context: {citation_context}\n\nQuestion: {query}\n\nProvide a helpful response based on the context:"
357
- }
358
- ]
359
-
360
- # Generate response using OpenRouter
361
- openrouter_response = await self.openrouter_client.generate_completion(
362
- messages=messages,
363
- model="mistralai/devstral-2512:free",
364
- temperature=0.3,
365
- max_tokens=1000
366
- )
367
-
368
- if not openrouter_response:
369
- return {
370
- "response": "I encountered an issue generating a response. Please try again.",
371
- "citations": [],
372
- "query_type": query_type,
373
- "session_id": session_id
374
- }
375
-
376
- citations = self._extract_citations(retrieved_contexts)
377
-
378
- return {
379
- "response": openrouter_response,
380
- "citations": citations,
381
- "query_type": query_type,
382
- "session_id": session_id
383
- }
384
-
385
-
386
- # Global instance
387
- response_generator = ResponseGenerator()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/ingestion/__pycache__/chunker.cpython-312.pyc DELETED
Binary file (10.1 kB)
 
app/ingestion/__pycache__/document_parser.cpython-312.pyc DELETED
Binary file (6.08 kB)
 
app/ingestion/__pycache__/file_scanner.cpython-312.pyc DELETED
Binary file (4.87 kB)
 
app/ingestion/chunker.py DELETED
@@ -1,291 +0,0 @@
1
- import re
2
- import uuid
3
- from typing import List, Dict, Tuple
4
- from dataclasses import dataclass
5
- from .document_parser import DocumentParser
6
-
7
-
8
- @dataclass
9
- class TextChunk:
10
- """Represents a chunk of text with metadata"""
11
- id: str
12
- content: str
13
- title: str
14
- chapter: str
15
- section: str
16
- page_reference: str
17
- token_count: int
18
- original_start_pos: int
19
- original_end_pos: int
20
-
21
-
22
- class TextChunker:
23
- """
24
- Implements heading-aware text chunking to maintain semantic boundaries
25
- """
26
-
27
- def __init__(self, max_tokens: int = 800, min_tokens: int = 100, overlap_ratio: float = 0.1):
28
- self.max_tokens = max_tokens
29
- self.min_tokens = min_tokens
30
- self.overlap_ratio = overlap_ratio
31
- self.parser = DocumentParser()
32
-
33
- def chunk_document(self, document: Dict) -> List[TextChunk]:
34
- """
35
- Chunk a document while preserving semantic boundaries
36
- """
37
- content = document['content']
38
- title = document['title']
39
- chapter = document['chapter']
40
- section = document['section']
41
- file_path = document['file_path']
42
-
43
- # First, identify structural boundaries (headings)
44
- structure = self._identify_structure(content)
45
-
46
- if not structure:
47
- # If no headings found, chunk by max token size
48
- return self._chunk_by_size(content, title, chapter, section, file_path)
49
-
50
- # Split content by structural boundaries
51
- structured_chunks = self._split_by_structure(content, structure)
52
-
53
- # Further chunk large structural sections if needed
54
- final_chunks = []
55
- for i, (start_pos, end_pos, heading_context) in enumerate(structured_chunks):
56
- chunk_content = content[start_pos:end_pos]
57
-
58
- if self._count_tokens(chunk_content) > self.max_tokens:
59
- # Split large sections while preserving heading context
60
- sub_chunks = self._chunk_large_section(
61
- chunk_content,
62
- title,
63
- chapter,
64
- section,
65
- file_path,
66
- heading_context,
67
- start_pos
68
- )
69
- final_chunks.extend(sub_chunks)
70
- else:
71
- # Create a single chunk for this section
72
- chunk_id = str(uuid.uuid4())
73
- token_count = self._count_tokens(chunk_content)
74
-
75
- chunk = TextChunk(
76
- id=chunk_id,
77
- content=chunk_content,
78
- title=title,
79
- chapter=chapter,
80
- section=section,
81
- page_reference=file_path,
82
- token_count=token_count,
83
- original_start_pos=start_pos,
84
- original_end_pos=end_pos
85
- )
86
- final_chunks.append(chunk)
87
-
88
- return final_chunks
89
-
90
- def _identify_structure(self, content: str) -> List[Tuple[int, str]]:
91
- """
92
- Identify structural boundaries in the content (headings)
93
- Returns list of (position, heading_text) tuples
94
- """
95
- lines = content.split('\n')
96
- structure = []
97
- pos = 0
98
-
99
- for line in lines:
100
- # Check for markdown headings
101
- heading_match = re.match(r'^(\#{1,6})\s+(.+)', line)
102
- if heading_match:
103
- level = len(heading_match.group(1))
104
- heading_text = heading_match.group(2).strip()
105
- structure.append((pos, f"{'#' * level} {heading_text}"))
106
-
107
- pos += len(line) + 1 # +1 for newline
108
-
109
- return structure
110
-
111
- def _split_by_structure(self, content: str, structure: List[Tuple[int, str]]) -> List[Tuple[int, int, str]]:
112
- """
113
- Split content by structural boundaries
114
- Returns list of (start_pos, end_pos, heading_context) tuples
115
- """
116
- if not structure:
117
- return [(0, len(content), "")]
118
-
119
- splits = []
120
- start_pos = 0
121
-
122
- for pos, heading in structure:
123
- if pos > start_pos:
124
- # Add the chunk before this heading
125
- splits.append((start_pos, pos, heading))
126
- start_pos = pos
127
-
128
- # Add the final chunk if there's remaining content
129
- if start_pos < len(content):
130
- splits.append((start_pos, len(content), structure[-1][1]))
131
-
132
- return splits
133
-
134
- def _chunk_by_size(self, content: str, title: str, chapter: str, section: str, file_path: str) -> List[TextChunk]:
135
- """
136
- Fallback method to chunk content by size when no structure is available
137
- """
138
- chunks = []
139
- tokens_per_chunk = self._estimate_tokens_per_chunk()
140
- chunk_size = tokens_per_chunk * 4 # Rough estimate: 4 chars per token
141
-
142
- for i in range(0, len(content), chunk_size):
143
- chunk_content = content[i:i + chunk_size]
144
- chunk_id = str(uuid.uuid4())
145
-
146
- chunk = TextChunk(
147
- id=chunk_id,
148
- content=chunk_content,
149
- title=title,
150
- chapter=chapter,
151
- section=section,
152
- page_reference=file_path,
153
- token_count=self._count_tokens(chunk_content),
154
- original_start_pos=i,
155
- original_end_pos=min(i + chunk_size, len(content))
156
- )
157
- chunks.append(chunk)
158
-
159
- return chunks
160
-
161
- def _chunk_large_section(
162
- self,
163
- content: str,
164
- title: str,
165
- chapter: str,
166
- section: str,
167
- file_path: str,
168
- heading_context: str,
169
- offset: int
170
- ) -> List[TextChunk]:
171
- """
172
- Further chunk a large section while preserving heading context
173
- """
174
- chunks = []
175
-
176
- # If we have heading context, prepend it to each chunk
177
- context_prefix = f"{heading_context}\n\n" if heading_context else ""
178
-
179
- # Split content into sentences to find good break points
180
- sentences = self._split_into_sentences(content)
181
-
182
- current_chunk = ""
183
- current_tokens = 0
184
- chunk_idx = 0
185
-
186
- for sentence in sentences:
187
- sentence_tokens = self._count_tokens(sentence)
188
-
189
- # If adding this sentence would exceed the token limit
190
- if current_tokens + sentence_tokens > self.max_tokens and current_chunk:
191
- # Save the current chunk
192
- chunk_id = str(uuid.uuid4())
193
- chunk = TextChunk(
194
- id=chunk_id,
195
- content=context_prefix + current_chunk,
196
- title=title,
197
- chapter=chapter,
198
- section=section,
199
- page_reference=file_path,
200
- token_count=current_tokens,
201
- original_start_pos=offset + content.find(current_chunk),
202
- original_end_pos=offset + content.find(current_chunk) + len(current_chunk)
203
- )
204
- chunks.append(chunk)
205
-
206
- # Start a new chunk with potential overlap
207
- overlap_size = int(len(current_chunk) * self.overlap_ratio)
208
- overlap_content = current_chunk[-overlap_size:] if overlap_size > 0 else ""
209
-
210
- current_chunk = overlap_content + sentence
211
- current_tokens = self._count_tokens(context_prefix + current_chunk)
212
- chunk_idx += 1
213
- else:
214
- # Add sentence to current chunk
215
- current_chunk += sentence
216
- current_tokens += sentence_tokens
217
-
218
- # Add the final chunk if it has content
219
- if current_chunk.strip():
220
- chunk_id = str(uuid.uuid4())
221
- chunk = TextChunk(
222
- id=chunk_id,
223
- content=context_prefix + current_chunk,
224
- title=title,
225
- chapter=chapter,
226
- section=section,
227
- page_reference=file_path,
228
- token_count=self._count_tokens(context_prefix + current_chunk),
229
- original_start_pos=offset + content.find(current_chunk),
230
- original_end_pos=offset + content.find(current_chunk) + len(current_chunk)
231
- )
232
- chunks.append(chunk)
233
-
234
- return chunks
235
-
236
- def _split_into_sentences(self, text: str) -> List[str]:
237
- """
238
- Split text into sentences using common sentence boundaries
239
- """
240
- # Use regex to split on sentence boundaries while preserving the delimiters
241
- sentence_pattern = r'(?<=[.!?])\s+'
242
- sentences = re.split(sentence_pattern, text)
243
-
244
- # Re-add the punctuation to each sentence
245
- result = []
246
- for i, sentence in enumerate(sentences):
247
- if i < len(sentences) - 1:
248
- # Check if the original text had punctuation at the end of this sentence
249
- # by looking at the character that followed this sentence in the original text
250
- next_char_idx = len(''.join(sentences[:i+1])) + i # +i for spaces
251
- if next_char_idx < len(text):
252
- next_char = text[next_char_idx] if next_char_idx < len(text) else ''
253
- if next_char in '.!?':
254
- sentence += next_char
255
- result.append(sentence + ' ')
256
-
257
- # Clean up and ensure each sentence is properly formatted
258
- result = [s.strip() for s in result if s.strip()]
259
- return result
260
-
261
- def _count_tokens(self, text: str) -> int:
262
- """
263
- Count approximate number of tokens in text
264
- This is a simple estimation; for more accurate counting, use tiktoken
265
- """
266
- import tiktoken
267
- # Use cl100k_base encoding which is used by many OpenAI models
268
- encoding = tiktoken.get_encoding("cl100k_base")
269
- return len(encoding.encode(text))
270
-
271
- def _estimate_tokens_per_chunk(self) -> int:
272
- """
273
- Estimate number of tokens that would fit in a chunk based on max_tokens
274
- """
275
- # This is a rough estimation - in practice, you might want to use
276
- # a more sophisticated approach based on your specific content
277
- return min(self.max_tokens, 800) # Conservative estimate
278
-
279
-
280
- def chunk_documents(documents: List[Dict]) -> List[TextChunk]:
281
- """
282
- Convenience function to chunk a list of documents
283
- """
284
- chunker = TextChunker()
285
- all_chunks = []
286
-
287
- for document in documents:
288
- chunks = chunker.chunk_document(document)
289
- all_chunks.extend(chunks)
290
-
291
- return all_chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/ingestion/document_parser.py DELETED
@@ -1,146 +0,0 @@
1
- import os
2
- import re
3
- from typing import List, Dict, Optional
4
- from pathlib import Path
5
- import markdown
6
- from bs4 import BeautifulSoup
7
-
8
-
9
- class DocumentParser:
10
- """
11
- Parses markdown files and extracts content with structural information
12
- """
13
-
14
- def __init__(self):
15
- pass
16
-
17
- def parse_markdown_file(self, file_path: str) -> Dict:
18
- """
19
- Parse a markdown file and extract content with structural information
20
- """
21
- with open(file_path, 'r', encoding='utf-8') as file:
22
- content = file.read()
23
-
24
- # Extract metadata from frontmatter if present
25
- metadata = self._extract_frontmatter(content)
26
-
27
- # Extract structural information (headings)
28
- structure = self._extract_structure(content)
29
-
30
- # Get clean content without frontmatter
31
- clean_content = self._remove_frontmatter(content)
32
-
33
- # Extract title from the first heading or filename
34
- title = metadata.get('title') or self._extract_title(clean_content) or Path(file_path).stem
35
-
36
- # Determine chapter/section from file path
37
- path_parts = Path(file_path).parts
38
- chapter = self._extract_chapter_info(path_parts)
39
- section = self._extract_section_info(path_parts)
40
-
41
- return {
42
- 'title': title,
43
- 'content': clean_content,
44
- 'chapter': chapter,
45
- 'section': section,
46
- 'file_path': file_path,
47
- 'metadata': metadata,
48
- 'structure': structure
49
- }
50
-
51
- def _extract_frontmatter(self, content: str) -> Dict:
52
- """
53
- Extract YAML frontmatter from markdown content
54
- """
55
- import yaml
56
-
57
- # Look for YAML frontmatter between --- delimiters
58
- frontmatter_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
59
-
60
- if frontmatter_match:
61
- try:
62
- frontmatter = yaml.safe_load(frontmatter_match.group(1))
63
- return frontmatter or {}
64
- except yaml.YAMLError:
65
- return {}
66
-
67
- return {}
68
-
69
- def _remove_frontmatter(self, content: str) -> str:
70
- """
71
- Remove YAML frontmatter from content
72
- """
73
- frontmatter_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
74
-
75
- if frontmatter_match:
76
- return content[frontmatter_match.end():]
77
-
78
- return content
79
-
80
- def _extract_structure(self, content: str) -> List[Dict]:
81
- """
82
- Extract structural information (headings) from markdown content
83
- """
84
- # Convert markdown to HTML to easily extract headings
85
- html = markdown.markdown(content)
86
- soup = BeautifulSoup(html, 'html.parser')
87
-
88
- structure = []
89
- for i, heading in enumerate(soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])):
90
- structure.append({
91
- 'level': int(heading.name[1]),
92
- 'text': heading.get_text().strip(),
93
- 'position': i
94
- })
95
-
96
- return structure
97
-
98
- def _extract_title(self, content: str) -> Optional[str]:
99
- """
100
- Extract title from the first heading in the content
101
- """
102
- lines = content.split('\n')
103
- for line in lines:
104
- # Check for markdown heading pattern
105
- heading_match = re.match(r'^#+\s+(.+)', line)
106
- if heading_match:
107
- return heading_match.group(1).strip()
108
-
109
- return None
110
-
111
- def _extract_chapter_info(self, path_parts: tuple) -> str:
112
- """
113
- Extract chapter information from file path
114
- """
115
- # Look for common chapter-related directory names
116
- for part in path_parts:
117
- if 'chapter' in part.lower() or 'module' in part.lower():
118
- return part
119
-
120
- # If no chapter directory found, use the directory name
121
- if len(path_parts) > 1:
122
- return path_parts[-2] # Parent directory of the file
123
-
124
- return 'unknown'
125
-
126
- def _extract_section_info(self, path_parts: tuple) -> str:
127
- """
128
- Extract section information from file path
129
- """
130
- file_name = path_parts[-1]
131
- # Remove file extension
132
- section = Path(file_name).stem
133
- return section
134
-
135
-
136
- def scan_markdown_files(directory: str) -> List[str]:
137
- """
138
- Scan a directory for markdown files
139
- """
140
- markdown_files = []
141
- for root, dirs, files in os.walk(directory):
142
- for file in files:
143
- if file.lower().endswith(('.md', '.markdown')):
144
- markdown_files.append(os.path.join(root, file))
145
-
146
- return markdown_files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/ingestion/file_scanner.py DELETED
@@ -1,92 +0,0 @@
1
- import os
2
- from typing import List, Dict, Optional
3
- from pathlib import Path
4
- from .document_parser import DocumentParser, scan_markdown_files
5
-
6
-
7
- class FileScanner:
8
- """
9
- Scans and processes markdown files from the Docusaurus documentation directory
10
- """
11
-
12
- def __init__(self, base_path: str = "docusaurus/docs"):
13
- self.base_path = base_path
14
- self.parser = DocumentParser()
15
-
16
- def scan_and_parse_documents(self) -> List[Dict]:
17
- """
18
- Scan the documentation directory and parse all markdown files
19
- """
20
- if not os.path.exists(self.base_path):
21
- raise FileNotFoundError(f"Documentation directory not found: {self.base_path}")
22
-
23
- markdown_files = scan_markdown_files(self.base_path)
24
- documents = []
25
-
26
- for file_path in markdown_files:
27
- try:
28
- document = self.parser.parse_markdown_file(file_path)
29
- documents.append(document)
30
- except Exception as e:
31
- print(f"Error parsing file {file_path}: {str(e)}")
32
- continue
33
-
34
- return documents
35
-
36
- def validate_document(self, document: Dict) -> bool:
37
- """
38
- Validate document structure and content
39
- """
40
- required_fields = ['title', 'content', 'chapter', 'section', 'file_path']
41
- for field in required_fields:
42
- if field not in document or not document[field]:
43
- return False
44
-
45
- # Check content length
46
- if len(document['content'].strip()) < 10:
47
- return False
48
-
49
- return True
50
-
51
- def get_document_stats(self, documents: List[Dict]) -> Dict:
52
- """
53
- Get statistics about the parsed documents
54
- """
55
- total_docs = len(documents)
56
- valid_docs = sum(1 for doc in documents if self.validate_document(doc))
57
- total_chars = sum(len(doc['content']) for doc in documents)
58
- unique_chapters = len(set(doc['chapter'] for doc in documents))
59
-
60
- return {
61
- 'total_documents': total_docs,
62
- 'valid_documents': valid_docs,
63
- 'invalid_documents': total_docs - valid_docs,
64
- 'total_characters': total_chars,
65
- 'unique_chapters': unique_chapters,
66
- 'average_length': total_chars // total_docs if total_docs > 0 else 0
67
- }
68
-
69
-
70
- def main():
71
- """
72
- Main function to demonstrate file scanning
73
- """
74
- # Use the docusaurus docs directory by default, or allow override
75
- scanner = FileScanner()
76
- documents = scanner.scan_and_parse_documents()
77
-
78
- print(f"Found {len(documents)} documents")
79
- stats = scanner.get_document_stats(documents)
80
- print(f"Statistics: {stats}")
81
-
82
- # Print first document as example
83
- if documents:
84
- print(f"\nFirst document example:")
85
- print(f"Title: {documents[0]['title']}")
86
- print(f"Chapter: {documents[0]['chapter']}")
87
- print(f"Section: {documents[0]['section']}")
88
- print(f"Content preview: {documents[0]['content'][:200]}...")
89
-
90
-
91
- if __name__ == "__main__":
92
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/main.py DELETED
@@ -1,44 +0,0 @@
1
- from fastapi import FastAPI
2
- from fastapi.middleware.cors import CORSMiddleware
3
-
4
- app = FastAPI(
5
- title="RAG Chatbot API",
6
- description="API for RAG-based question answering for Physical AI & Humanoid Robotics book",
7
- version="1.0.0"
8
- )
9
-
10
- # Add CORS middleware
11
- app.add_middleware(
12
- CORSMiddleware,
13
- allow_origins=["*"], # In production, replace with specific origins
14
- allow_credentials=True,
15
- allow_methods=["*"],
16
- allow_headers=["*"],
17
- )
18
-
19
- @app.get("/")
20
- async def root():
21
- return {"message": "RAG Chatbot API is running!"}
22
-
23
- @app.get("/health")
24
- async def health_check():
25
- return {"status": "healthy", "service": "RAG Chatbot API"}
26
-
27
-
28
- @app.on_event("startup")
29
- async def startup_event():
30
- """
31
- Create database tables on startup
32
- """
33
- from app.database.models import create_tables
34
- create_tables() # Create tables with correct schema (using checkfirst to avoid errors)
35
-
36
- # Include API routes
37
- from app.api import health, ingest, chat
38
- app.include_router(health.router, prefix="/api")
39
- app.include_router(ingest.router, prefix="/api")
40
- app.include_router(chat.router, prefix="/api")
41
-
42
- if __name__ == "__main__":
43
- import uvicorn
44
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/models/__pycache__/chat.cpython-312.pyc DELETED
Binary file (2.76 kB)
 
app/models/chat.py DELETED
@@ -1,69 +0,0 @@
1
- from pydantic import BaseModel
2
- from typing import List, Dict, Optional
3
- from datetime import datetime
4
-
5
-
6
- class ChatSession(BaseModel):
7
- """
8
- Model for chat session data
9
- """
10
- session_id: str
11
- user_id: Optional[str] = None
12
- created_at: datetime
13
- updated_at: datetime
14
- metadata: Optional[Dict] = None
15
-
16
-
17
- class ChatMessage(BaseModel):
18
- """
19
- Model for chat message data
20
- """
21
- message_id: str
22
- session_id: str
23
- role: str # "user" or "assistant"
24
- content: str
25
- citations: Optional[List[Dict[str, str]]] = None
26
- query_context_id: Optional[str] = None
27
- timestamp: datetime
28
-
29
-
30
- class QueryContext(BaseModel):
31
- """
32
- Model for query context data
33
- """
34
- context_id: str
35
- session_id: str
36
- selected_text: Optional[str] = None
37
- query_type: str # "global" or "selection"
38
- created_at: datetime
39
-
40
-
41
- class ChatRequest(BaseModel):
42
- """
43
- Model for chat API request
44
- """
45
- session_id: str
46
- message: str
47
- selected_text: Optional[str] = None
48
- query_type: str = "global" # "global" or "selection"
49
- top_k: int = 5
50
-
51
-
52
- class ChatResponse(BaseModel):
53
- """
54
- Model for chat API response
55
- """
56
- response: str
57
- citations: List[Dict[str, str]]
58
- session_id: str
59
- query_type: str
60
- timestamp: str
61
-
62
-
63
- class ChatHistoryResponse(BaseModel):
64
- """
65
- Model for chat history API response
66
- """
67
- session_id: str
68
- messages: List[ChatMessage]
69
- timestamp: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/prompting/__pycache__/context_filter.cpython-312.pyc DELETED
Binary file (8.18 kB)
 
app/prompting/__pycache__/prompt_builder.cpython-312.pyc DELETED
Binary file (7.12 kB)
 
app/prompting/context_filter.py DELETED
@@ -1,205 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from app.retrieval.retriever import Retriever
3
- from app.prompting.prompt_builder import PromptBuilder
4
-
5
-
6
- class ContextFilter:
7
- """
8
- Filters and validates contexts to prevent information leakage between query types
9
- """
10
-
11
- def __init__(self):
12
- self.retriever = Retriever()
13
- self.prompt_builder = PromptBuilder()
14
-
15
- def filter_context_for_query_type(
16
- self,
17
- contexts: List[Dict[str, Any]],
18
- query_type: str,
19
- selected_text: Optional[str] = None
20
- ) -> List[Dict[str, Any]]:
21
- """
22
- Filter contexts based on query type to prevent information leakage
23
- """
24
- if query_type == "selection" and selected_text:
25
- # For selection-based queries, we need to ensure contexts are relevant
26
- # to the selected text and don't introduce unrelated global knowledge
27
- return self._filter_selection_contexts(contexts, selected_text)
28
- elif query_type == "global":
29
- # For global queries, we can use all retrieved contexts
30
- return contexts
31
- else:
32
- # Default to global behavior
33
- return contexts
34
-
35
- def _filter_selection_contexts(
36
- self,
37
- contexts: List[Dict[str, Any]],
38
- selected_text: str
39
- ) -> List[Dict[str, Any]]:
40
- """
41
- Filter contexts to ensure they're relevant to the selected text for selection-based queries
42
- """
43
- if not contexts or not selected_text:
44
- return contexts
45
-
46
- # Simple relevance check: ensure contexts have some connection to the selected text
47
- # In a more sophisticated implementation, you might use semantic similarity
48
- selected_keywords = set(selected_text.lower().split()[:10]) # Use first 10 words as keywords
49
- filtered_contexts = []
50
-
51
- for context in contexts:
52
- content = context.get('content', '').lower()
53
- content_words = set(content.split())
54
-
55
- # Check if there's significant overlap in keywords
56
- keyword_overlap = len(selected_keywords.intersection(content_words))
57
- keyword_ratio = keyword_overlap / len(selected_keywords) if selected_keywords else 0
58
-
59
- # Include context if it has some relevance to the selected text
60
- # or if we don't have enough contexts yet
61
- if keyword_ratio > 0.1 or len(filtered_contexts) < 2: # At least 10% overlap or include first few
62
- filtered_contexts.append(context)
63
-
64
- return filtered_contexts
65
-
66
- def validate_context_isolation(
67
- self,
68
- contexts: List[Dict[str, Any]],
69
- query_type: str,
70
- selected_text: Optional[str] = None
71
- ) -> Dict[str, Any]:
72
- """
73
- Validate that contexts are properly isolated based on query type
74
- """
75
- validation_result = {
76
- 'is_valid': True,
77
- 'query_type': query_type,
78
- 'context_count': len(contexts),
79
- 'issues': []
80
- }
81
-
82
- if query_type == "selection" and selected_text:
83
- # Validate that contexts are related to selected text
84
- validation_result.update(self._validate_selection_contexts(contexts, selected_text))
85
- elif query_type == "global":
86
- # For global queries, validate that we have diverse contexts
87
- validation_result.update(self._validate_global_contexts(contexts))
88
-
89
- return validation_result
90
-
91
- def _validate_selection_contexts(
92
- self,
93
- contexts: List[Dict[str, Any]],
94
- selected_text: str
95
- ) -> Dict[str, Any]:
96
- """
97
- Validate selection-based contexts
98
- """
99
- result = {
100
- 'is_valid': True,
101
- 'issues': []
102
- }
103
-
104
- if not contexts:
105
- result['is_valid'] = False
106
- result['issues'].append("No contexts provided for selection-based query")
107
- return result
108
-
109
- # Check relevance to selected text
110
- relevant_count = 0
111
- selected_keywords = set(selected_text.lower().split()[:10])
112
-
113
- for context in contexts:
114
- content = context.get('content', '').lower()
115
- content_words = set(content.split())
116
- keyword_overlap = len(selected_keywords.intersection(content_words))
117
-
118
- if keyword_overlap > 0:
119
- relevant_count += 1
120
-
121
- relevance_ratio = relevant_count / len(contexts)
122
- if relevance_ratio < 0.3: # Less than 30% of contexts are relevant
123
- result['is_valid'] = False
124
- result['issues'].append(
125
- f"Only {relevant_count}/{len(contexts)} contexts ({relevance_ratio:.1%}) "
126
- f"are relevant to the selected text"
127
- )
128
-
129
- return result
130
-
131
- def _validate_global_contexts(self, contexts: List[Dict[str, Any]]) -> Dict[str, Any]:
132
- """
133
- Validate global contexts
134
- """
135
- result = {
136
- 'is_valid': True,
137
- 'issues': []
138
- }
139
-
140
- if not contexts:
141
- result['is_valid'] = False
142
- result['issues'].append("No contexts provided for global query")
143
- return result
144
-
145
- # Check for diversity in chapters/sections
146
- unique_chapters = set(ctx.get('chapter', '') for ctx in contexts)
147
- unique_sections = set(ctx.get('section', '') for ctx in contexts)
148
-
149
- if len(unique_chapters) < 2 and len(contexts) > 1:
150
- result['issues'].append("Contexts lack diversity - all from same chapter")
151
-
152
- return result
153
-
154
- def enforce_context_boundaries(
155
- self,
156
- contexts: List[Dict[str, Any]],
157
- query_type: str,
158
- selected_text: Optional[str] = None,
159
- max_contexts: int = 5
160
- ) -> List[Dict[str, Any]]:
161
- """
162
- Enforce strict boundaries on contexts to prevent information leakage
163
- """
164
- # First, filter based on query type
165
- filtered_contexts = self.filter_context_for_query_type(contexts, query_type, selected_text)
166
-
167
- # Then enforce maximum count
168
- if len(filtered_contexts) > max_contexts:
169
- filtered_contexts = filtered_contexts[:max_contexts]
170
-
171
- # Validate the final contexts
172
- validation = self.validate_context_isolation(filtered_contexts, query_type, selected_text)
173
-
174
- if not validation['is_valid']:
175
- print(f"Context validation warning: {validation['issues']}")
176
-
177
- return filtered_contexts
178
-
179
- def build_isolated_context_string(
180
- self,
181
- contexts: List[Dict[str, Any]],
182
- query_type: str,
183
- selected_text: Optional[str] = None
184
- ) -> str:
185
- """
186
- Build a context string that enforces isolation between query types
187
- """
188
- if query_type == "selection" and selected_text:
189
- # Build context string focused on selected text and related content
190
- context_str = f"SELECTED TEXT:\n{selected_text}\n\nRELATED CONTENT:\n"
191
- for i, ctx in enumerate(contexts):
192
- context_str += f"[{i+1}] {ctx.get('content', '')}\n"
193
- context_str += f"Source: {ctx.get('chapter', '')} - {ctx.get('section', '')}\n\n"
194
- else:
195
- # Build global context string
196
- context_str = "RETRIEVED CONTENT:\n"
197
- for i, ctx in enumerate(contexts):
198
- context_str += f"[{i+1}] {ctx.get('content', '')}\n"
199
- context_str += f"Source: {ctx.get('chapter', '')} - {ctx.get('section', '')}\n\n"
200
-
201
- return context_str
202
-
203
-
204
- # Global instance
205
- context_filter = ContextFilter()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/prompting/prompt_builder.py DELETED
@@ -1,187 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from app.retrieval.retriever import Retriever
3
- from app.config import settings
4
-
5
-
6
- class PromptBuilder:
7
- """
8
- Builds context-aware prompts for the LLM while preventing hallucinations
9
- """
10
-
11
- def __init__(self):
12
- self.retriever = Retriever()
13
-
14
- def build_global_query_prompt(
15
- self,
16
- query: str,
17
- retrieved_contexts: List[Dict[str, Any]],
18
- max_context_length: int = 2000
19
- ) -> str:
20
- """
21
- Build a prompt for global book queries using all retrieved contexts
22
- """
23
- # Start with system message to prevent hallucinations
24
- system_prompt = (
25
- "You are an AI assistant that answers questions based only on the provided context. "
26
- "Do not use any prior knowledge or information not present in the context. "
27
- "If the answer cannot be found in the provided context, respond with: "
28
- "'The provided context does not contain information to answer this question.'\n\n"
29
- )
30
-
31
- # Add retrieved contexts
32
- context_section = "### CONTEXT:\n\n"
33
- total_context_length = 0
34
-
35
- for i, ctx in enumerate(retrieved_contexts):
36
- if total_context_length >= max_context_length:
37
- break
38
-
39
- context_text = ctx.get('content', '')
40
- # Truncate if too long
41
- if len(context_text) + total_context_length > max_context_length:
42
- available_length = max_context_length - total_context_length
43
- context_text = context_text[:available_length]
44
-
45
- context_section += f"**Source {i+1} ({ctx.get('title', 'Unknown')} - {ctx.get('chapter', 'Unknown')}):**\n"
46
- context_section += f"{context_text}\n\n"
47
- total_context_length += len(context_text)
48
-
49
- # Add user query
50
- user_query_section = f"### QUESTION:\n{query}\n\n"
51
-
52
- # Add instruction for response format
53
- response_format = (
54
- "### INSTRUCTIONS:\n"
55
- "1. Answer the question based ONLY on the provided context\n"
56
- "2. If the context doesn't contain the answer, say so explicitly\n"
57
- "3. Include relevant citations to the sources in your response\n"
58
- "4. Keep your response concise and to the point\n\n"
59
- )
60
-
61
- # Combine all parts
62
- full_prompt = system_prompt + context_section + user_query_section + response_format
63
- return full_prompt
64
-
65
- def build_selection_based_prompt(
66
- self,
67
- query: str,
68
- selected_text: str,
69
- retrieved_contexts: List[Dict[str, Any]],
70
- max_context_length: int = 2000
71
- ) -> str:
72
- """
73
- Build a prompt for selection-based queries using only the selected text and relevant contexts
74
- """
75
- # Start with system message
76
- system_prompt = (
77
- "You are an AI assistant that answers questions based only on the provided selected text and related context. "
78
- "Do not use any prior knowledge or information not present in the provided content. "
79
- "Focus your answer on the relationship between the selected text and the question. "
80
- "If the answer cannot be found in the provided content, respond with: "
81
- "'The provided content does not contain information to answer this question.'\n\n"
82
- )
83
-
84
- # Add the selected text as primary context
85
- primary_context = f"### SELECTED TEXT:\n{selected_text}\n\n"
86
-
87
- # Add related contexts
88
- related_context = "### RELATED CONTEXT:\n\n"
89
- total_context_length = len(selected_text)
90
-
91
- for i, ctx in enumerate(retrieved_contexts):
92
- if total_context_length >= max_context_length:
93
- break
94
-
95
- context_text = ctx.get('content', '')
96
- # Truncate if too long
97
- if len(context_text) + total_context_length > max_context_length:
98
- available_length = max_context_length - total_context_length
99
- context_text = context_text[:available_length]
100
-
101
- related_context += f"**Related Content {i+1} ({ctx.get('title', 'Unknown')} - {ctx.get('chapter', 'Unknown')}):**\n"
102
- related_context += f"{context_text}\n\n"
103
- total_context_length += len(context_text)
104
-
105
- # Add user query
106
- user_query_section = f"### QUESTION ABOUT SELECTED TEXT:\n{query}\n\n"
107
-
108
- # Add instruction for response format
109
- response_format = (
110
- "### INSTRUCTIONS:\n"
111
- "1. Answer the question based ONLY on the selected text and related context\n"
112
- "2. Focus on how the question relates to the selected text\n"
113
- "3. If the content doesn't contain the answer, say so explicitly\n"
114
- "4. Include citations to the sources in your response\n"
115
- "5. Keep your response concise and relevant to the selected text\n\n"
116
- )
117
-
118
- # Combine all parts
119
- full_prompt = system_prompt + primary_context + related_context + user_query_section + response_format
120
- return full_prompt
121
-
122
- def build_context_filter_prompt(
123
- self,
124
- query: str,
125
- available_contexts: List[Dict[str, Any]],
126
- max_contexts_to_use: int = 3
127
- ) -> str:
128
- """
129
- Build a prompt to filter relevant contexts from a larger set
130
- """
131
- system_prompt = (
132
- "You are an AI assistant that helps filter relevant contexts for a given question. "
133
- "Analyze the question and the provided contexts, then select only the most relevant ones.\n\n"
134
- )
135
-
136
- # Add contexts
137
- contexts_section = "### AVAILABLE CONTEXTS:\n\n"
138
- for i, ctx in enumerate(available_contexts):
139
- contexts_section += f"**Context {i+1} ({ctx.get('title', 'Unknown')} - {ctx.get('chapter', 'Unknown')}):**\n"
140
- contexts_section += f"{ctx.get('content', '')[:500]}...\n\n" # Truncate for brevity
141
-
142
- # Add query
143
- query_section = f"### QUESTION:\n{query}\n\n"
144
-
145
- # Add instructions
146
- instruction_section = (
147
- "### INSTRUCTIONS:\n"
148
- "1. Identify which contexts are most relevant to answering the question\n"
149
- "2. List the most relevant contexts by their number\n"
150
- "3. Provide a brief reason for why each selected context is relevant\n"
151
- f"4. Select at most {max_contexts_to_use} contexts\n\n"
152
- )
153
-
154
- # Add response format
155
- response_format = (
156
- "### RESPONSE FORMAT:\n"
157
- "Respond with only the following JSON format:\n"
158
- "{\n"
159
- " \"relevant_contexts\": [\n"
160
- " {\n"
161
- " \"index\": 0,\n"
162
- " \"reason\": \"Brief explanation of relevance\"\n"
163
- " }\n"
164
- " ]\n"
165
- "}\n"
166
- )
167
-
168
- full_prompt = system_prompt + contexts_section + query_section + instruction_section + response_format
169
- return full_prompt
170
-
171
- def validate_prompt_context_isolation(self, prompt: str, query_type: str, original_context: Optional[str] = None) -> bool:
172
- """
173
- Validate that the prompt properly isolates contexts based on query type
174
- """
175
- if query_type == "selection" and original_context:
176
- # For selection queries, ensure the prompt focuses on the selected text
177
- # This is a basic validation - in practice, you'd want more sophisticated checks
178
- return "SELECTED TEXT" in prompt or original_context[:50] in prompt
179
- else:
180
- # For global queries, ensure the prompt uses broader context
181
- return "CONTEXT:" in prompt
182
-
183
- return True
184
-
185
-
186
- # Global instance
187
- prompt_builder = PromptBuilder()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/retrieval/__pycache__/retriever.cpython-312.pyc DELETED
Binary file (6.85 kB)
 
app/retrieval/__pycache__/vector_search.cpython-312.pyc DELETED
Binary file (4.78 kB)
 
app/retrieval/retriever.py DELETED
@@ -1,149 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from app.retrieval.vector_search import VectorSearchEngine
3
-
4
-
5
- class Retriever:
6
- """
7
- High-level retriever that handles the complete retrieval process
8
- """
9
-
10
- def __init__(self):
11
- self.vector_search = VectorSearchEngine()
12
-
13
- async def retrieve_relevant_documents(
14
- self,
15
- query: str,
16
- top_k: int = 5,
17
- query_type: str = "global", # "global" or "selection"
18
- selected_text: Optional[str] = None,
19
- filters: Optional[Dict] = None
20
- ) -> List[Dict[str, Any]]:
21
- """
22
- Retrieve relevant documents based on the query and query type
23
- """
24
- if query_type == "selection" and selected_text:
25
- # For selection-based queries, we use the selected text as context
26
- # but still search for relevant content in the book
27
- # This approach focuses on content related to the selected text
28
- search_query = f"{selected_text} {query}".strip()
29
- else:
30
- # For global queries, we search with the original query
31
- search_query = query
32
-
33
- # Perform the search
34
- results = await self.vector_search.search_with_query(
35
- query=search_query,
36
- top_k=top_k,
37
- filters=filters
38
- )
39
-
40
- # Apply ranking and filtering
41
- ranked_results = self.vector_search.rank_results_by_relevance(results, query)
42
- filtered_results = self.vector_search.filter_results(ranked_results, filters)
43
-
44
- return filtered_results
45
-
46
- async def retrieve_with_context_filtering(
47
- self,
48
- query: str,
49
- top_k: int = 5,
50
- query_type: str = "global",
51
- selected_text: Optional[str] = None
52
- ) -> List[Dict[str, Any]]:
53
- """
54
- Retrieve documents with special handling for different query types
55
- """
56
- if query_type == "selection" and selected_text:
57
- # For selection-based queries, we might want to prioritize results
58
- # that are semantically similar to both the selected text and the query
59
- combined_query = f"Context: {selected_text}\nQuestion: {query}"
60
- results = await self.vector_search.search_with_query(
61
- query=combined_query,
62
- top_k=top_k
63
- )
64
- else:
65
- # For global queries, search normally
66
- results = await self.vector_search.search_with_query(
67
- query=query,
68
- top_k=top_k
69
- )
70
-
71
- # Apply additional filtering to ensure results are relevant
72
- # For selection-based queries, we might want to ensure results are related to the selected text
73
- if query_type == "selection" and selected_text:
74
- # Filter results to ensure they're related to the selected text context
75
- filtered_results = []
76
- for result in results:
77
- # This is a simple check - in practice, you might want more sophisticated filtering
78
- content = result.get('content', '').lower()
79
- selected_lower = selected_text.lower()
80
-
81
- # Check if the result content has some relation to the selected text
82
- # This could be improved with semantic similarity checks
83
- if any(word in content for word in selected_lower.split()[:5]): # Check first 5 words
84
- filtered_results.append(result)
85
- elif len(filtered_results) < top_k: # Add some results even if not perfectly matched
86
- filtered_results.append(result)
87
-
88
- results = filtered_results
89
-
90
- return results
91
-
92
- async def retrieve_for_citation(self, query: str, top_k: int = 3) -> List[Dict[str, Any]]:
93
- """
94
- Retrieve documents specifically for citation purposes
95
- This method focuses on getting clean, citable content
96
- """
97
- results = await self.retrieve_relevant_documents(query, top_k)
98
-
99
- # Format results for citation
100
- citations = []
101
- for result in results:
102
- citation = {
103
- 'document_id': result.get('id'),
104
- 'title': result.get('title'),
105
- 'chapter': result.get('chapter'),
106
- 'section': result.get('section'),
107
- 'page_reference': result.get('page_reference'),
108
- 'content': result.get('content'),
109
- 'relevance_score': result.get('score')
110
- }
111
- citations.append(citation)
112
-
113
- return citations
114
-
115
- def validate_retrieval_quality(self, results: List[Dict[str, Any]], query: str) -> Dict[str, Any]:
116
- """
117
- Validate the quality of the retrieval results
118
- """
119
- quality_metrics = {
120
- 'total_results': len(results),
121
- 'avg_relevance_score': sum(r.get('score', 0) for r in results) / len(results) if results else 0,
122
- 'has_high_quality_results': any(r.get('score', 0) > 0.7 for r in results) if results else False,
123
- 'query_coverage': self._assess_query_coverage(results, query)
124
- }
125
-
126
- return quality_metrics
127
-
128
- def _assess_query_coverage(self, results: List[Dict[str, Any]], query: str) -> float:
129
- """
130
- Assess how well the results cover the query topics
131
- This is a simplified implementation
132
- """
133
- if not results:
134
- return 0.0
135
-
136
- query_keywords = set(query.lower().split())
137
- covered_keywords = set()
138
-
139
- for result in results:
140
- content = result.get('content', '').lower()
141
- result_keywords = set(content.split())
142
- covered_keywords.update(query_keywords.intersection(result_keywords))
143
-
144
- coverage = len(covered_keywords) / len(query_keywords) if query_keywords else 0.0
145
- return min(coverage, 1.0) # Ensure value is between 0 and 1
146
-
147
-
148
- # Global instance
149
- retriever = Retriever()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/retrieval/vector_search.py DELETED
@@ -1,103 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from app.vector_store.qdrant_client import QdrantVectorStore
3
- from app.embeddings.minimal_embedding_generator import minimal_embedding_generator
4
- from app.config import settings
5
-
6
-
7
- class VectorSearchEngine:
8
- """
9
- Core vector search engine that handles semantic search operations
10
- """
11
-
12
- def __init__(self):
13
- self.qdrant_client = QdrantVectorStore()
14
- self.top_k_default = 5
15
-
16
- async def search_with_query(self, query: str, top_k: int = 5, filters: Optional[Dict] = None) -> List[Dict[str, Any]]:
17
- """
18
- Perform semantic search using a query string
19
- """
20
- # Generate embedding for the query using minimal generator
21
- query_embedding = minimal_embedding_generator.encode_query(query)
22
-
23
- if not query_embedding:
24
- return []
25
-
26
- # Perform vector search in Qdrant
27
- chapter_filter = filters.get('chapter') if filters else None
28
- search_results = self.qdrant_client.search_similar(
29
- query_embedding=query_embedding,
30
- top_k=top_k,
31
- chapter_filter=chapter_filter
32
- )
33
-
34
- return search_results
35
-
36
- async def search_with_embedding(self, query_embedding: List[float], top_k: int = 5, filters: Optional[Dict] = None) -> List[Dict[str, Any]]:
37
- """
38
- Perform semantic search using a pre-computed embedding
39
- """
40
- chapter_filter = filters.get('chapter') if filters else None
41
- search_results = self.qdrant_client.search_similar(
42
- query_embedding=query_embedding,
43
- top_k=top_k,
44
- chapter_filter=chapter_filter
45
- )
46
-
47
- return search_results
48
-
49
- def rank_results_by_relevance(self, results: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
50
- """
51
- Apply additional ranking based on relevance to the query
52
- This is a simple implementation; in production, you might want to use more sophisticated ranking
53
- """
54
- # For now, we'll just return the results as Qdrant already ranks by similarity score
55
- # In the future, we could implement additional ranking based on:
56
- # - keyword matching in title/content
57
- # - recency of content
58
- # - content length relative to query needs
59
- return sorted(results, key=lambda x: x.get('score', 0), reverse=True)
60
-
61
- def filter_results(self, results: List[Dict[str, Any]], filters: Optional[Dict] = None) -> List[Dict[str, Any]]:
62
- """
63
- Apply additional filtering to search results
64
- """
65
- if not filters:
66
- return results
67
-
68
- filtered_results = []
69
- for result in results:
70
- include = True
71
-
72
- # Apply content-based filters
73
- if 'min_score' in filters:
74
- if result.get('score', 0) < filters['min_score']:
75
- include = False
76
-
77
- if 'required_keywords' in filters:
78
- content = result.get('content', '').lower()
79
- for keyword in filters['required_keywords']:
80
- if keyword.lower() not in content:
81
- include = False
82
- break
83
-
84
- if include:
85
- filtered_results.append(result)
86
-
87
- return filtered_results
88
-
89
- async def get_document_content(self, doc_id: str) -> Optional[Dict[str, Any]]:
90
- """
91
- Retrieve content of a specific document by ID
92
- """
93
- return self.qdrant_client.get_document_by_id(doc_id)
94
-
95
- def get_collection_stats(self) -> Dict[str, Any]:
96
- """
97
- Get statistics about the vector collection
98
- """
99
- return self.qdrant_client.get_collection_info()
100
-
101
-
102
- # Global instance
103
- vector_search_engine = VectorSearchEngine()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/__pycache__/chat_service.cpython-312.pyc DELETED
Binary file (5.69 kB)
 
app/services/__pycache__/gemini_client.cpython-312.pyc DELETED
Binary file (6.58 kB)
 
app/services/__pycache__/openrouter_client.cpython-312.pyc DELETED
Binary file (8.86 kB)
 
app/services/chat_service.py DELETED
@@ -1,144 +0,0 @@
1
- from typing import Dict, Any, List, Optional
2
- from datetime import datetime
3
- import uuid
4
-
5
- from app.retrieval.retriever import Retriever
6
- from app.prompting.context_filter import ContextFilter
7
- from app.generation.response_generator import ResponseGenerator
8
- from app.database.repositories import ChatSessionRepository, ChatMessageRepository, QueryContextRepository
9
- from app.database.database import get_db
10
-
11
-
12
- class ChatService:
13
- """
14
- Service class that orchestrates the chat functionality
15
- """
16
-
17
- def __init__(self):
18
- self.retriever = Retriever()
19
- self.context_filter = ContextFilter()
20
- self.response_generator = ResponseGenerator()
21
-
22
- async def process_chat_message(
23
- self,
24
- session_id: str,
25
- message: str,
26
- query_type: str = "global",
27
- selected_text: Optional[str] = None,
28
- top_k: int = 5
29
- ) -> Dict[str, Any]:
30
- """
31
- Process a chat message through the full RAG pipeline
32
- """
33
- # Retrieve relevant documents
34
- retrieved_docs = await self.retriever.retrieve_with_context_filtering(
35
- query=message,
36
- top_k=top_k,
37
- query_type=query_type,
38
- selected_text=selected_text
39
- )
40
-
41
- # Apply context filtering to ensure proper isolation
42
- filtered_docs = self.context_filter.enforce_context_boundaries(
43
- contexts=retrieved_docs,
44
- query_type=query_type,
45
- selected_text=selected_text
46
- )
47
-
48
- # Generate response
49
- response_data = await self.response_generator.generate_response_with_validation(
50
- query=message,
51
- retrieved_contexts=filtered_docs,
52
- query_type=query_type,
53
- selected_text=selected_text,
54
- session_id=session_id
55
- )
56
-
57
- # Store conversation in database
58
- await self._store_conversation(session_id, message, response_data)
59
-
60
- return response_data
61
-
62
- async def _store_conversation(self, session_id: str, user_message: str, response_data: Dict[str, Any]):
63
- """
64
- Store the conversation in the database
65
- """
66
- db_gen = get_db()
67
- db = next(db_gen)
68
- try:
69
- # Create or update session
70
- session_repo = ChatSessionRepository(db)
71
- existing_session = session_repo.get_session_by_id(session_id)
72
- if not existing_session:
73
- session_repo.create_session(session_id=session_id)
74
-
75
- # Store user message
76
- user_message_id = f"msg_{uuid.uuid4().hex[:8]}"
77
- message_repo = ChatMessageRepository(db)
78
- message_repo.create_message(
79
- message_id=user_message_id,
80
- session_id=session_id,
81
- role="user",
82
- content=user_message
83
- )
84
-
85
- # Store assistant response
86
- assistant_message_id = f"msg_{uuid.uuid4().hex[:8]}"
87
- citations_for_storage = response_data.get("citations", [])
88
- message_repo.create_message(
89
- message_id=assistant_message_id,
90
- session_id=session_id,
91
- role="assistant",
92
- content=response_data.get("response", ""),
93
- citations=citations_for_storage
94
- )
95
- finally:
96
- next(db_gen, None) # Close the db session
97
-
98
- async def get_chat_history(self, session_id: str) -> List[Dict[str, Any]]:
99
- """
100
- Retrieve chat history for a session
101
- """
102
- db_gen = get_db()
103
- db = next(db_gen)
104
- try:
105
- message_repo = ChatMessageRepository(db)
106
- messages = message_repo.get_messages_by_session(session_id)
107
-
108
- return [
109
- {
110
- "message_id": msg.message_id,
111
- "role": msg.role,
112
- "content": msg.content,
113
- "timestamp": msg.timestamp.isoformat() if msg.timestamp else None,
114
- "citations": msg.citations
115
- }
116
- for msg in messages
117
- ]
118
- finally:
119
- next(db_gen, None)
120
-
121
- def validate_query_params(
122
- self,
123
- query_type: str,
124
- selected_text: Optional[str] = None
125
- ) -> Dict[str, Any]:
126
- """
127
- Validate query parameters
128
- """
129
- errors = []
130
-
131
- if query_type not in ["global", "selection"]:
132
- errors.append("query_type must be either 'global' or 'selection'")
133
-
134
- if query_type == "selection" and not selected_text:
135
- errors.append("selected_text is required for selection-based queries")
136
-
137
- return {
138
- "is_valid": len(errors) == 0,
139
- "errors": errors
140
- }
141
-
142
-
143
- # Global instance
144
- chat_service = ChatService()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/openrouter_client.py DELETED
@@ -1,165 +0,0 @@
1
- import asyncio
2
- import httpx
3
- from typing import List, Dict, Any, Optional
4
- from app.config import settings
5
-
6
-
7
- class OpenRouterClient:
8
- """
9
- Client for interacting with OpenRouter API for both embeddings and chat completions
10
- """
11
-
12
- def __init__(self):
13
- self.base_url = settings.OPENROUTER_BASE_URL
14
- self.api_key = settings.OPENROUTER_API_KEY
15
- self.max_retries = 3
16
- self.retry_delay = 1
17
-
18
- async def generate_embeddings(self, texts: List[str], model: str = "text-embedding-ada-002") -> List[List[float]]:
19
- """
20
- Generate embeddings for a list of texts using OpenRouter API
21
- """
22
- headers = {
23
- "Authorization": f"Bearer {self.api_key}",
24
- "Content-Type": "application/json"
25
- }
26
-
27
- embeddings = []
28
- for text in texts:
29
- # Truncate text if it's too long
30
- max_length = 8000 # Conservative limit
31
- if len(text) > max_length:
32
- text = text[:max_length]
33
-
34
- data = {
35
- "model": model,
36
- "input": text
37
- }
38
-
39
- async with httpx.AsyncClient(timeout=30.0) as client:
40
- for attempt in range(self.max_retries):
41
- try:
42
- response = await client.post(
43
- f"{self.base_url}/embeddings",
44
- headers=headers,
45
- json=data
46
- )
47
-
48
- if response.status_code == 200:
49
- result = response.json()
50
- embedding = result['data'][0]['embedding']
51
- embeddings.append(embedding)
52
- break
53
- elif response.status_code == 429:
54
- # Rate limited - wait and retry
55
- wait_time = self.retry_delay * (2 ** attempt) # Exponential backoff
56
- print(f"Rate limited, waiting {wait_time}s before retry {attempt + 1}")
57
- await asyncio.sleep(wait_time)
58
- continue
59
- else:
60
- print(f"Error {response.status_code}: {response.text}")
61
- if attempt == self.max_retries - 1:
62
- # Last attempt, return zeros as fallback
63
- embeddings.append([0.0] * 1536)
64
- break
65
-
66
- except httpx.RequestError as e:
67
- print(f"Request error on attempt {attempt + 1}: {str(e)}")
68
- if attempt == self.max_retries - 1:
69
- embeddings.append([0.0] * 1536)
70
- await asyncio.sleep(self.retry_delay * (2 ** attempt))
71
- except Exception as e:
72
- print(f"Unexpected error on attempt {attempt + 1}: {str(e)}")
73
- if attempt == self.max_retries - 1:
74
- embeddings.append([0.0] * 1536)
75
- await asyncio.sleep(self.retry_delay * (2 ** attempt))
76
-
77
- return embeddings
78
-
79
- async def generate_completion(
80
- self,
81
- messages: List[Dict[str, str]],
82
- model: str = "mistralai/devstral-2512:free",
83
- temperature: float = 0.7,
84
- max_tokens: int = 1000
85
- ) -> Optional[str]:
86
- """
87
- Generate completion using OpenRouter API with specified model
88
- """
89
- headers = {
90
- "Authorization": f"Bearer {self.api_key}",
91
- "Content-Type": "application/json"
92
- }
93
-
94
- data = {
95
- "model": model,
96
- "messages": messages,
97
- "temperature": temperature,
98
- "max_tokens": max_tokens
99
- }
100
-
101
- async with httpx.AsyncClient(timeout=60.0) as client:
102
- for attempt in range(self.max_retries):
103
- try:
104
- response = await client.post(
105
- f"{self.base_url}/chat/completions",
106
- headers=headers,
107
- json=data
108
- )
109
-
110
- if response.status_code == 200:
111
- result = response.json()
112
- return result['choices'][0]['message']['content']
113
- elif response.status_code == 429:
114
- # Rate limited - wait and retry
115
- wait_time = self.retry_delay * (2 ** attempt) # Exponential backoff
116
- print(f"Rate limited, waiting {wait_time}s before retry {attempt + 1}")
117
- await asyncio.sleep(wait_time)
118
- continue
119
- else:
120
- print(f"Error {response.status_code}: {response.text}")
121
- if attempt == self.max_retries - 1:
122
- return None
123
- break
124
-
125
- except httpx.RequestError as e:
126
- print(f"Request error on attempt {attempt + 1}: {str(e)}")
127
- if attempt == self.max_retries - 1:
128
- return None
129
- await asyncio.sleep(self.retry_delay * (2 ** attempt))
130
- except Exception as e:
131
- print(f"Unexpected error on attempt {attempt + 1}: {str(e)}")
132
- if attempt == self.max_retries - 1:
133
- return None
134
- await asyncio.sleep(self.retry_delay * (2 ** attempt))
135
-
136
- return None
137
-
138
- async def get_model_info(self, model: str) -> Optional[Dict[str, Any]]:
139
- """
140
- Get information about a specific model
141
- """
142
- headers = {
143
- "Authorization": f"Bearer {self.api_key}",
144
- "Content-Type": "application/json"
145
- }
146
-
147
- try:
148
- async with httpx.AsyncClient(timeout=30.0) as client:
149
- response = await client.get(
150
- f"{self.base_url}/models/{model}",
151
- headers=headers
152
- )
153
-
154
- if response.status_code == 200:
155
- return response.json()
156
- else:
157
- print(f"Error getting model info {response.status_code}: {response.text}")
158
- return None
159
- except Exception as e:
160
- print(f"Error getting model info: {str(e)}")
161
- return None
162
-
163
-
164
- # Global client instance
165
- openrouter_client = OpenRouterClient()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/vector_store/__pycache__/qdrant_client.cpython-312.pyc DELETED
Binary file (8.25 kB)
 
app/vector_store/__pycache__/vector_repository.cpython-312.pyc DELETED
Binary file (2.59 kB)
 
app/vector_store/qdrant_client.py DELETED
@@ -1,207 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from qdrant_client import QdrantClient
3
- from qdrant_client.http import models
4
- from app.config import settings
5
- from app.ingestion.chunker import TextChunk
6
-
7
-
8
- class QdrantVectorStore:
9
- """
10
- Qdrant vector database client for storing and retrieving embeddings
11
- """
12
-
13
- def __init__(self):
14
- try:
15
- self.client = QdrantClient(
16
- url=settings.QDRANT_URL,
17
- api_key=settings.QDRANT_API_KEY,
18
- prefer_grpc=False, # Using HTTP for better compatibility
19
- timeout=60.0 # Increase timeout for large batch operations
20
- )
21
- self.collection_name = "book_content_chunks"
22
- self.vector_size = 1536 # Standard embedding size for text-embedding-ada-002
23
- self._initialize_collection()
24
- except Exception as e:
25
- print(f"[WARN] Could not connect to Qdrant: {e}")
26
- print("[WARN] Qdrant functionality will be unavailable until connection is restored")
27
- # Initialize with None values when connection fails
28
- self.client = None
29
- self.collection_name = "book_content_chunks"
30
- self.vector_size = 1536
31
-
32
- def _initialize_collection(self):
33
- """
34
- Initialize the collection if it doesn't exist
35
- """
36
- if self.client is None:
37
- return # Skip initialization if no client
38
-
39
- try:
40
- # Check if collection exists
41
- self.client.get_collection(self.collection_name)
42
- print(f"[INFO] Collection '{self.collection_name}' already exists")
43
- except:
44
- # Create collection if it doesn't exist
45
- self.client.create_collection(
46
- collection_name=self.collection_name,
47
- vectors_config=models.VectorParams(
48
- size=self.vector_size,
49
- distance=models.Distance.COSINE
50
- )
51
- )
52
- print(f"[INFO] Created collection '{self.collection_name}'")
53
-
54
- # Create payload index for faster filtering
55
- self.client.create_payload_index(
56
- collection_name=self.collection_name,
57
- field_name="chapter",
58
- field_schema=models.PayloadSchemaType.KEYWORD
59
- )
60
-
61
- self.client.create_payload_index(
62
- collection_name=self.collection_name,
63
- field_name="section",
64
- field_schema=models.PayloadSchemaType.KEYWORD
65
- )
66
-
67
- def store_embeddings(self, chunks_with_embeddings: List[Dict[str, Any]]):
68
- """
69
- Store chunks with their embeddings in Qdrant
70
- """
71
- if self.client is None:
72
- print("[WARN] Cannot store embeddings - Qdrant not connected")
73
- return
74
-
75
- points = []
76
- for item in chunks_with_embeddings:
77
- point = models.PointStruct(
78
- id=item['id'],
79
- vector=item['embedding'],
80
- payload={
81
- 'content': item['content'],
82
- 'title': item['title'],
83
- 'chapter': item['chapter'],
84
- 'section': item['section'],
85
- 'page_reference': item['page_reference'],
86
- 'token_count': item['token_count']
87
- }
88
- )
89
- points.append(point)
90
-
91
- # Upload points in smaller batches to avoid timeouts
92
- batch_size = 16 # Smaller batch size to avoid timeouts
93
- for i in range(0, len(points), batch_size):
94
- batch = points[i:i + batch_size]
95
- self.client.upsert(
96
- collection_name=self.collection_name,
97
- points=batch
98
- )
99
- # Add a small delay between batches to avoid overwhelming the server
100
- import time
101
- time.sleep(0.1)
102
-
103
- def search_similar(self, query_embedding: List[float], top_k: int = 5, chapter_filter: Optional[str] = None) -> List[Dict[str, Any]]:
104
- """
105
- Search for similar content based on embedding similarity
106
- """
107
- if self.client is None:
108
- print("[WARN] Cannot search - Qdrant not connected")
109
- return []
110
-
111
- # Build filters if needed
112
- filters = None
113
- if chapter_filter:
114
- filters = models.Filter(
115
- must=[
116
- models.FieldCondition(
117
- key="chapter",
118
- match=models.MatchValue(value=chapter_filter)
119
- )
120
- ]
121
- )
122
-
123
- # Perform search
124
- search_results = self.client.search(
125
- collection_name=self.collection_name,
126
- query_vector=query_embedding,
127
- query_filter=filters,
128
- limit=top_k,
129
- with_payload=True
130
- )
131
-
132
- # Format results
133
- results = []
134
- for result in search_results:
135
- results.append({
136
- 'id': result.id,
137
- 'content': result.payload['content'],
138
- 'title': result.payload['title'],
139
- 'chapter': result.payload['chapter'],
140
- 'section': result.payload['section'],
141
- 'page_reference': result.payload['page_reference'],
142
- 'score': result.score
143
- })
144
-
145
- return results
146
-
147
- def get_document_by_id(self, doc_id: str) -> Optional[Dict[str, Any]]:
148
- """
149
- Retrieve a specific document by its ID
150
- """
151
- if self.client is None:
152
- print("[WARN] Cannot retrieve document - Qdrant not connected")
153
- return None
154
-
155
- points = self.client.retrieve(
156
- collection_name=self.collection_name,
157
- ids=[doc_id],
158
- with_payload=True
159
- )
160
-
161
- if points:
162
- point = points[0]
163
- return {
164
- 'id': point.id,
165
- 'content': point.payload['content'],
166
- 'title': point.payload['title'],
167
- 'chapter': point.payload['chapter'],
168
- 'section': point.payload['section'],
169
- 'page_reference': point.payload['page_reference']
170
- }
171
-
172
- return None
173
-
174
- def delete_collection(self):
175
- """
176
- Delete the entire collection (use with caution!)
177
- """
178
- if self.client is None:
179
- print("[WARN] Cannot delete collection - Qdrant not connected")
180
- return
181
-
182
- self.client.delete_collection(self.collection_name)
183
-
184
- def get_collection_info(self) -> Dict[str, Any]:
185
- """
186
- Get information about the collection
187
- """
188
- if self.client is None:
189
- print("[WARN] Cannot get collection info - Qdrant not connected")
190
- return {
191
- 'name': self.collection_name,
192
- 'vector_size': self.vector_size,
193
- 'distance': 'COSINE',
194
- 'point_count': 0
195
- }
196
-
197
- info = self.client.get_collection(self.collection_name)
198
- return {
199
- 'name': self.collection_name,
200
- 'vector_size': info.config.params.vectors.size,
201
- 'distance': info.config.params.vectors.distance,
202
- 'point_count': info.points_count
203
- }
204
-
205
-
206
- # Global instance
207
- qdrant_client = QdrantVectorStore()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/vector_store/vector_repository.py DELETED
@@ -1,49 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from app.vector_store.qdrant_client import QdrantVectorStore, qdrant_client
3
- from app.ingestion.chunker import TextChunk
4
-
5
-
6
- class VectorRepository:
7
- """
8
- Repository class for vector store operations
9
- """
10
-
11
- def __init__(self, vector_store: QdrantVectorStore):
12
- self.vector_store = vector_store
13
-
14
- def store_document_chunks(self, chunks_with_embeddings: List[Dict[str, Any]]):
15
- """
16
- Store document chunks with embeddings in the vector store
17
- """
18
- self.vector_store.store_embeddings(chunks_with_embeddings)
19
-
20
- def search_relevant_chunks(
21
- self,
22
- query_embedding: List[float],
23
- top_k: int = 5,
24
- chapter_filter: Optional[str] = None
25
- ) -> List[Dict[str, Any]]:
26
- """
27
- Search for relevant chunks based on query embedding
28
- """
29
- return self.vector_store.search_similar(
30
- query_embedding=query_embedding,
31
- top_k=top_k,
32
- chapter_filter=chapter_filter
33
- )
34
-
35
- def get_document_by_id(self, doc_id: str) -> Optional[Dict[str, Any]]:
36
- """
37
- Retrieve a document by its ID
38
- """
39
- return self.vector_store.get_document_by_id(doc_id)
40
-
41
- def get_collection_stats(self) -> Dict[str, Any]:
42
- """
43
- Get statistics about the vector collection
44
- """
45
- return self.vector_store.get_collection_info()
46
-
47
-
48
- # Global instance
49
- vector_repository = VectorRepository(qdrant_client)