Spaces:
Sleeping
Sleeping
File size: 4,019 Bytes
78e8dd4 0cfa3a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
"""
RAG Query Module
Handles querying the RAG system and extracting answers with sources
"""
from typing import Tuple, Optional, List
from openai import OpenAI
class RAGQueryEngine:
    """Answer questions through OpenAI file search and report the cited files.

    Each query is sent to ``client.responses.create`` with a ``file_search``
    tool bound to ``vector_store_id``. The answer text and the filenames
    attached to its annotations are read from the first ``output_text`` part
    of the first ``message`` item in ``response.output``.
    """

    # Returned by ``query`` when no usable answer could be obtained.
    NO_ANSWER_MESSAGE = "That question is outside my area of expertise."

    # Prompt sent by ``get_files_from_vector_store`` to provoke file citations.
    FILE_LIST_QUERY = "List all documents about Mercedes E-class ADAS features"

    # Returned by ``get_files_from_vector_store`` when nothing was cited.
    DEFAULT_FILES = (
        "Function of Active Distance Assist DISTRONIC.pdf",
        "Function of Active Lane Change Assist.pdf",
        "Function of Active Steering Assist.pdf",
        "Function of Active Stop-and-Go Assist.pdf",
    )

    def __init__(self, client: OpenAI, vector_store_id: str, model: str = "gpt-4o-mini"):
        """Store the API client, the vector store to search and the model name.

        Args:
            client: OpenAI client used for every request
            vector_store_id: ID of the vector store passed to the file_search tool
            model: Model name passed to ``responses.create``
        """
        self.client = client
        self.vector_store_id = vector_store_id
        self.model = model

    @staticmethod
    def _first_text_part(response) -> Optional[object]:
        """Return the first ``output_text`` part of a ``message`` item, or None.

        The message is located by type rather than by a fixed position in
        ``response.output``: tool-call items may or may not precede it, and a
        message may hold no text part at all (e.g. only a refusal).
        """
        for item in getattr(response, "output", None) or []:
            if getattr(item, "type", None) != "message":
                continue
            for part in getattr(item, "content", None) or []:
                is_text = getattr(part, "type", None) == "output_text"
                if is_text and getattr(part, "text", None) is not None:
                    return part
        return None

    @staticmethod
    def _cited_filenames(part) -> List[str]:
        """Return the distinct filenames cited on ``part`` in first-seen order.

        Annotations without a ``filename`` attribute are skipped instead of
        raising, since not every annotation kind refers to a file.
        """
        annotations = getattr(part, "annotations", None) or []
        names = (getattr(annotation, "filename", None) for annotation in annotations)
        # dict.fromkeys de-duplicates while keeping a stable, reproducible order.
        return list(dict.fromkeys(name for name in names if name))

    @staticmethod
    def _strip_pdf_extension(filename: str) -> str:
        """Drop a trailing ``.pdf`` (any letter case); leave other names untouched."""
        if filename.lower().endswith(".pdf"):
            return filename[:-len(".pdf")]
        return filename

    def get_response_from_vectorstore(self, query: str):
        """
        Get response from vector store using OpenAI responses API
        Args:
            query: User query
        Returns:
            Response object, or None if the API call raised or the response
            contains no message with a text part
        """
        try:
            response = self.client.responses.create(
                input=query,
                model=self.model,
                tools=[{
                    "type": "file_search",
                    "vector_store_ids": [self.vector_store_id],
                }],
            )
        except Exception as e:
            # Best-effort boundary: report the failure and let callers fall back.
            print(f"β Error during API call: {e}")
            return None

        if self._first_text_part(response) is None:
            print("β οΈ Invalid response structure")
            return None
        return response

    def query(self, query: str) -> Tuple[str, str]:
        """
        Query the RAG model and return answer with sources
        Args:
            query: User query
        Returns:
            Tuple of (answer_text, footnotes). ``footnotes`` is an empty string
            when nothing was cited; when no answer is available the tuple is
            ``(NO_ANSWER_MESSAGE, "")``.
        """
        response = self.get_response_from_vectorstore(query)
        part = self._first_text_part(response) if response is not None else None
        if part is None:
            return self.NO_ANSWER_MESSAGE, ""

        source_files = self._cited_filenames(part)
        if not source_files:
            return part.text, ""

        numbered = [
            f"{i}. {self._strip_pdf_extension(filename)}\n"
            for i, filename in enumerate(source_files, 1)
        ]
        footnotes = "\n\nπ **Sources:**\n" + "".join(numbered)
        return part.text, footnotes

    def get_files_from_vector_store(self) -> List[str]:
        """
        Get the filenames cited when the model is asked ``FILE_LIST_QUERY``

        This does not enumerate the vector store itself: it only collects the
        files the model happened to cite in its answer to that fixed prompt.
        Returns:
            Sorted list of cited filenames; a copy of ``DEFAULT_FILES`` when
            nothing was cited; an empty list if an unexpected error occurred
        """
        try:
            response = self.get_response_from_vectorstore(self.FILE_LIST_QUERY)
            part = self._first_text_part(response) if response is not None else None
            file_list = sorted(self._cited_filenames(part)) if part is not None else []
            # Fallback to default list if empty (copied so callers cannot mutate it)
            return file_list or list(self.DEFAULT_FILES)
        except Exception as e:
            print(f"β Error getting files: {str(e)}")
            return []
|