Spaces:
Sleeping
Sleeping
| """ | |
| RAG Query Module | |
| Handles querying the RAG system and extracting answers with sources | |
| """ | |
| from typing import Tuple, Optional, List | |
| from openai import OpenAI | |
class RAGQueryEngine:
    """Answer questions with OpenAI file search and report the cited files.

    Each request is sent to ``client.responses.create`` with a single
    ``file_search`` tool bound to ``vector_store_id``. The answer text and the
    filenames attached to its annotations are read from the assistant message
    found in the response's ``output`` list.
    """

    # Reply returned by ``query`` when no usable response is available.
    FALLBACK_ANSWER = "That question is outside my area of expertise."

    # Probe question used by ``get_files_from_vector_store``.
    FILE_PROBE_QUERY = "List all documents about Mercedes E-class ADAS features"

    # Filenames reported when the probe query yields no cited files.
    DEFAULT_FILES = (
        "Function of Active Distance Assist DISTRONIC.pdf",
        "Function of Active Lane Change Assist.pdf",
        "Function of Active Steering Assist.pdf",
        "Function of Active Stop-and-Go Assist.pdf",
    )

    def __init__(self, client: "OpenAI", vector_store_id: str, model: str = "gpt-4o-mini"):
        """Store the API client, the vector store to search, and the model name.

        Args:
            client: OpenAI client used for every request.
            vector_store_id: ID of the vector store passed to the file_search tool.
            model: Model name passed to the Responses API.
        """
        self.client = client
        self.vector_store_id = vector_store_id
        self.model = model

    @staticmethod
    def _first_text_part(response):
        """Return the first text part of the assistant message, or None.

        The message is located by its ``type`` instead of a fixed index,
        because other output items (such as the file-search call) can sit
        before it and their number is not fixed.
        """
        for item in getattr(response, "output", None) or []:
            if getattr(item, "type", None) != "message":
                continue
            for part in getattr(item, "content", None) or []:
                # Skip parts without text so callers can rely on ``.text``.
                if isinstance(getattr(part, "text", None), str):
                    return part
        return None

    @staticmethod
    def _cited_filenames(text_part) -> List[str]:
        """Return the unique filenames cited by *text_part*, in first-seen order.

        Annotations that carry no ``filename`` are ignored instead of raising.
        """
        annotations = getattr(text_part, "annotations", None) or []
        names = (getattr(annotation, "filename", None) for annotation in annotations)
        # dict.fromkeys de-duplicates while keeping citation order stable.
        return list(dict.fromkeys(name for name in names if name))

    @staticmethod
    def _strip_pdf_suffix(filename: str) -> str:
        """Drop a trailing ".pdf" (any letter case) and leave the rest intact."""
        if filename.lower().endswith(".pdf"):
            return filename[:-len(".pdf")]
        return filename

    def get_response_from_vectorstore(self, query: str):
        """Send *query* to the Responses API with file search enabled.

        Args:
            query: User query.

        Returns:
            The response object when it contains an assistant message with a
            text part; None when the API call raised or no such part exists.
            Failures are printed, not raised.
        """
        try:
            response = self.client.responses.create(
                input=query,
                model=self.model,
                tools=[{
                    "type": "file_search",
                    "vector_store_ids": [self.vector_store_id],
                }],
            )
        except Exception as e:
            # Deliberate best-effort boundary: callers treat None as "no answer".
            print(f"β Error during API call: {e}")
            return None

        if self._first_text_part(response) is None:
            print("β οΈ Invalid response structure")
            return None
        return response

    def query(self, query: str) -> Tuple[str, str]:
        """Query the RAG model and return the answer with its sources.

        Args:
            query: User query.

        Returns:
            Tuple of (answer_text, footnotes). ``footnotes`` is a numbered list
            of the cited filenames without their ".pdf" extension, or an empty
            string when nothing was cited. When no usable response is
            available the fallback answer is returned with empty footnotes.
        """
        response = self.get_response_from_vectorstore(query)
        text_part = self._first_text_part(response)
        if text_part is None:
            return self.FALLBACK_ANSWER, ""

        source_files = self._cited_filenames(text_part)
        if not source_files:
            return text_part.text, ""

        numbered = [
            f"{i}. {self._strip_pdf_suffix(filename)}\n"
            for i, filename in enumerate(source_files, 1)
        ]
        footnotes = "\n\nπ **Sources:**\n" + "".join(numbered)
        return text_part.text, footnotes

    def get_files_from_vector_store(self) -> List[str]:
        """Return the filenames cited when answering a fixed probe query.

        This does not enumerate the vector store itself: it asks
        ``FILE_PROBE_QUERY`` and collects the filenames attached to the
        answer's annotations.

        Returns:
            Sorted list of unique cited filenames; ``DEFAULT_FILES`` when none
            were cited or no usable response was obtained; an empty list if an
            unexpected error occurs.
        """
        try:
            response = self.get_response_from_vectorstore(self.FILE_PROBE_QUERY)
            file_list = sorted(self._cited_filenames(self._first_text_part(response)))
            # Fall back to the default list if nothing was cited.
            return file_list or list(self.DEFAULT_FILES)
        except Exception as e:
            print(f"β Error getting files: {e}")
            return []