File size: 4,019 Bytes
78e8dd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cfa3a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""

RAG Query Module

Handles querying the RAG system and extracting answers with sources

"""
from typing import Tuple, Optional, List
from openai import OpenAI


class RAGQueryEngine:
    """Answer queries against an OpenAI vector store, with source attribution.

    Each query is sent to the Responses API with the ``file_search`` tool
    pointed at a single vector store. The answer text and the names of the
    cited files are then extracted from the returned output items.
    """

    # Reply handed back by ``query`` when no usable answer was produced.
    _NO_ANSWER = "That question is outside my area of expertise."

    # Probe sent by ``get_files_from_vector_store`` to provoke file citations.
    _FILE_PROBE_QUERY = "List all documents about Mercedes E-class ADAS features"

    # Filenames reported when the probe yields no citations at all.
    _DEFAULT_FILES = (
        "Function of Active Distance Assist DISTRONIC.pdf",
        "Function of Active Lane Change Assist.pdf",
        "Function of Active Steering Assist.pdf",
        "Function of Active Stop-and-Go Assist.pdf",
    )

    # The client annotation is quoted so it is not evaluated at definition time.
    def __init__(self, client: "OpenAI", vector_store_id: str, model: str = "gpt-4o-mini"):
        """
        Store the API client and the query configuration.

        Args:
            client: OpenAI client used for every request.
            vector_store_id: ID of the vector store searched by ``file_search``.
            model: Model name passed to the Responses API.
        """
        self.client = client
        self.vector_store_id = vector_store_id
        self.model = model

    @staticmethod
    def _find_text_part(response):
        """Return the first ``output_text`` part of a ``message`` item, or None."""
        # The message is located by type rather than by position: its index in
        # ``output`` depends on which other items (tool calls, reasoning)
        # precede it.
        for item in getattr(response, "output", None) or []:
            if getattr(item, "type", None) != "message":
                continue
            for part in getattr(item, "content", None) or []:
                if (
                    getattr(part, "type", None) == "output_text"
                    and getattr(part, "text", None) is not None
                ):
                    return part
        return None

    @staticmethod
    def _cited_filenames(part) -> List[str]:
        """Return the unique filenames cited by a text part, in citation order."""
        names = (
            getattr(annotation, "filename", None)
            for annotation in getattr(part, "annotations", None) or []
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # numbering of the footnotes is stable from one call to the next.
        return list(dict.fromkeys(name for name in names if name))

    @staticmethod
    def _display_name(filename: str) -> str:
        """Return the filename without a trailing ".pdf" extension."""
        if filename.lower().endswith(".pdf"):
            return filename[:-len(".pdf")]
        return filename

    def get_response_from_vectorstore(self, query: str):
        """
        Get response from vector store using OpenAI responses API

        Args:
            query: User query

        Returns:
            Response object, or None if the API call failed or the response
            contains no message text
        """
        try:
            response = self.client.responses.create(
                input=query,
                model=self.model,
                tools=[{
                    "type": "file_search",
                    "vector_store_ids": [self.vector_store_id],
                }],
            )
        except Exception as e:
            # API boundary: callers rely on None instead of an exception.
            print(f"❌ Error during API call: {e}")
            return None

        if self._find_text_part(response) is None:
            print("⚠️ Invalid response structure")
            return None
        return response

    def query(self, query: str) -> Tuple[str, str]:
        """
        Query the RAG model and return answer with sources

        Args:
            query: User query

        Returns:
            Tuple of (answer_text, footnotes). ``footnotes`` is an empty
            string when the answer cites no files. When no answer is
            available, a fixed fallback sentence is returned with empty
            footnotes.
        """
        response = self.get_response_from_vectorstore(query)
        if response is None:
            return self._NO_ANSWER, ""

        text_part = self._find_text_part(response)
        if text_part is None:
            return self._NO_ANSWER, ""

        source_files = self._cited_filenames(text_part)
        if not source_files:
            return text_part.text, ""

        entries = "".join(
            f"{i}. {self._display_name(filename)}\n"
            for i, filename in enumerate(source_files, 1)
        )
        # \U0001F4DA is the "books" emoji, written as an escape so that a
        # re-encoding of this file cannot garble the heading.
        footnotes = "\n\n\U0001F4DA **Sources:**\n" + entries
        return text_part.text, footnotes

    def get_files_from_vector_store(self) -> List[str]:
        """
        Get list of files in the vector store

        The list is built from the files cited in the answer to a fixed probe
        query, so it only contains files that the probe happened to cite. A
        built-in default list is returned when nothing is cited.

        Returns:
            Sorted list of filenames, or an empty list if an unexpected
            error occurred
        """
        try:
            response = self.get_response_from_vectorstore(self._FILE_PROBE_QUERY)
            text_part = None if response is None else self._find_text_part(response)
            file_list = [] if text_part is None else sorted(self._cited_filenames(text_part))
        except Exception as e:
            print(f"❌ Error getting files: {e}")
            return []

        # Fallback to default list if empty
        return file_list or list(self._DEFAULT_FILES)