File size: 2,988 Bytes
06bd253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
Client library for the RAG API
Use this to call the API from Python code
"""

import requests
from typing import Optional, Dict, List

class RAGAPIClient:
    """Client for the Product Design RAG API.

    Wraps the ``/health`` and ``/query`` endpoints. Request failures are
    reported as dictionaries instead of being raised, so callers inspect the
    returned mapping rather than wrapping every call in ``try``/``except``.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        """
        Initialize the API client

        Args:
            base_url: Base URL of the RAG API. Trailing slashes are removed
                so endpoint paths can be appended directly.
        """
        self.base_url = base_url.rstrip('/')

    def health_check(self) -> Dict:
        """
        Check if the API is healthy

        Returns:
            The decoded JSON body of ``GET /health`` on success; otherwise
            ``{"status": "unhealthy", "error": <message>}``.
        """
        try:
            response = requests.get(f"{self.base_url}/health", timeout=5)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberately broad: a health probe is best-effort, and any
            # failure (connection, HTTP status, bad body) means "unhealthy".
            return {"status": "unhealthy", "error": str(e)}

    def query(
        self,
        question: str,
        top_k: int = 5,
        max_tokens: int = 1024,
        timeout: int = 5
    ) -> Dict:
        """
        Query the RAG system

        Args:
            question: The question to ask
            top_k: Number of documents to retrieve
            max_tokens: Maximum tokens in response
            timeout: Request timeout in seconds

        Returns:
            The decoded JSON body of ``POST /query`` on success. On a
            timeout, any other request failure (including HTTP error
            statuses), or an undecodable body, a dictionary of the form
            ``{"success": False, "error": <message>}``.
        """
        try:
            response = requests.post(
                f"{self.base_url}/query",
                json={
                    "question": question,
                    "top_k": top_k,
                    "max_tokens": max_tokens
                },
                timeout=timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.Timeout:
            # Must precede RequestException: Timeout is a subclass of it.
            return {
                "success": False,
                "error": f"Request timed out after {timeout} seconds"
            }
        except requests.exceptions.RequestException as e:
            return {
                "success": False,
                "error": f"Request failed: {str(e)}"
            }
        except ValueError as e:
            # response.json() can raise a plain ValueError (not a
            # RequestException) for a non-JSON body on some requests
            # releases. Kept last so RequestException subclasses that are
            # also ValueErrors still produce the message above.
            return {
                "success": False,
                "error": f"Invalid JSON response: {e}"
            }

    def query_fast(self, question: str, timeout: int = 5) -> Dict:
        """
        Fast query with optimized settings for <3 second responses

        Delegates to ``query`` with fewer retrieved documents and a smaller
        token budget.

        Args:
            question: The question to ask
            timeout: Request timeout in seconds (defaults to 5)

        Returns:
            Dictionary with answer, timing, and sources
        """
        return self.query(
            question=question,
            top_k=3,  # Fewer docs for speed
            max_tokens=512,  # Shorter responses
            timeout=timeout
        )

# Example usage
if __name__ == "__main__":
    # Demo run against an API instance listening on the local default port.
    api = RAGAPIClient(base_url="http://localhost:8000")

    # Report service health first.
    health = api.health_check()
    print("Health check:", health)

    # Ask a sample question, then show the main fields of the response.
    # Missing fields fall back to placeholder values.
    response = api.query("What are the three product tiers?")
    answer = response.get('answer', 'N/A')
    elapsed = response.get('total_time', 0)
    succeeded = response.get('success', False)

    print("\nQuery result:")
    print(f"Answer: {answer}")
    print(f"Total time: {elapsed:.2f}s")
    print(f"Success: {succeeded}")