Spaces:
Build error
Build error
File size: 5,107 Bytes
40f6dcf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | """
Basic Example - API Client
Simple example showing how to use the RAG API programmatically.
"""
import aiohttp
import asyncio
from typing import Dict, Any
class RAGClient:
    """Minimal asynchronous client for the RAG HTTP API.

    The client can be used in two ways:

    * inside ``async with RAGClient(...) as client:`` -- one HTTP session is
      opened on entry, shared by every call, and closed on exit;
    * standalone -- each call opens a temporary session and closes it again
      before returning.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        """Store the API base URL; no network resources are created yet."""
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        """Open the shared HTTP session (if not already open) and return self."""
        # Do not replace a live session: overwriting it would leak the old one.
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the shared HTTP session and forget it."""
        if self.session:
            await self.session.close()
            # Reset so later standalone calls open their own temporary session
            # instead of trying to use a closed one.
            self.session = None

    async def _request(
        self,
        method: str,
        path: str,
        failure_label: str,
        payload: Dict[str, Any] = None,
        include_error_body: bool = True,
    ) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
            path: Path appended to ``base_url`` (starting with ``/``).
            failure_label: Prefix used in the error message on non-200 replies.
            payload: Optional JSON body.
            include_error_body: Whether the response text is appended to the
                error message.

        Raises:
            Exception: If the response status is not 200.
        """
        url = f"{self.base_url}{path}"
        # Reuse the context-managed session when one is open; otherwise own a
        # temporary session for the duration of this single call.
        borrowed = self.session is not None and not self.session.closed
        session = self.session if borrowed else aiohttp.ClientSession()
        try:
            async with session.request(method, url, json=payload) as response:
                if response.status == 200:
                    return await response.json()
                if include_error_body:
                    error_text = await response.text()
                    raise Exception(
                        f"{failure_label} failed: {response.status} - {error_text}"
                    )
                raise Exception(f"{failure_label} failed: {response.status}")
        finally:
            if not borrowed:
                await session.close()

    async def ingest_document(
        self, content: str, metadata: Dict[str, Any] = None, chunk_strategy: str = "semantic"
    ) -> Dict[str, Any]:
        """Ingest a single document via ``POST /ingest``.

        Args:
            content: Document text.
            metadata: Optional metadata dict; an empty dict is sent when omitted.
            chunk_strategy: Value forwarded as ``chunk_strategy`` in the payload.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: If the server does not answer with status 200.
        """
        payload = {
            "documents": [{"content": content, "metadata": metadata or {}}],
            "chunk_strategy": chunk_strategy,
        }
        return await self._request("POST", "/ingest", "Ingestion", payload)

    async def query(
        self,
        question: str,
        top_k: int = 5,
        include_sources: bool = True,
        include_confidence: bool = True,
    ) -> Dict[str, Any]:
        """Query the RAG system via ``POST /query``.

        Args:
            question: Text sent as the ``query`` field.
            top_k: Value forwarded as ``top_k``.
            include_sources: Value forwarded as ``include_sources``.
            include_confidence: Value forwarded as ``include_confidence``.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: If the server does not answer with status 200.
        """
        payload = {
            "query": question,
            "top_k": top_k,
            "include_sources": include_sources,
            "include_confidence": include_confidence,
        }
        return await self._request("POST", "/query", "Query", payload)

    async def get_stats(self) -> Dict[str, Any]:
        """Fetch system statistics via ``GET /stats``.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: If the server does not answer with status 200.
        """
        return await self._request(
            "GET", "/stats", "Stats request", include_error_body=False
        )
async def main():
    """Run the API client walkthrough against a server at localhost:8000.

    The walkthrough fetches stats, ingests one sample document, runs one
    query, and prints the stats again. Response fields that are missing,
    null, or empty are replaced by placeholders when printed, so an
    unexpected payload shape does not abort the demo. Any exception is
    printed and then re-raised.
    """
    print("RAG API Client Example")
    print("=" * 50)
    client = RAGClient("http://localhost:8000")
    try:
        async with client:
            # 1. Check health (the stats call doubles as the probe here)
            print("\n1. Checking health...")
            health = await client.get_stats()
            print(f" Status: {health.get('status', 'unknown')}")

            # 2. Ingest document
            print("\n2. Ingesting document...")
            doc_content = """
The transformer architecture, introduced in the 2017 paper 'Attention Is All You Need' by Vaswani et al., revolutionized natural language processing. It uses self-attention mechanisms to weigh the importance of different words in a sequence.
Key features include:
- Parallel computation: All positions in the sequence can be processed simultaneously
- Long-range dependencies: Unlike RNNs, transformers can learn long-range dependencies
- Scalability: Can handle very long sequences
- Transfer learning: Pre-trained models can be fine-tuned for specific tasks
"""
            result = await client.ingest_document(
                content=doc_content,
                metadata={"title": "Transformers", "source": "example"},
                chunk_strategy="semantic",
            )
            # `or` also covers an empty or null list, which a plain .get()
            # default would let through to an IndexError/TypeError on [0].
            doc_ids = result.get("document_ids") or ["N/A"]
            print(f" Document ID: {doc_ids[0]}")
            print(f" Chunks created: {result.get('total_chunks', 0)}")

            # 3. Query
            print("\n3. Querying RAG system...")
            query_result = await client.query(
                question="What is the transformer architecture?", top_k=5
            )
            # Null fields would break slicing, float formatting and len().
            answer = query_result.get("answer") or ""
            confidence = query_result.get("confidence") or 0
            sources = query_result.get("sources") or []
            elapsed_ms = query_result.get("total_time_ms") or 0
            print(f" Answer: {answer[:100]}")
            print(f" Confidence: {confidence:.2f}")
            print(f" Sources retrieved: {len(sources)}")
            print(f" Response time: {elapsed_ms:.2f}ms")

            # 4. Get stats
            print("\n4. Getting statistics...")
            stats = await client.get_stats()
            for key, value in stats.items():
                print(f" {key}: {value}")

            print("\n" + "=" * 50)
            print("API client example completed!")
    except Exception as e:
        # Report the failure, then re-raise so the error still propagates.
        print(f"\nError: {e}")
        raise
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
|