# NOTE(review): the lines below are web-scrape residue (Hugging Face Spaces
# page header, commit hashes, and the line-number gutter) — not Python source.
# Kept as comments so the file parses; safe to delete.
#   Spaces: Sleeping / Sleeping — File size: 2,469 Bytes
#   commits: 28a80d3 10b2640 28a80d3 612b0f4 146b2d1 97857a2 65a2085
import os
from typing import List, Generator, AsyncGenerator
import openai
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from dotenv import load_dotenv
from langchain_core.documents import Document
load_dotenv()
class RAGProcessor:
    """Retrieval-augmented generation over a FAISS index.

    Encodes documents with a SentenceTransformer model, stores the
    embeddings in an exact (flat) L2 FAISS index, and answers queries by
    streaming an OpenAI chat completion grounded in retrieved passages.
    """

    def __init__(self, model_name: str = "bsmith3715/legal-ft-demo_final"):
        """Load the embedding model and create the OpenAI client.

        Args:
            model_name: SentenceTransformer model id or local path.
        """
        self.model = SentenceTransformer(model_name)
        self.index = None  # FAISS index; built lazily by add_documents()
        self.documents = []  # raw text of the currently indexed documents
        self.openai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    def add_documents(self, documents: "List[Document]") -> None:
        """Index a batch of documents, REPLACING any previously indexed set.

        Note: despite the name, each call rebuilds the corpus and index
        from scratch rather than appending to the existing ones.

        Args:
            documents: LangChain Documents whose ``page_content`` is indexed.
        """
        self.documents = [doc.page_content for doc in documents]
        if not self.documents:
            # Nothing to index: reset so retrieval cleanly returns [].
            # (The original crashed here on embeddings.shape[1] for [].)
            self.index = None
            return
        embeddings = self.model.encode(self.documents)
        # FAISS requires float32 input; use a flat (exact) L2 index.
        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dimension)
        self.index.add(embeddings.astype('float32'))

    def retrieve_relevant_context(self, query: str, k: int = 3) -> List[str]:
        """Return up to *k* indexed documents most similar to *query*.

        Returns an empty list when nothing has been indexed yet.
        """
        if self.index is None or not self.documents:
            return []
        # Never ask FAISS for more neighbours than we have documents:
        # it pads the result with -1, and documents[-1] would silently
        # return the LAST document as a bogus match.
        k = min(k, len(self.documents))
        query_embedding = self.model.encode([query])
        _distances, indices = self.index.search(query_embedding.astype('float32'), k)
        # Defensively drop any -1 padding before dereferencing.
        return [self.documents[i] for i in indices[0] if i >= 0]

    async def generate_response(self, query: str) -> AsyncGenerator[str, None]:
        """Stream an answer to *query*, grounded in retrieved context.

        Yields response text chunks as they arrive from the API.

        NOTE(review): this uses the *synchronous* OpenAI client, so each
        network read blocks the event loop despite the ``async`` signature;
        switching to ``openai.AsyncOpenAI`` would make it truly
        non-blocking — confirm with the callers before changing.
        """
        relevant_docs = self.retrieve_relevant_context(query)
        context = "\n".join(relevant_docs)
        prompt = f"""Context information is below.
---------------------
{context}
---------------------
Given the context information, please answer the following question. If the context doesn't contain relevant information, say so.
Question: {query}
Answer:"""
        stream = self.openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful Pilates instructor assistant. Use the provided context to answer questions accurately."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=1000,
            stream=True
        )
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content