# langgraph-rag-agent/src/llm_utils.py
"""
LLM utilities for handling different LLM providers.
Supports OpenAI and Hugging Face models.
"""
import os
from typing import Optional, List
from dotenv import load_dotenv
# Load environment variables
load_dotenv(override=True)
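# Environment variables read below (names come from this module; the values
# shown are illustrative placeholders, not defaults):
#   OPENAI_API_KEY=sk-...                     # required for provider="openai"
#   OPENAI_MODEL=gpt-3.5-turbo                # optional chat-model override
#   HUGGINGFACE_API_TOKEN=hf_...              # optional, needed for gated models
#   HUGGINGFACE_MODEL=google/flan-t5-large    # optional HF model override
#   EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2   # optional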
class LLMHandler:
"""Handler for different LLM providers."""
def __init__(
self,
provider: str = "openai",
model_name: Optional[str] = None,
temperature: float = 0.7,
max_tokens: int = 500
):
"""
Initialize LLM handler.
Args:
provider: LLM provider ("openai" or "huggingface")
model_name: Model name (optional, uses default if not provided)
temperature: Temperature for generation
max_tokens: Maximum tokens to generate
"""
self.provider = provider.lower()
self.temperature = temperature
self.max_tokens = max_tokens
self.model = None
self.tokenizer = None
self.embedding_model = None
if self.provider == "openai":
self._initialize_openai(model_name)
elif self.provider == "huggingface":
self._initialize_huggingface(model_name)
else:
raise ValueError(f"Unsupported provider: {provider}")
def _initialize_openai(self, model_name: Optional[str] = None):
"""Initialize OpenAI client."""
try:
from openai import OpenAI
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise ValueError("OPENAI_API_KEY not found in environment variables")
self.client = OpenAI(api_key=api_key)
self.model_name = model_name or os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
print(f"✓ OpenAI client initialized with model: {self.model_name}")
except ImportError:
raise ImportError("OpenAI package not installed. Run: pip install openai")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}") from e
def _initialize_huggingface(self, model_name: Optional[str] = None):
"""Initialize Hugging Face model."""
try:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM
import torch
# Get model name from parameter or environment
if model_name is None:
model_name = os.getenv(
"HUGGINGFACE_MODEL",
"google/flan-t5-large"
)
self.model_name = model_name
print(f"Initializing LLM: huggingface - {self.model_name}")
# Get HF token
hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
if not hf_token:
print("⚠️ Warning: HUGGINGFACE_API_TOKEN not found. Some models may not be accessible.")
# Determine device
self.device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {self.device}")
# Load tokenizer
print(f"Loading tokenizer for {self.model_name}...")
self.tokenizer = AutoTokenizer.from_pretrained(
self.model_name,
token=hf_token,
trust_remote_code=True
)
# Load model based on type
print(f"Loading model {self.model_name}...")
# Detect model type
if "t5" in self.model_name.lower() or "flan" in self.model_name.lower():
# Seq2Seq models (T5, Flan-T5)
self.model = AutoModelForSeq2SeqLM.from_pretrained(
self.model_name,
token=hf_token,
trust_remote_code=True,
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
)
else:
# Causal LM models (Mistral, Llama, etc.)
self.model = AutoModelForCausalLM.from_pretrained(
self.model_name,
token=hf_token,
trust_remote_code=True,
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
)
self.model.to(self.device)
self.model.eval()
print(f"✓ LLM initialized successfully")
except ImportError as e:
raise ImportError(f"Required packages not installed: {e}")
except Exception as e:
raise Exception(f"Failed to initialize Hugging Face model: {e}")
def generate(
self,
prompt: str,
system_message: Optional[str] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None
) -> str:
"""
Generate text using the LLM.
Args:
prompt: Input prompt
system_message: Optional system message
temperature: Optional temperature override
max_tokens: Optional max tokens override
Returns:
Generated text
"""
temp = temperature if temperature is not None else self.temperature
max_tok = max_tokens if max_tokens is not None else self.max_tokens
        if self.provider == "openai":
            return self._generate_openai(prompt, system_message, temp, max_tok)
        elif self.provider == "huggingface":
            return self._generate_huggingface(prompt, system_message, temp, max_tok)
        raise ValueError(f"Unsupported provider: {self.provider}")
def _generate_openai(
self,
prompt: str,
system_message: Optional[str],
temperature: float,
max_tokens: int
) -> str:
"""Generate using OpenAI."""
messages = []
if system_message:
messages.append({"role": "system", "content": system_message})
messages.append({"role": "user", "content": prompt})
response = self.client.chat.completions.create(
model=self.model_name,
messages=messages,
temperature=temperature,
max_tokens=max_tokens
)
return response.choices[0].message.content
def _generate_huggingface(
self,
prompt: str,
system_message: Optional[str],
temperature: float,
max_tokens: int
) -> str:
"""Generate using Hugging Face."""
import torch
# Construct full prompt
if system_message:
full_prompt = f"{system_message}\n\n{prompt}"
else:
full_prompt = prompt
        # Tokenize (truncation caps the prompt at 512 tokens, so very long contexts are clipped)
inputs = self.tokenizer(
full_prompt,
return_tensors="pt",
truncation=True,
max_length=512
).to(self.device)
        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature if temperature > 0 else None,
                do_sample=temperature > 0,
                top_p=0.9,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Decode. Seq2seq models (T5/Flan-T5) emit only the answer, while causal
        # models echo the prompt, so strip the prompt at the token level; slicing
        # the decoded string by len(full_prompt) breaks whenever decoding does
        # not reproduce the prompt byte-for-byte.
        if "t5" in self.model_name.lower() or "flan" in self.model_name.lower():
            generated_ids = outputs[0]
        else:
            generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        return self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
def generate_with_context(
self,
query: str,
context: str,
system_message: Optional[str] = None
) -> str:
"""
Generate answer using query and context.
Args:
query: User query
context: Retrieved context
system_message: Optional system message
Returns:
Generated answer
"""
prompt = f"""Context:
{context}
Question: {query}
Answer the question based on the context provided above. Be concise and accurate."""
return self.generate(prompt, system_message)
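# Usage sketch for LLMHandler (kept as a comment; assumes the relevant API key
# or token is set in the environment, and the query/context strings are made up):
#
#   handler = LLMHandler(provider="huggingface", temperature=0.3, max_tokens=200)
#   answer = handler.generate_with_context(
#       query="What is LangGraph?",
#       context="LangGraph is a library for building stateful agent graphs.",
#       system_message="Answer only from the supplied context.",
#   )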
class EmbeddingHandler:
"""Handler for embedding models."""
def __init__(self, model_name: Optional[str] = None):
"""
Initialize embedding handler.
Args:
model_name: Embedding model name
"""
from sentence_transformers import SentenceTransformer
self.model_name = model_name or os.getenv(
"EMBEDDING_MODEL",
"sentence-transformers/all-MiniLM-L6-v2"
)
print(f"Loading embedding model: {self.model_name}")
self.model = SentenceTransformer(self.model_name)
print(f"✓ Embedding model loaded successfully")
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""
Embed a list of documents.
Args:
texts: List of text documents
Returns:
List of embeddings
"""
embeddings = self.model.encode(texts, convert_to_numpy=True)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""
Embed a single query.
Args:
text: Query text
Returns:
Embedding vector
"""
embedding = self.model.encode(text, convert_to_numpy=True)
return embedding.tolist()
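# Retrieval sketch pairing embed_documents with embed_query (cosine similarity
# via numpy; the sample texts are illustrative):
#
#   import numpy as np
#   embedder = EmbeddingHandler()
#   doc_vecs = [np.array(v) for v in embedder.embed_documents(
#       ["LangGraph builds agent graphs.", "Paris is the capital of France."])]
#   q = np.array(embedder.embed_query("What is LangGraph?"))
#   scores = [float(q @ d / (np.linalg.norm(q) * np.linalg.norm(d))) for d in doc_vecs]
#   best_index = scores.index(max(scores))  # index of the closest document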
def create_llm_handler(
provider: str = "openai",
model_name: Optional[str] = None,
temperature: float = 0.7,
max_tokens: int = 500
) -> LLMHandler:
"""
Create and return an LLM handler.
Args:
provider: LLM provider
model_name: Model name
temperature: Temperature
max_tokens: Max tokens
Returns:
LLMHandler instance
"""
return LLMHandler(provider, model_name, temperature, max_tokens)
def create_embedding_handler(model_name: Optional[str] = None) -> EmbeddingHandler:
"""
Create and return an embedding handler.
Args:
model_name: Embedding model name
Returns:
EmbeddingHandler instance
"""
return EmbeddingHandler(model_name)
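if __name__ == "__main__":
    # Smoke-test sketch wiring the two factories into a minimal
    # retrieve-then-generate loop. Assumes OPENAI_API_KEY is set and the
    # openai / sentence-transformers packages are installed; the documents
    # and query are illustrative only.
    import numpy as np

    embedder = create_embedding_handler()
    documents = [
        "LangGraph lets you compose LLM calls into stateful graphs.",
        "The Eiffel Tower is located in Paris.",
    ]
    doc_vecs = [np.array(v) for v in embedder.embed_documents(documents)]

    query = "What does LangGraph do?"
    q = np.array(embedder.embed_query(query))
    scores = [float(q @ d / (np.linalg.norm(q) * np.linalg.norm(d))) for d in doc_vecs]
    context = documents[scores.index(max(scores))]

    llm = create_llm_handler(provider="openai", temperature=0.2, max_tokens=200)
    print(llm.generate_with_context(query=query, context=context))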