import logging
import os
import re
import sys
import traceback
import uuid
from typing import Any, Dict, List, Optional, Tuple

import requests
from dotenv import load_dotenv
from langsmith import Client

# Ensure environment variables are loaded
load_dotenv()

logger = logging.getLogger(__name__)


def validate_api_key(api_key: Optional[str]) -> Tuple[bool, str]:
    """Validate that an API key looks like a LangSmith key and can connect.

    Args:
        api_key: The key to check.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is True only when the
        HTTP probe answers with status 200; ``message`` describes the outcome.
    """
    if not api_key:
        return False, "API key is empty or None"

    # Check format - LangSmith API keys typically start with "lsv2_"
    if not api_key.startswith("lsv2_"):
        return False, f"API key does not match expected format (should start with 'lsv2_'): {api_key[:5]}..."

    # Try a simple API call to validate
    try:
        endpoint = os.getenv("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
        # Avoid a double slash when the configured endpoint ends with "/".
        endpoint = endpoint.rstrip("/")
        # NOTE(review): the path ("/projects") and the Bearer scheme are kept
        # exactly as written; confirm both against the LangSmith REST API.
        headers = {"Authorization": f"Bearer {api_key}"}
        response = requests.get(f"{endpoint}/projects", headers=headers, timeout=5)

        if response.status_code == 200:
            return True, "API key is valid"
        elif response.status_code == 401:
            return False, f"API key is invalid (401 Unauthorized): {api_key[:5]}..."
        else:
            return False, f"API error (status code {response.status_code})"
    except Exception as e:
        # Best-effort probe: any failure is reported to the caller, not raised.
        return False, f"Error validating API key: {str(e)}"


def _log_caught_exception(action: str) -> None:
    """Log the exception currently being handled, with file name and line.

    Must be called from inside an ``except`` block.

    Args:
        action: Text inserted after "Error " in the log message.
    """
    _, exc, exc_tb = sys.exc_info()
    if exc_tb is None:
        # Called outside an except block; nothing to locate.
        logger.error(f"Error {action}")
        return
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    logger.error(f"Error {action} in {fname}, line {exc_tb.tb_lineno}: {str(exc)}")


def _document_text(doc: Any) -> str:
    """Return the text of one retrieved document as a string.

    A non-empty tuple is represented by its first element, an object with a
    ``page_content`` attribute by that attribute, anything else by ``str()``.
    The tuple case recurses so that a first element which is itself a
    document object is reduced to its text.
    """
    if isinstance(doc, tuple) and len(doc) > 0:
        return _document_text(doc[0])
    if hasattr(doc, "page_content"):
        return str(doc.page_content)
    return str(doc)


class LangSmithTracer:
    """Thin wrapper around a LangSmith ``Client`` that logs RAG runs.

    Attributes set by ``__init__``:
        tracing_enabled: True only when a client was created, the test call
            succeeded, and tracing is switched on via the environment.
        client: The LangSmith client, or None when tracing is unavailable.
        project_name: Project that runs are logged to.
    """

    def __init__(self):
        """Initialize LangSmith tracer for evaluating context quality and prompts."""
        # Default to disabled for safety
        self.tracing_enabled = False
        self.client = None
        self.project_name = os.getenv("LANGSMITH_PROJECT", "pythonic-rag")

        # Initialize LangSmith client
        try:
            # Debug environment variables
            api_key = os.getenv("LANGSMITH_API_KEY")
            tracing_v2 = os.getenv("LANGCHAIN_TRACING_V2")
            tracing = os.getenv("LANGSMITH_TRACING")
            project = os.getenv("LANGSMITH_PROJECT")
            endpoint = os.getenv("LANGSMITH_ENDPOINT")

            logger.info(f"LangSmith Environment: LANGSMITH_API_KEY={'present' if api_key else 'missing'}, "
                        f"LANGCHAIN_TRACING_V2={tracing_v2}, LANGSMITH_TRACING={tracing}, "
                        f"LANGSMITH_PROJECT={project}, LANGSMITH_ENDPOINT={endpoint}")

            # Force-enable tracing if LANGSMITH_TRACING is true
            if tracing and tracing.lower() == "true":
                os.environ["LANGCHAIN_TRACING_V2"] = "true"
                tracing_v2 = "true"

            # Quick validation to avoid API calls if key is obviously invalid
            if not api_key or len(api_key) < 10:
                logger.warning("LangSmith API key missing or invalid. Tracing will be disabled.")
                return

            # Initialize client with explicit parameters. A None endpoint lets
            # the client fall back to its own default.
            self.client = Client(api_url=endpoint or None, api_key=api_key)
            self.project_name = project or "pythonic-rag"
            # Coerce to a real bool; the bare `and` expression could yield
            # None or "" when the variable is unset or empty.
            self.tracing_enabled = bool(tracing_v2 and tracing_v2.lower() == "true")

            # Try a test API call to confirm it works
            try:
                # list_projects() is lazy: no request is sent until the
                # iterator is consumed, so pull (at most) one item.
                next(iter(self.client.list_projects(limit=1)), None)
                logger.info(f"LangSmith client initialized successfully with tracing_enabled={self.tracing_enabled}")
            except Exception as e:
                logger.error(f"LangSmith API test failed, disabling tracing: {str(e)}")
                self.tracing_enabled = False
                self.client = None

        except Exception:
            exc_type = sys.exc_info()[0]
            _log_caught_exception("initializing LangSmith client")
            logger.error(f"Exception type: {exc_type}, Traceback: {traceback.format_exc()}")

    def log_retrieval(self,
                      query: str,
                      retrieved_documents: List[Any],
                      user_id: Optional[str] = None,
                      session_id: Optional[str] = None) -> Optional[str]:
        """
        Log document retrieval to LangSmith for evaluation.

        Args:
            query: User query
            retrieved_documents: List of retrieved documents/contexts
            user_id: User identifier (optional)
            session_id: Session identifier (optional)

        Returns:
            run_id: The LangSmith run ID if the run was logged, None when
                tracing is disabled or logging failed
        """
        if not self.tracing_enabled or self.client is None:
            return None

        try:
            # Create metadata
            metadata = {
                "user_id": user_id or "anonymous",
                "session_id": session_id or "unknown"
            }

            # Format retrieved documents for logging
            context_texts = [_document_text(doc) for doc in retrieved_documents or []]

            # Generate the ID client-side so it can be handed back to the caller.
            run_id = uuid.uuid4()

            # Log the run using updated API
            self.client.create_run(
                id=run_id,
                name="Document Retrieval",
                run_type="retriever",
                inputs={"query": query},
                outputs={"retrieved_documents": context_texts},
                # Whitespace-separated word count, used as a rough token estimate.
                runtime={
                    "total_tokens": sum(len(text.split()) for text in context_texts)
                },
                project_name=self.project_name,
                tags=["retrieval"],
                metadata=metadata
            )

            logger.info("Logged retrieval run to LangSmith")
            return str(run_id)

        except Exception:
            # Tracing is best-effort: never let a logging failure reach the caller.
            _log_caught_exception("logging retrieval to LangSmith")
            return None

    def log_rag_generation(self,
                           query: str,
                           context: str,
                           response: str,
                           system_prompt: str,
                           user_prompt: str,
                           user_id: Optional[str] = None,
                           session_id: Optional[str] = None,
                           parent_run_id: Optional[str] = None) -> Optional[str]:
        """
        Log RAG generation to LangSmith for evaluation.

        Args:
            query: User query
            context: Retrieved context
            response: Generated response
            system_prompt: System prompt template
            user_prompt: User prompt template
            user_id: User identifier (optional)
            session_id: Session identifier (optional)
            parent_run_id: Parent run ID for linking retrieval and generation
                (optional); recorded in the run metadata

        Returns:
            run_id: The LangSmith run ID if the run was logged, None when
                tracing is disabled or logging failed
        """
        if not self.tracing_enabled or self.client is None:
            return None

        try:
            # Create metadata
            metadata = {
                "user_id": user_id or "anonymous",
                "session_id": session_id or "unknown",
                "parent_run_id": parent_run_id
            }

            # Generate the ID client-side so it can be handed back to the caller.
            run_id = uuid.uuid4()

            # Log the run using updated API
            self.client.create_run(
                id=run_id,
                name="RAG Generation",
                run_type="llm",
                inputs={
                    "query": query,
                    "context": context,
                    "system_prompt": system_prompt,
                    "user_prompt": user_prompt
                },
                outputs={"response": response},
                project_name=self.project_name,
                tags=["generation"],
                metadata=metadata
            )

            logger.info("Logged generation run to LangSmith")
            return str(run_id)

        except Exception:
            # Tracing is best-effort: never let a logging failure reach the caller.
            _log_caught_exception("logging generation to LangSmith")
            return None


# Singleton instance for use throughout the app
langsmith_tracer = LangSmithTracer()