"""LLM generation service using Hugging Face Inference Client SDK""" import os from typing import Optional from huggingface_hub import InferenceClient from app.config import settings from app.utils.logger import setup_logger logger = setup_logger(__name__) class GeneratorService: """Handles text generation using Hugging Face InferenceClient""" def __init__(self): # Create a single reusable inference client self.client = InferenceClient(api_key=settings.HF_TOKEN) # Use model from settings or fallback self.model = getattr(settings, "HF_MODEL", "meta-llama/Llama-3.1-8B-Instruct") def generate( self, prompt: str, max_tokens: int = 512, temperature: float = 0.7, ) -> str: """Generate text using HF chat-completion API""" try: logger.info(f"Calling HF InferenceClient (model={self.model})...") completion = self.client.chat.completions.create( model=self.model, messages=[{"role": "user", "content": prompt}], max_tokens=max_tokens, temperature=temperature, ) generated_text = completion.choices[0].message.content logger.info("Generation successful") return generated_text.strip() except Exception as e: logger.error(f"HF Generation failed: {str(e)}") return self._fallback_response(prompt) def _fallback_response(self, prompt: str) -> str: """Fallback response when LLM API fails""" return ( "I apologize, but I'm unable to generate a response at the moment. " "Please try again later." ) def generate_rag_response(self, query: str, context: str) -> str: """Generate response using RAG-style prompt formatting""" prompt = self._build_rag_prompt(query, context) return self.generate(prompt) def _build_rag_prompt(self, query: str, context: str) -> str: """Build WorkWise-style RAG prompt""" return f""" You are WorkWise, an AI assistant specialized in analyzing Jira project data. Answer the user's question based only on the context. Context: {context} User Question: {query} Provide a clear, concise answer. If the context doesn't contain enough information, say so. """.strip() # Global instance generator = GeneratorService()