"""LLM generation service using Hugging Face Inference Client SDK"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from app.config import settings
from app.utils.logger import setup_logger
logger = setup_logger(__name__)
class GeneratorService:
    """Text generation via the Hugging Face ``InferenceClient`` chat API.

    Any failure of the remote call is logged and replaced with a canned
    fallback message, so callers always receive a string.
    """

    def __init__(self):
        # Create a single reusable inference client; token comes from settings.
        self.client = InferenceClient(api_key=settings.HF_TOKEN)
        # Model is configurable via settings.HF_MODEL, with a sensible default.
        self.model = getattr(settings, "HF_MODEL", "meta-llama/Llama-3.1-8B-Instruct")

    def generate(
        self,
        prompt: str,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> str:
        """Generate text for *prompt* using the HF chat-completion API.

        Args:
            prompt: User message, sent as a single-turn chat.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.

        Returns:
            The generated text (whitespace-stripped), or a fallback
            apology string if the API call fails for any reason.
        """
        try:
            logger.info(f"Calling HF InferenceClient (model={self.model})...")
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
            )
            # content may legitimately be None; guard before calling .strip()
            # so a missing message is not mistaken for an API failure.
            generated_text = completion.choices[0].message.content or ""
            logger.info("Generation successful")
            return generated_text.strip()
        except Exception:
            # logger.exception records the full traceback, not just the message.
            logger.exception("HF Generation failed")
            return self._fallback_response(prompt)

    def _fallback_response(self, prompt: str) -> str:
        """Return the static apology used when the LLM API fails.

        *prompt* is accepted for interface symmetry but intentionally unused.
        """
        return (
            "I apologize, but I'm unable to generate a response at the moment. "
            "Please try again later."
        )

    def generate_rag_response(
        self,
        query: str,
        context: str,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> str:
        """Answer *query* grounded only in *context* via a RAG-style prompt.

        Backward compatible: existing two-argument callers get the same
        defaults `generate` used before; new callers may tune generation.
        """
        prompt = self._build_rag_prompt(query, context)
        return self.generate(prompt, max_tokens=max_tokens, temperature=temperature)

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build the WorkWise RAG prompt embedding *context* and *query*."""
        # Flush-left literal so the model never sees stray indentation.
        return f"""
You are WorkWise, an AI assistant specialized in analyzing Jira project data.
Answer the user's question based only on the context.
Context:
{context}
User Question: {query}
Provide a clear, concise answer.
If the context doesn't contain enough information, say so.
""".strip()
# Module-level singleton shared by importers. NOTE(review): constructed
# eagerly at import time, so importing this module requires settings.HF_TOKEN
# to be available — confirm that is acceptable for all import paths.
generator = GeneratorService()