File size: 2,418 Bytes
a1544bb
 
 
 
402298d
 
 
 
 
a1544bb
402298d
a1544bb
 
402298d
a1544bb
 
 
 
 
 
402298d
 
 
 
a1544bb
402298d
a1544bb
 
402298d
a1544bb
 
 
 
 
 
 
402298d
a1544bb
 
402298d
a1544bb
402298d
a1544bb
 
 
402298d
a1544bb
402298d
a1544bb
 
 
 
 
 
 
 
402298d
 
a1544bb
402298d
a1544bb
 
 
 
402298d
 
 
 
 
 
a1544bb
 
 
402298d
 
 
a1544bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""LLM generation service using Hugging Face Inference Client SDK"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from app.config import settings
from app.utils.logger import setup_logger

logger = setup_logger(__name__)


class GeneratorService:
    """Handles text generation using Hugging Face InferenceClient.

    Wraps a single reusable ``InferenceClient`` and exposes plain and
    RAG-style generation helpers. API failures never propagate to callers:
    they are logged and replaced by a static fallback message.
    """

    def __init__(self):
        # Create a single reusable inference client authenticated with the
        # project token from settings.
        self.client = InferenceClient(api_key=settings.HF_TOKEN)

        # Use model from settings or fall back to a known-good default.
        self.model = getattr(settings, "HF_MODEL", "meta-llama/Llama-3.1-8B-Instruct")

    def generate(
        self,
        prompt: str,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> str:
        """Generate text for *prompt* via the HF chat-completion API.

        Args:
            prompt: User message, sent as a single-turn chat.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature forwarded to the API.

        Returns:
            The stripped generated text, or the canned fallback message if
            the API call fails or returns no content.
        """
        try:
            # Lazy %-args avoid formatting cost when INFO is disabled.
            logger.info("Calling HF InferenceClient (model=%s)...", self.model)

            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
            )

            generated_text = completion.choices[0].message.content

            # The API may return a message whose content is None; treat that
            # as a failure explicitly instead of raising on None.strip().
            if generated_text is None:
                logger.error("HF Generation returned empty content")
                return self._fallback_response(prompt)

            logger.info("Generation successful")
            return generated_text.strip()

        except Exception as e:
            # logger.exception records the full traceback for debugging.
            logger.exception("HF Generation failed: %s", e)
            return self._fallback_response(prompt)

    def _fallback_response(self, prompt: str) -> str:
        """Return the static apology used when the LLM API fails.

        *prompt* is accepted for interface stability but currently unused.
        """
        return (
            "I apologize, but I'm unable to generate a response at the moment. "
            "Please try again later."
        )

    def generate_rag_response(self, query: str, context: str) -> str:
        """Generate an answer to *query* grounded only in *context* (RAG)."""
        prompt = self._build_rag_prompt(query, context)
        return self.generate(prompt)

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build the WorkWise-style RAG prompt embedding *context* and *query*."""
        return f"""
You are WorkWise, an AI assistant specialized in analyzing Jira project data.
Answer the user's question based only on the context.

Context:
{context}

User Question: {query}

Provide a clear, concise answer. 
If the context doesn't contain enough information, say so.
""".strip()


# Module-level singleton, created at import time.
# NOTE(review): importing this module constructs an InferenceClient with
# settings.HF_TOKEN as a side effect — confirm this import-time work is
# intended (it also requires HF_TOKEN to be configured at import).
generator = GeneratorService()