File size: 2,304 Bytes
07a91a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""End-to-end orchestration for generation and validation."""

from __future__ import annotations

import time

from src.config import settings
from src.generator import generate_response
from src.hallucination import hallucination_check
from src.model_loader import load_model_bundle
from src.prompts import SYSTEM_PROMPT, build_user_prompt
from src.rag import CodeRAG
from src.relevancy import RelevancyScorer


class CodingLLMPipeline:
    """Drive one request through retrieval, generation, and quality checks.

    The instance holds a relevancy scorer, an optional ``CodeRAG`` retriever
    (only built when ``settings.use_rag`` is truthy), and a model bundle that
    is loaded on first use instead of at construction time.
    """

    def __init__(self):
        # The model bundle is filled in lazily by _ensure_model_loaded().
        self.model_bundle = None
        self.relevancy = RelevancyScorer()
        self.rag = None
        if settings.use_rag:
            self.rag = CodeRAG()

    def _ensure_model_loaded(self):
        """Load the model bundle unless one is already attached."""
        if self.model_bundle is not None:
            return
        self.model_bundle = load_model_bundle()

    def run(self, instruction: str, user_input: str) -> dict:
        """Generate code for a request and attach quality metrics.

        Args:
            instruction: Task description; combined with ``user_input`` to
                form the retrieval / relevancy query.
            user_input: Additional input accompanying the instruction.

        Returns:
            A dict with the keys ``code``, ``explanation``, ``confidence``
            and ``relevancy_score`` (both rounded to 4 decimals),
            ``important_tokens``, ``hallucination``, and ``latency_ms``
            (whole milliseconds). When the hallucination check flags the
            code, its reason is appended to the explanation.
        """
        # Timer starts before the lazy load, so the first call's latency
        # includes the time spent loading the model bundle.
        t0 = time.perf_counter()
        self._ensure_model_loaded()

        query_text = f"{instruction}\n{user_input}".strip()
        if self.rag:
            context = self.rag.retrieve(query_text)
        else:
            context = ""

        user_prompt = build_user_prompt(instruction, user_input, context)
        prompt = f"{SYSTEM_PROMPT}\n\n{user_prompt}"
        generation = generate_response(self.model_bundle, prompt)

        verdict = hallucination_check(generation.code)
        relevancy_score = self.relevancy.score(query_text, generation.code)

        explanation = generation.explanation
        if verdict.hallucination:
            explanation = f"{explanation}\n\nHallucination check reason: {verdict.reason}"

        elapsed_s = time.perf_counter() - t0
        latency_ms = int(elapsed_s * 1000)

        result = {
            "code": generation.code,
            "explanation": explanation,
            "confidence": round(generation.confidence, 4),
            "important_tokens": generation.important_tokens,
            "relevancy_score": round(relevancy_score, 4),
            "hallucination": verdict.hallucination,
            "latency_ms": latency_ms,
        }
        return result

    @property
    def active_model_name(self) -> str:
        """Name reported by the model bundle; triggers the lazy load if needed."""
        self._ensure_model_loaded()
        bundle = self.model_bundle
        return bundle.active_model_name