# nur-brain — app/llm_service.py
# LLM loading and inference service (llama-cpp-python backend).
import logging
from typing import Optional
from llama_cpp import Llama
from app.config import get_settings
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
# Lazily-initialized singleton Llama instance; populated by get_llm().
_llm_model: Optional[Llama] = None
def get_llm() -> Llama:
    """Return the process-wide Llama model, loading it on first use.

    Reads the model path and tuning knobs from application settings and
    caches the constructed ``Llama`` instance in the module-level
    ``_llm_model`` singleton so subsequent calls are free.

    Returns:
        The shared ``Llama`` instance.

    Raises:
        ValueError: If ``settings.model_path`` is empty (e.g. the model
            download failed before this was called).
    """
    global _llm_model
    # NOTE(review): this lazy init is not guarded by a lock — two threads
    # racing here could each load the model. Confirm callers are
    # single-threaded at startup, or add a threading.Lock if not.
    if _llm_model is None:
        settings = get_settings()
        if not settings.model_path:
            raise ValueError("Model path is empty. Download failed?")
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logger.info("⚡ Loading Llama-3 from: %s", settings.model_path)
        # High-Performance Configuration
        _llm_model = Llama(
            model_path=settings.model_path,
            n_ctx=settings.model_n_ctx,
            n_threads=settings.model_n_threads,
            n_parallel=settings.model_n_parallel,  # CRITICAL for 1000 users
            n_batch=512,  # Processes tokens in chunks (Faster)
            verbose=False,
        )
        logger.info("✅ Brain Loaded with Parallel Powers")
    return _llm_model
# System prompt injected into every chat completion; defines the assistant's
# persona and answer format. Used verbatim by generate_islamic_reflection().
ISLAMIC_SYSTEM_PROMPT = """You are Nur, a knowledgeable and compassionate Islamic assistant.
Guidelines:
1. Base responses on Quran and Sahih Hadith.
2. Be respectful and gentle.
3. Cite sources clearly.
4. Begin with "Bismillah"."""
def generate_islamic_reflection(prompt: str, max_tokens: int = 512, temperature: float = 0.7):
    """Run one completion against the shared model using the Nur system prompt.

    Wraps *prompt* in the Llama-3 chat template together with
    ``ISLAMIC_SYSTEM_PROMPT``, then invokes the cached model.

    Args:
        prompt: The user's question or request.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature passed through to the model.

    Returns:
        Dict with ``"reflection"`` (stripped generated text) and
        ``"usage"`` (the backend's token-usage accounting).
    """
    model = get_llm()
    # Assemble the Llama-3 chat template: system block, user block, then an
    # open assistant header so the model continues as the assistant.
    templated = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        f"{ISLAMIC_SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
        f"{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    )
    completion = model(
        templated,
        max_tokens=max_tokens,
        temperature=temperature,
        # Halt generation at either end-of-turn or end-of-text markers.
        stop=["<|eot_id|>", "<|end_of_text|>"],
        echo=False,
    )
    generated = completion["choices"][0]["text"]
    return {
        "reflection": generated.strip(),
        "usage": completion["usage"],
    }