Update app/llm_service.py
app/llm_service.py  +18 -78
@@ -1,120 +1,60 @@
-"""
-LLM Service for Islamic Reflections using Llama-3
-"""
 import logging
 from typing import Optional
 from llama_cpp import Llama
 from app.config import get_settings
 
 logger = logging.getLogger(__name__)
-
-# Global model instance
 _llm_model: Optional[Llama] = None
 
-
 def get_llm() -> Llama:
-    """Get or initialize the LLM model."""
     global _llm_model
-
     if _llm_model is None:
         settings = get_settings()
-        logger.info(f"Loading model from: {settings.model_path}")
 
+        if not settings.model_path:
+            raise ValueError("Model path is empty. Download failed?")
+
+        logger.info(f"⚡ Loading Llama-3 from: {settings.model_path}")
+
+        # High-Performance Configuration
         _llm_model = Llama(
            model_path=settings.model_path,
            n_ctx=settings.model_n_ctx,
            n_threads=settings.model_n_threads,
-
-
+            n_parallel=settings.model_n_parallel,  # CRITICAL for 1000 users
+            n_batch=512,  # Processes tokens in chunks (Faster)
+            verbose=False
        )
-        logger.info("
+        logger.info("✅ Brain Loaded with Parallel Powers")
 
     return _llm_model
 
-
-# Islamic reflection system prompt
 ISLAMIC_SYSTEM_PROMPT = """You are Nur, a knowledgeable and compassionate Islamic assistant.
-Your purpose is to provide thoughtful Islamic reflections, guidance based on the Quran and authentic
-Sunnah, and spiritual wisdom rooted in the teachings of Islam.
-
 Guidelines:
-1.
-2. Be respectful
-3.
-4.
-5. Acknowledge different scholarly opinions when relevant
-6. Encourage seeking knowledge from qualified scholars for complex matters
-7. Focus on bringing hearts closer to Allah (SWT) and promoting good character
-
-Begin each response with "Bismillah" when appropriate."""
+1. Base responses on Quran and Sahih Hadith.
+2. Be respectful and gentle.
+3. Cite sources clearly.
+4. Begin with "Bismillah"."""
 
-
-def generate_islamic_reflection(
-    prompt: str,
-    max_tokens: Optional[int] = None,
-    temperature: Optional[float] = None,
-    top_p: Optional[float] = None
-) -> dict:
-    """
-    Generate an Islamic reflection based on the user's prompt.
-
-    Args:
-        prompt: The user's question or topic
-        max_tokens: Maximum tokens to generate
-        temperature: Sampling temperature
-        top_p: Top-p sampling parameter
-
-    Returns:
-        Dictionary containing the reflection and metadata
-    """
-    settings = get_settings()
+def generate_islamic_reflection(prompt: str, max_tokens: int = 512, temperature: float = 0.7):
     llm = get_llm()
 
-    # Use defaults if not specified
-    max_tokens = max_tokens or settings.default_max_tokens
-    temperature = temperature or settings.default_temperature
-    top_p = top_p or settings.default_top_p
-
-    # Format the prompt with the system instruction
     full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
 {ISLAMIC_SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>
 
 {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
 """
 
-    logger.info(f"Generating reflection for prompt: {prompt[:100]}...")
-
-    # Generate response
     response = llm(
        full_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
-        top_p=top_p,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        echo=False
    )
 
-    # Extract the generated text
-    generated_text = response["choices"][0]["text"].strip()
-
     return {
-        "reflection":
-        "usage":
-
-            "completion_tokens": response["usage"]["completion_tokens"],
-            "total_tokens": response["usage"]["total_tokens"]
-        },
-        "model": "llama-3-8b-instruct"
-    }
-
-
-def health_check() -> bool:
-    """Check if the LLM model is loaded and functional."""
-    try:
-        llm = get_llm()
-        return llm is not None
-    except Exception as e:
-        logger.error(f"Health check failed: {e}")
-        return False
+        "reflection": response["choices"][0]["text"].strip(),
+        "usage": response["usage"]
+    }
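The updated get_llm() assumes app.config.get_settings() exposes model_path, model_n_ctx, model_n_threads, and the new model_n_parallel field. That module is not part of this diff; a minimal sketch of what it could look like (env-var names and defaults here are assumptions, not taken from the repo) follows:

# Hypothetical sketch of app/config.py -- not included in this change.
import os
from dataclasses import dataclass
from functools import lru_cache


@dataclass(frozen=True)
class Settings:
    # Field names mirror what get_llm() reads; defaults are illustrative only.
    model_path: str = os.getenv("MODEL_PATH", "")
    model_n_ctx: int = int(os.getenv("MODEL_N_CTX", "4096"))
    model_n_threads: int = int(os.getenv("MODEL_N_THREADS", "8"))
    model_n_parallel: int = int(os.getenv("MODEL_N_PARALLEL", "4"))


@lru_cache
def get_settings() -> Settings:
    # Cached so get_llm() and any other caller share a single Settings instance.
    return Settings()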
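A short usage sketch of the slimmed-down generate_islamic_reflection() API (the warm-up call and the example prompt are illustrative, not part of the diff):

# Illustrative caller; assumes the module layout shown above.
from app.llm_service import generate_islamic_reflection, get_llm

get_llm()  # load the model once at startup so the first request is not slow

result = generate_islamic_reflection("How can I practice patience?", max_tokens=256)
print(result["reflection"])  # generated reflection text
print(result["usage"])       # token counts reported by llama-cpp (prompt/completion/total)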