Update app/llm_service.py
app/llm_service.py  +18 -78
@@ -1,120 +1,60 @@
-"""
-LLM Service for Islamic Reflections using Llama-3
-"""
 import logging
 from typing import Optional
 from llama_cpp import Llama
 from app.config import get_settings
 
 logger = logging.getLogger(__name__)
-
-# Global model instance
 _llm_model: Optional[Llama] = None
 
-
 def get_llm() -> Llama:
-    """Get or initialize the LLM model."""
     global _llm_model
-
     if _llm_model is None:
         settings = get_settings()
-        logger.info(f"Loading model from: {settings.model_path}")
 
+        if not settings.model_path:
+            raise ValueError("Model path is empty. Download failed?")
+
+        logger.info(f"⚡ Loading Llama-3 from: {settings.model_path}")
+
+        # High-Performance Configuration
         _llm_model = Llama(
            model_path=settings.model_path,
            n_ctx=settings.model_n_ctx,
            n_threads=settings.model_n_threads,
-
-
+            n_parallel=settings.model_n_parallel,  # CRITICAL for 1000 users
+            n_batch=512,  # Processes tokens in chunks (Faster)
+            verbose=False
        )
-        logger.info("
+        logger.info("✅ Brain Loaded with Parallel Powers")
 
     return _llm_model
 
-
-# Islamic reflection system prompt
 ISLAMIC_SYSTEM_PROMPT = """You are Nur, a knowledgeable and compassionate Islamic assistant.
-Your purpose is to provide thoughtful Islamic reflections, guidance based on the Quran and authentic
-Sunnah, and spiritual wisdom rooted in the teachings of Islam.
-
 Guidelines:
-1.
-2. Be respectful
-3.
-4.
-5. Acknowledge different scholarly opinions when relevant
-6. Encourage seeking knowledge from qualified scholars for complex matters
-7. Focus on bringing hearts closer to Allah (SWT) and promoting good character
-
-Begin each response with "Bismillah" when appropriate."""
+1. Base responses on Quran and Sahih Hadith.
+2. Be respectful and gentle.
+3. Cite sources clearly.
+4. Begin with "Bismillah"."""
 
-
-def generate_islamic_reflection(
-    prompt: str,
-    max_tokens: Optional[int] = None,
-    temperature: Optional[float] = None,
-    top_p: Optional[float] = None
-) -> dict:
-    """
-    Generate an Islamic reflection based on the user's prompt.
-
-    Args:
-        prompt: The user's question or topic
-        max_tokens: Maximum tokens to generate
-        temperature: Sampling temperature
-        top_p: Top-p sampling parameter
-
-    Returns:
-        Dictionary containing the reflection and metadata
-    """
-    settings = get_settings()
+def generate_islamic_reflection(prompt: str, max_tokens: int = 512, temperature: float = 0.7):
     llm = get_llm()
 
-    # Use defaults if not specified
-    max_tokens = max_tokens or settings.default_max_tokens
-    temperature = temperature or settings.default_temperature
-    top_p = top_p or settings.default_top_p
-
-    # Format the prompt with the system instruction
     full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
 {ISLAMIC_SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>
 
 {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
 """
 
-    logger.info(f"Generating reflection for prompt: {prompt[:100]}...")
-
-    # Generate response
     response = llm(
        full_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
-        top_p=top_p,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        echo=False
    )
 
-    # Extract the generated text
-    generated_text = response["choices"][0]["text"].strip()
-
     return {
-        "reflection":
-        "usage":
-
-            "completion_tokens": response["usage"]["completion_tokens"],
-            "total_tokens": response["usage"]["total_tokens"]
-        },
-        "model": "llama-3-8b-instruct"
-    }
-
-
-def health_check() -> bool:
-    """Check if the LLM model is loaded and functional."""
-    try:
-        llm = get_llm()
-        return llm is not None
-    except Exception as e:
-        logger.error(f"Health check failed: {e}")
-        return False
+        "reflection": response["choices"][0]["text"].strip(),
+        "usage": response["usage"]
+    }
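The updated get_llm() assumes app.config.get_settings() exposes model_path, model_n_ctx, model_n_threads, and the new model_n_parallel field. That module is not part of this diff; a minimal sketch of what it could look like (env-var names and defaults here are assumptions, not taken from the repo) follows:

# Hypothetical sketch of app/config.py -- not included in this change.
import os
from dataclasses import dataclass
from functools import lru_cache


@dataclass(frozen=True)
class Settings:
    # Field names mirror what get_llm() reads; defaults are illustrative only.
    model_path: str = os.getenv("MODEL_PATH", "")
    model_n_ctx: int = int(os.getenv("MODEL_N_CTX", "4096"))
    model_n_threads: int = int(os.getenv("MODEL_N_THREADS", "8"))
    model_n_parallel: int = int(os.getenv("MODEL_N_PARALLEL", "4"))


@lru_cache
def get_settings() -> Settings:
    # Cached so get_llm() and any other caller share a single Settings instance.
    return Settings()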
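A short usage sketch of the slimmed-down generate_islamic_reflection() API (the warm-up call and the example prompt are illustrative, not part of the diff):

# Illustrative caller; assumes the module layout shown above.
from app.llm_service import generate_islamic_reflection, get_llm

get_llm()  # load the model once at startup so the first request is not slow

result = generate_islamic_reflection("How can I practice patience?", max_tokens=256)
print(result["reflection"])  # generated reflection text
print(result["usage"])       # token counts reported by llama-cpp (prompt/completion/total)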