isam0 committed on
Commit 0396090 · verified · 1 Parent(s): 220ea84

Update app/llm_service.py

Files changed (1)
  1. app/llm_service.py +18 -78
app/llm_service.py CHANGED
@@ -1,120 +1,60 @@
- """
- LLM Service for Islamic Reflections using Llama-3
- """
  import logging
  from typing import Optional
  from llama_cpp import Llama
  from app.config import get_settings

  logger = logging.getLogger(__name__)
-
- # Global model instance
  _llm_model: Optional[Llama] = None

-
  def get_llm() -> Llama:
-     """Get or initialize the LLM model."""
      global _llm_model
-
      if _llm_model is None:
          settings = get_settings()
-         logger.info(f"Loading model from: {settings.model_path}")

+         if not settings.model_path:
+             raise ValueError("Model path is empty. Download failed?")
+
+         logger.info(f"⚡ Loading Llama-3 from: {settings.model_path}")
+
+         # High-Performance Configuration
          _llm_model = Llama(
              model_path=settings.model_path,
              n_ctx=settings.model_n_ctx,
              n_threads=settings.model_n_threads,
-             n_gpu_layers=settings.model_n_gpu_layers,
-             verbose=settings.debug
+             n_parallel=settings.model_n_parallel,  # CRITICAL for 1000 users
+             n_batch=512,  # Processes tokens in chunks (Faster)
+             verbose=False
          )
-         logger.info("Model loaded successfully")
+         logger.info(" Brain Loaded with Parallel Powers")

      return _llm_model

-
- # Islamic reflection system prompt
  ISLAMIC_SYSTEM_PROMPT = """You are Nur, a knowledgeable and compassionate Islamic assistant.
- Your purpose is to provide thoughtful Islamic reflections, guidance based on the Quran and authentic
- Sunnah, and spiritual wisdom rooted in the teachings of Islam.
-
  Guidelines:
- 1. Always base your responses on authentic Islamic sources (Quran and Sahih Hadith)
- 2. Be respectful, gentle, and encouraging in your tone
- 3. When citing Quran verses, include the Surah name and verse number
- 4. When citing Hadith, mention the source (Bukhari, Muslim, etc.) when known
- 5. Acknowledge different scholarly opinions when relevant
- 6. Encourage seeking knowledge from qualified scholars for complex matters
- 7. Focus on bringing hearts closer to Allah (SWT) and promoting good character
-
- Begin each response with "Bismillah" when appropriate."""
+ 1. Base responses on Quran and Sahih Hadith.
+ 2. Be respectful and gentle.
+ 3. Cite sources clearly.
+ 4. Begin with "Bismillah"."""

-
- def generate_islamic_reflection(
-     prompt: str,
-     max_tokens: Optional[int] = None,
-     temperature: Optional[float] = None,
-     top_p: Optional[float] = None
- ) -> dict:
-     """
-     Generate an Islamic reflection based on the user's prompt.
-
-     Args:
-         prompt: The user's question or topic
-         max_tokens: Maximum tokens to generate
-         temperature: Sampling temperature
-         top_p: Top-p sampling parameter
-
-     Returns:
-         Dictionary containing the reflection and metadata
-     """
-     settings = get_settings()
+ def generate_islamic_reflection(prompt: str, max_tokens: int = 512, temperature: float = 0.7):
      llm = get_llm()

-     # Use defaults if not specified
-     max_tokens = max_tokens or settings.default_max_tokens
-     temperature = temperature or settings.default_temperature
-     top_p = top_p or settings.default_top_p
-
-     # Format the prompt with the system instruction
      full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

  {ISLAMIC_SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>

  {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
  """

-     logger.info(f"Generating reflection for prompt: {prompt[:100]}...")
-
-     # Generate response
      response = llm(
          full_prompt,
          max_tokens=max_tokens,
          temperature=temperature,
-         top_p=top_p,
          stop=["<|eot_id|>", "<|end_of_text|>"],
          echo=False
      )

-     # Extract the generated text
-     generated_text = response["choices"][0]["text"].strip()
-
      return {
-         "reflection": generated_text,
-         "usage": {
-             "prompt_tokens": response["usage"]["prompt_tokens"],
-             "completion_tokens": response["usage"]["completion_tokens"],
-             "total_tokens": response["usage"]["total_tokens"]
-         },
-         "model": "llama-3-8b-instruct"
-     }
-
-
- def health_check() -> bool:
-     """Check if the LLM model is loaded and functional."""
-     try:
-         llm = get_llm()
-         return llm is not None
-     except Exception as e:
-         logger.error(f"Health check failed: {e}")
-         return False
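A note on the concurrency change above: whether the installed llama-cpp-python build accepts n_parallel as a Llama constructor keyword is worth verifying against its Llama.__init__ signature, and even with batching enabled the single in-process Llama object is generally not safe to call from multiple threads at once. A minimal sketch of one common workaround, serializing generations behind a lock (this helper is hypothetical and not part of the commit):

# Hypothetical helper, not part of this commit: serialize access to the
# shared in-process Llama instance, which generally must not be invoked
# from multiple threads concurrently.
import threading

from app.llm_service import generate_islamic_reflection

_generation_lock = threading.Lock()


def generate_reflection_safe(prompt: str, **kwargs) -> dict:
    # Only one generation runs at a time; other callers block here.
    with _generation_lock:
        return generate_islamic_reflection(prompt, **kwargs)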
 
 
 
 
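For reference, a caller of the simplified generate_islamic_reflection might look like the following sketch; the FastAPI endpoint and request schema are assumptions for illustration, not code from this repository:

# Hypothetical usage sketch; the endpoint path and request model are assumed.
from fastapi import FastAPI
from pydantic import BaseModel

from app.llm_service import generate_islamic_reflection

app = FastAPI()


class ReflectionRequest(BaseModel):
    prompt: str
    max_tokens: int = 512
    temperature: float = 0.7


@app.post("/reflect")
def reflect(req: ReflectionRequest) -> dict:
    # The rewritten function returns {"reflection": ..., "usage": ...}.
    return generate_islamic_reflection(
        req.prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
    )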