Ashanasri committed on
Commit
35da6df
·
verified ·
1 Parent(s): 1a95c80

Upload app/rag/utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/rag/utils.py +10 -11
app/rag/utils.py CHANGED
@@ -38,7 +38,7 @@ def get_llm():
38
  logger.info(f"[LLM] Loading {MODEL_PATH} ...")
39
  _llm = Llama(
40
  model_path=MODEL_PATH,
41
- n_ctx=2048, # ✅ FIX: was 1024 — too small, prompt was being cut
42
  n_threads=os.cpu_count() or 4,
43
  n_gpu_layers=0,
44
  verbose=False,
@@ -72,7 +72,7 @@ SYSTEM_PROMPT = (
72
  )
73
 
74
 
75
- def build_context(hits: List[Dict[str, Any]], max_chars: int = 2000) -> str:
76
  parts = []
77
  total = 0
78
  for i, hit in enumerate(hits, 1):
@@ -105,8 +105,8 @@ def generate_answer(
105
  question: str,
106
  hits: List[Dict[str, Any]],
107
  max_tokens: int = 400,
108
- temperature: float = 0.2,
109
- top_p: float = 0.9,
110
  presence_penalty: float = 0.0,
111
  frequency_penalty: float = 0.0,
112
  repeat_penalty: float = 1.1,
@@ -147,11 +147,11 @@ def generate_answer(
147
  def answer_query(
148
  searcher,
149
  query: str,
150
- top_k: int = 3,
151
  rerank: bool = True,
152
  max_tokens: int = 400,
153
- temperature: float = 0.2,
154
- top_p: float = 0.9,
155
  presence_penalty: float = 0.0,
156
  frequency_penalty: float = 0.0,
157
  ) -> Dict[str, Any]:
@@ -188,11 +188,11 @@ def answer_query(
188
  async def answer_query_async(
189
  searcher,
190
  query: str,
191
- top_k: int = 3,
192
  rerank: bool = True,
193
  max_tokens: int = 400,
194
- temperature: float = 0.2,
195
- top_p: float = 0.9,
196
  presence_penalty: float = 0.0,
197
  frequency_penalty: float = 0.0,
198
  ) -> Dict[str, Any]:
@@ -209,7 +209,6 @@ async def answer_query_async(
209
  print(hit["text"])
210
  print("-" * 80)
211
 
212
- # ✅ FIX — lambda inapeleka ALL parameters vizuri
213
  # Kabla top_p na repeat_penalty hazikupelekwa — ndio sababu jibu lilibadilika
214
  answer = await loop.run_in_executor(
215
  None,
 
38
  logger.info(f"[LLM] Loading {MODEL_PATH} ...")
39
  _llm = Llama(
40
  model_path=MODEL_PATH,
41
+ n_ctx=4096,
42
  n_threads=os.cpu_count() or 4,
43
  n_gpu_layers=0,
44
  verbose=False,
 
72
  )
73
 
74
 
75
+ def build_context(hits: List[Dict[str, Any]], max_chars: int = 3000) -> str:
76
  parts = []
77
  total = 0
78
  for i, hit in enumerate(hits, 1):
 
105
  question: str,
106
  hits: List[Dict[str, Any]],
107
  max_tokens: int = 400,
108
+ temperature: float = 0.1,
109
+ top_p: float = 0.95,
110
  presence_penalty: float = 0.0,
111
  frequency_penalty: float = 0.0,
112
  repeat_penalty: float = 1.1,
 
147
  def answer_query(
148
  searcher,
149
  query: str,
150
+ top_k: int = 4,
151
  rerank: bool = True,
152
  max_tokens: int = 400,
153
+ temperature: float = 0.1,
154
+ top_p: float = 0.95,
155
  presence_penalty: float = 0.0,
156
  frequency_penalty: float = 0.0,
157
  ) -> Dict[str, Any]:
 
188
  async def answer_query_async(
189
  searcher,
190
  query: str,
191
+ top_k: int = 4,
192
  rerank: bool = True,
193
  max_tokens: int = 400,
194
+ temperature: float = 0.1,
195
+ top_p: float = 0.95,
196
  presence_penalty: float = 0.0,
197
  frequency_penalty: float = 0.0,
198
  ) -> Dict[str, Any]:
 
209
  print(hit["text"])
210
  print("-" * 80)
211
 
 
212
  # Kabla top_p na repeat_penalty hazikupelekwa — ndio sababu jibu lilibadilika
213
  answer = await loop.run_in_executor(
214
  None,