joelg committed on
Commit 19a6387 · 1 Parent(s): 3036f93

Better models choice

Files changed (2)
  1. app.py +4 -4
  2. rag_system.py +11 -5
app.py CHANGED
@@ -169,11 +169,11 @@ def create_interface():
 
     llm_model = gr.Dropdown(
         choices=[
-            "HuggingFaceH4/zephyr-7b-beta",
-            "mistralai/Mistral-7B-Instruct-v0.2",
-            "meta-llama/Llama-2-7b-chat-hf",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "Qwen/Qwen3-1.7B",
+            "ibm-granite/granite-4.0-micro",
         ],
-        value="HuggingFaceH4/zephyr-7b-beta",
+        value="meta-llama/Llama-3.2-1B-Instruct",
         label=t('llm_model')
     )
 
 
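For orientation, here is a minimal, self-contained sketch of how a dropdown like this can be wired to the RAG backend. It assumes a RAGSystem class exposing the set_llm_model method seen in rag_system.py; the change-event handler, status output, and hard-coded label are illustrative stand-ins, not the repo's actual app.py wiring.

import gradio as gr

from rag_system import RAGSystem  # assumed import; the repo defines RAGSystem in rag_system.py

rag = RAGSystem()  # constructor arguments, if any, omitted for brevity

def on_model_change(model_id: str) -> str:
    # Hypothetical handler: point the RAG system at the newly selected model.
    rag.set_llm_model(model_id)
    return f"LLM set to {model_id}"

with gr.Blocks() as demo:
    llm_model = gr.Dropdown(
        choices=[
            "meta-llama/Llama-3.2-1B-Instruct",
            "Qwen/Qwen3-1.7B",
            "ibm-granite/granite-4.0-micro",
        ],
        value="meta-llama/Llama-3.2-1B-Instruct",
        label="LLM model",
    )
    status = gr.Markdown()
    llm_model.change(on_model_change, inputs=llm_model, outputs=status)

if __name__ == "__main__":
    demo.launch()
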
rag_system.py CHANGED
@@ -177,14 +177,13 @@ class RAGSystem:
     ) -> Tuple[str, str]:
         """Generate answer using LLM"""
         if self.llm_client is None:
-            self.set_llm_model("HuggingFaceH4/zephyr-7b-beta")
+            self.set_llm_model("meta-llama/Llama-3.2-1B-Instruct")
 
         # Build context from retrieved chunks
         context = "\n\n".join([chunk for chunk, _ in retrieved_chunks])
 
         # Create prompt
-        prompt = f"""You are a helpful assistant. Use the following context to answer the question.
-        If you cannot answer based on the context, say so.
+        prompt = f"""Use the following context to answer the question. If you cannot answer based on the context, say so.
 
 Context:
 {context}
@@ -199,9 +198,16 @@ Answer:"""
                 prompt,
                 max_new_tokens=max_tokens,
                 temperature=temperature,
-                return_full_text=False
+                do_sample=temperature > 0,
             )
-            return response, prompt
+
+            # Clean up response
+            if isinstance(response, str):
+                answer = response.strip()
+            else:
+                answer = str(response).strip()
+
+            return answer, prompt
 
         except Exception as e:
             return f"Error generating response: {str(e)}", prompt
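
For reference, a standalone sketch of the generation path after this change, assuming the client behind self.llm_client is huggingface_hub's InferenceClient; the prompt contents, token budget, and temperature below are illustrative values, not taken from the repo.

from huggingface_hub import InferenceClient

# New default model from this commit; any hosted text-generation model id works here.
client = InferenceClient(model="meta-llama/Llama-3.2-1B-Instruct")

prompt = """Use the following context to answer the question. If you cannot answer based on the context, say so.

Context:
Gradio lets you build web UIs for machine learning demos in Python.

Question:
What is Gradio used for?

Answer:"""

temperature = 0.7  # illustrative value; in the app it comes from the UI
response = client.text_generation(
    prompt,
    max_new_tokens=256,
    temperature=temperature,
    do_sample=temperature > 0,  # greedy decoding when temperature is 0, mirroring the diff
)
print(response.strip())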