Better models choice
Files changed:
- app.py +4 -4
- rag_system.py +11 -5
app.py
CHANGED

@@ -169,11 +169,11 @@ def create_interface():
 
             llm_model = gr.Dropdown(
                 choices=[
-                    "
-                    "
-                    "
+                    "meta-llama/Llama-3.2-1B-Instruct",
+                    "Qwen/Qwen3-1.7B",
+                    "ibm-granite/granite-4.0-micro",
                 ],
-                value="
+                value="meta-llama/Llama-3.2-1B-Instruct",
                 label=t('llm_model')
             )
 
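The hunk above only changes the list of model IDs offered in the dropdown; how the selected ID reaches the RAG backend is not part of this diff. As a rough sketch of one plausible wiring, assuming a RAGSystem instance from rag_system.py and Gradio's standard change event (the instance name and the event wiring are hypothetical, not taken from the commit):

import gradio as gr
from rag_system import RAGSystem

rag_system = RAGSystem()  # assumed constructor; not shown in this diff

with gr.Blocks() as demo:
    llm_model = gr.Dropdown(
        choices=[
            "meta-llama/Llama-3.2-1B-Instruct",
            "Qwen/Qwen3-1.7B",
            "ibm-granite/granite-4.0-micro",
        ],
        value="meta-llama/Llama-3.2-1B-Instruct",
        label="LLM model",  # the app itself uses the localized t('llm_model')
    )
    # Forward the user's selection to the backend; rag_system.py also lazily
    # falls back to the Llama default when no model has been set yet.
    llm_model.change(fn=rag_system.set_llm_model, inputs=llm_model)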
rag_system.py
CHANGED

@@ -177,14 +177,13 @@ class RAGSystem:
     ) -> Tuple[str, str]:
         """Generate answer using LLM"""
         if self.llm_client is None:
-            self.set_llm_model("
+            self.set_llm_model("meta-llama/Llama-3.2-1B-Instruct")
 
         # Build context from retrieved chunks
         context = "\n\n".join([chunk for chunk, _ in retrieved_chunks])
 
         # Create prompt
-        prompt = f"""
-        If you cannot answer based on the context, say so.
+        prompt = f"""Use the following context to answer the question. If you cannot answer based on the context, say so.
 
 Context:
 {context}

@@ -199,9 +198,16 @@ Answer:"""
                 prompt,
                 max_new_tokens=max_tokens,
                 temperature=temperature,
-
+                do_sample=temperature > 0,
             )
-
+
+            # Clean up response
+            if isinstance(response, str):
+                answer = response.strip()
+            else:
+                answer = str(response).strip()
+
+            return answer, prompt
 
         except Exception as e:
             return f"Error generating response: {str(e)}", prompt
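For context on the new do_sample flag and the response cleanup: do_sample=temperature > 0 requests sampling only when a non-zero temperature is set, so decoding is effectively greedy otherwise, and the added lines normalize whatever the client returns into a stripped string before returning it alongside the prompt. A minimal sketch of the changed generation path, assuming the Space's llm_client behaves like a huggingface_hub InferenceClient (the actual client type is not visible in these hunks):

from huggingface_hub import InferenceClient

# Assumption: llm_client exposes text_generation(); only the keyword
# arguments below appear in the diff itself.
client = InferenceClient(model="meta-llama/Llama-3.2-1B-Instruct")

def generate(prompt: str, max_tokens: int, temperature: float):
    try:
        response = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=temperature > 0,  # sample only when temperature is non-zero
        )
        # Mirror the new cleanup: always return a stripped string.
        answer = response.strip() if isinstance(response, str) else str(response).strip()
        return answer, prompt
    except Exception as e:
        return f"Error generating response: {str(e)}", prompt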