Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -64,7 +64,8 @@ def initialize_qa_chain(temperature, max_tokens, top_k, vector_db):
|
|
| 64 |
|
| 65 |
llm = HuggingFaceEndpoint(
|
| 66 |
# repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
| 67 |
- repo_id="Qwen/Qwen2.5-1.5B-Instruct",
|
|
|
|
| 68 |
huggingfacehub_api_token=HF_TOKEN, # Only needed if the model endpoint requires authentication
|
| 69 |
temperature=temperature,
|
| 70 |
max_new_tokens=max_tokens,
|
|
@@ -191,7 +192,8 @@ if not HF_TOKEN:
|
|
| 191 |
# Global InferenceClient for plain chat (fallback)
|
| 192 |
client = InferenceClient(
|
| 193 |
# "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
| 194 |
- "Qwen/Qwen2.5-1.5B-Instruct",
|
|
|
|
| 195 |
token=HF_TOKEN)
|
| 196 |
|
| 197 |
# --- Auto-load vector database and initialize QA chain at startup ---
|
|
|
|
| 64 |
|
| 65 |
llm = HuggingFaceEndpoint(
|
| 66 |
# repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
| 67 |
+ # repo_id="Qwen/Qwen2.5-1.5B-Instruct",
|
| 68 |
+ repo_id="google/gemma-2b-it",
|
| 69 |
huggingfacehub_api_token=HF_TOKEN, # Only needed if the model endpoint requires authentication
|
| 70 |
temperature=temperature,
|
| 71 |
max_new_tokens=max_tokens,
|
|
|
|
| 192 |
# Global InferenceClient for plain chat (fallback)
|
| 193 |
client = InferenceClient(
|
| 194 |
# "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
| 195 |
+ # "Qwen/Qwen2.5-1.5B-Instruct",
|
| 196 |
+ "google/gemma-2b-it",
|
| 197 |
token=HF_TOKEN)
|
| 198 |
|
| 199 |
# --- Auto-load vector database and initialize QA chain at startup ---
|