Update app.py
Browse files
app.py
CHANGED
|
@@ -151,7 +151,6 @@ def finetune_small_subset():
|
|
| 151 |
|
| 152 |
return "Finetuning complete. Model loaded for inference."
|
| 153 |
|
| 154 |
-
|
| 155 |
def ensure_pipeline():
|
| 156 |
"""
|
| 157 |
If we haven't finetuned yet (TEXT_PIPELINE is None),
|
|
@@ -178,7 +177,6 @@ def ensure_pipeline():
|
|
| 178 |
TEXT_PIPELINE = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
|
| 179 |
return TEXT_PIPELINE
|
| 180 |
|
| 181 |
-
|
| 182 |
def ensure_comparison_pipeline():
|
| 183 |
"""
|
| 184 |
Load the DeepSeek model pipeline if not already loaded.
|
|
@@ -195,7 +193,6 @@ def ensure_comparison_pipeline():
|
|
| 195 |
COMPARISON_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 196 |
return COMPARISON_PIPELINE
|
| 197 |
|
| 198 |
-
|
| 199 |
@spaces.GPU(duration=120)
|
| 200 |
def predict(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 201 |
"""
|
|
@@ -212,7 +209,6 @@ def predict(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
|
| 212 |
)
|
| 213 |
return out[0]["generated_text"]
|
| 214 |
|
| 215 |
-
|
| 216 |
@spaces.GPU(duration=120)
|
| 217 |
def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 218 |
"""
|
|
@@ -239,7 +235,6 @@ def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
|
| 239 |
)
|
| 240 |
return local_out[0]["generated_text"], comp_out[0]["generated_text"]
|
| 241 |
|
| 242 |
-
|
| 243 |
###############################################################################
|
| 244 |
# Retrieval-Augmented Memory with FAISS
|
| 245 |
###############################################################################
|
|
@@ -300,7 +295,6 @@ class ConversationRetriever:
|
|
| 300 |
results.append((self.texts[idx], dist))
|
| 301 |
return results
|
| 302 |
|
| 303 |
-
|
| 304 |
###############################################################################
|
| 305 |
# Build a Chat that uses RAG
|
| 306 |
###############################################################################
|
|
@@ -325,7 +319,6 @@ def build_rag_prompt(user_query, retrieved_chunks):
|
|
| 325 |
)
|
| 326 |
return prompt
|
| 327 |
|
| 328 |
-
|
| 329 |
@spaces.GPU(duration=120)
|
| 330 |
def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 331 |
"""
|
|
@@ -373,7 +366,6 @@ def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_to
|
|
| 373 |
history.append([user_input, assistant_reply])
|
| 374 |
return history, history
|
| 375 |
|
| 376 |
-
|
| 377 |
###############################################################################
|
| 378 |
# Gradio UI
|
| 379 |
###############################################################################
|
|
@@ -438,4 +430,4 @@ with gr.Blocks() as demo:
|
|
| 438 |
outputs=[chat_state, chatbot]
|
| 439 |
)
|
| 440 |
|
| 441 |
-
demo.launch()
|
|
|
|
| 151 |
|
| 152 |
return "Finetuning complete. Model loaded for inference."
|
| 153 |
|
|
|
|
| 154 |
def ensure_pipeline():
|
| 155 |
"""
|
| 156 |
If we haven't finetuned yet (TEXT_PIPELINE is None),
|
|
|
|
| 177 |
TEXT_PIPELINE = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
|
| 178 |
return TEXT_PIPELINE
|
| 179 |
|
|
|
|
| 180 |
def ensure_comparison_pipeline():
|
| 181 |
"""
|
| 182 |
Load the DeepSeek model pipeline if not already loaded.
|
|
|
|
| 193 |
COMPARISON_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 194 |
return COMPARISON_PIPELINE
|
| 195 |
|
|
|
|
| 196 |
@spaces.GPU(duration=120)
|
| 197 |
def predict(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 198 |
"""
|
|
|
|
| 209 |
)
|
| 210 |
return out[0]["generated_text"]
|
| 211 |
|
|
|
|
| 212 |
@spaces.GPU(duration=120)
|
| 213 |
def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 214 |
"""
|
|
|
|
| 235 |
)
|
| 236 |
return local_out[0]["generated_text"], comp_out[0]["generated_text"]
|
| 237 |
|
|
|
|
| 238 |
###############################################################################
|
| 239 |
# Retrieval-Augmented Memory with FAISS
|
| 240 |
###############################################################################
|
|
|
|
| 295 |
results.append((self.texts[idx], dist))
|
| 296 |
return results
|
| 297 |
|
|
|
|
| 298 |
###############################################################################
|
| 299 |
# Build a Chat that uses RAG
|
| 300 |
###############################################################################
|
|
|
|
| 319 |
)
|
| 320 |
return prompt
|
| 321 |
|
|
|
|
| 322 |
@spaces.GPU(duration=120)
|
| 323 |
def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_tokens):
|
| 324 |
"""
|
|
|
|
| 366 |
history.append([user_input, assistant_reply])
|
| 367 |
return history, history
|
| 368 |
|
|
|
|
| 369 |
###############################################################################
|
| 370 |
# Gradio UI
|
| 371 |
###############################################################################
|
|
|
|
| 430 |
outputs=[chat_state, chatbot]
|
| 431 |
)
|
| 432 |
|
| 433 |
+
demo.launch()
|