Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -476,7 +476,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 476 |
text = self._format_chat_template(prompt)
|
| 477 |
|
| 478 |
try:
|
| 479 |
-
max_input_length =
|
| 480 |
inputs = self.tokenizer(
|
| 481 |
text,
|
| 482 |
return_tensors="pt",
|
|
@@ -505,7 +505,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 505 |
outputs = model.generate(
|
| 506 |
input_ids=inputs['input_ids'],
|
| 507 |
attention_mask=inputs.get('attention_mask', None),
|
| 508 |
-
max_new_tokens=
|
| 509 |
do_sample=True,
|
| 510 |
temperature=0.7,
|
| 511 |
top_p=0.9,
|
|
@@ -580,7 +580,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 580 |
return_tensors="pt",
|
| 581 |
padding=True,
|
| 582 |
truncation=True,
|
| 583 |
-
max_length=
|
| 584 |
)
|
| 585 |
if 'input_ids' not in inputs:
|
| 586 |
yield "I encountered an error processing your request. Please try again."
|
|
@@ -606,7 +606,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 606 |
generation_kwargs = {
|
| 607 |
"input_ids": inputs['input_ids'],
|
| 608 |
"attention_mask": inputs.get('attention_mask', None),
|
| 609 |
-
"max_new_tokens":
|
| 610 |
"do_sample": True,
|
| 611 |
"temperature": 0.7,
|
| 612 |
"top_p": 0.9,
|
|
@@ -991,9 +991,15 @@ def respond_and_update(message, history):
|
|
| 991 |
# Add user message to history
|
| 992 |
history.append({"role": "user", "content": message})
|
| 993 |
|
|
|
|
|
|
|
|
|
|
| 994 |
# Add empty assistant message that will be updated
|
| 995 |
history.append({"role": "assistant", "content": ""})
|
| 996 |
|
|
|
|
|
|
|
|
|
|
| 997 |
try:
|
| 998 |
# Generate streaming response
|
| 999 |
full_response = ""
|
|
|
|
| 476 |
text = self._format_chat_template(prompt)
|
| 477 |
|
| 478 |
try:
|
| 479 |
+
max_input_length = 2000 - 400
|
| 480 |
inputs = self.tokenizer(
|
| 481 |
text,
|
| 482 |
return_tensors="pt",
|
|
|
|
| 505 |
outputs = model.generate(
|
| 506 |
input_ids=inputs['input_ids'],
|
| 507 |
attention_mask=inputs.get('attention_mask', None),
|
| 508 |
+
max_new_tokens=250,
|
| 509 |
do_sample=True,
|
| 510 |
temperature=0.7,
|
| 511 |
top_p=0.9,
|
|
|
|
| 580 |
return_tensors="pt",
|
| 581 |
padding=True,
|
| 582 |
truncation=True,
|
| 583 |
+
max_length=2000
|
| 584 |
)
|
| 585 |
if 'input_ids' not in inputs:
|
| 586 |
yield "I encountered an error processing your request. Please try again."
|
|
|
|
| 606 |
generation_kwargs = {
|
| 607 |
"input_ids": inputs['input_ids'],
|
| 608 |
"attention_mask": inputs.get('attention_mask', None),
|
| 609 |
+
"max_new_tokens": 250,
|
| 610 |
"do_sample": True,
|
| 611 |
"temperature": 0.7,
|
| 612 |
"top_p": 0.9,
|
|
|
|
| 991 |
# Add user message to history
|
| 992 |
history.append({"role": "user", "content": message})
|
| 993 |
|
| 994 |
+
# yield to show user message
|
| 995 |
+
yield history, ""
|
| 996 |
+
|
| 997 |
# Add empty assistant message that will be updated
|
| 998 |
history.append({"role": "assistant", "content": ""})
|
| 999 |
|
| 1000 |
+
# Yield again to show empty assistant bubble
|
| 1001 |
+
yield history, ""
|
| 1002 |
+
|
| 1003 |
try:
|
| 1004 |
# Generate streaming response
|
| 1005 |
full_response = ""
|