jdesiree committed on
Commit
f44f7c5
·
verified ·
1 Parent(s): bf62672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -476,7 +476,7 @@ class Phi3MiniEducationalLLM(Runnable):
476
  text = self._format_chat_template(prompt)
477
 
478
  try:
479
- max_input_length = 4096 - 300
480
  inputs = self.tokenizer(
481
  text,
482
  return_tensors="pt",
@@ -505,7 +505,7 @@ class Phi3MiniEducationalLLM(Runnable):
505
  outputs = model.generate(
506
  input_ids=inputs['input_ids'],
507
  attention_mask=inputs.get('attention_mask', None),
508
- max_new_tokens=400,
509
  do_sample=True,
510
  temperature=0.7,
511
  top_p=0.9,
@@ -580,7 +580,7 @@ class Phi3MiniEducationalLLM(Runnable):
580
  return_tensors="pt",
581
  padding=True,
582
  truncation=True,
583
- max_length=4096
584
  )
585
  if 'input_ids' not in inputs:
586
  yield "I encountered an error processing your request. Please try again."
@@ -606,7 +606,7 @@ class Phi3MiniEducationalLLM(Runnable):
606
  generation_kwargs = {
607
  "input_ids": inputs['input_ids'],
608
  "attention_mask": inputs.get('attention_mask', None),
609
- "max_new_tokens": 400,
610
  "do_sample": True,
611
  "temperature": 0.7,
612
  "top_p": 0.9,
@@ -991,9 +991,15 @@ def respond_and_update(message, history):
991
  # Add user message to history
992
  history.append({"role": "user", "content": message})
993
 
 
 
 
994
  # Add empty assistant message that will be updated
995
  history.append({"role": "assistant", "content": ""})
996
 
 
 
 
997
  try:
998
  # Generate streaming response
999
  full_response = ""
 
476
  text = self._format_chat_template(prompt)
477
 
478
  try:
479
+ max_input_length = 2000 - 400
480
  inputs = self.tokenizer(
481
  text,
482
  return_tensors="pt",
 
505
  outputs = model.generate(
506
  input_ids=inputs['input_ids'],
507
  attention_mask=inputs.get('attention_mask', None),
508
+ max_new_tokens=250,
509
  do_sample=True,
510
  temperature=0.7,
511
  top_p=0.9,
 
580
  return_tensors="pt",
581
  padding=True,
582
  truncation=True,
583
+ max_length=2000
584
  )
585
  if 'input_ids' not in inputs:
586
  yield "I encountered an error processing your request. Please try again."
 
606
  generation_kwargs = {
607
  "input_ids": inputs['input_ids'],
608
  "attention_mask": inputs.get('attention_mask', None),
609
+ "max_new_tokens": 250,
610
  "do_sample": True,
611
  "temperature": 0.7,
612
  "top_p": 0.9,
 
991
  # Add user message to history
992
  history.append({"role": "user", "content": message})
993
 
994
+ # yield to show user message
995
+ yield history, ""
996
+
997
  # Add empty assistant message that will be updated
998
  history.append({"role": "assistant", "content": ""})
999
 
1000
+ # Yield again to show empty assistant bubble
1001
+ yield history, ""
1002
+
1003
  try:
1004
  # Generate streaming response
1005
  full_response = ""