Spaces:

jdesiree
/

Mimir

Sleeping

App Files Files Community

jdesiree committed on Sep 7, 2025

Commit

f44f7c5

verified ·

1 Parent(s): bf62672

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -4

app.py CHANGED Viewed

@@ -476,7 +476,7 @@ class Phi3MiniEducationalLLM(Runnable):
             text = self._format_chat_template(prompt)
             try:
-                max_input_length = 4096 - 300
                 inputs = self.tokenizer(
                     text,
                     return_tensors="pt",
@@ -505,7 +505,7 @@ class Phi3MiniEducationalLLM(Runnable):
                     outputs = model.generate(
                         input_ids=inputs['input_ids'],
                         attention_mask=inputs.get('attention_mask', None),
-                        max_new_tokens=400,
                         do_sample=True,
                         temperature=0.7,
                         top_p=0.9,
@@ -580,7 +580,7 @@ class Phi3MiniEducationalLLM(Runnable):
                     return_tensors="pt",
                     padding=True,
                     truncation=True,
-                    max_length=4096
                 )
                 if 'input_ids' not in inputs:
                     yield "I encountered an error processing your request. Please try again."
@@ -606,7 +606,7 @@ class Phi3MiniEducationalLLM(Runnable):
             generation_kwargs = {
                 "input_ids": inputs['input_ids'],
                 "attention_mask": inputs.get('attention_mask', None),
-                "max_new_tokens": 400,
                 "do_sample": True,
                 "temperature": 0.7,
                 "top_p": 0.9,
@@ -991,9 +991,15 @@ def respond_and_update(message, history):
     # Add user message to history
     history.append({"role": "user", "content": message})
     # Add empty assistant message that will be updated
     history.append({"role": "assistant", "content": ""})
     try:
         # Generate streaming response
         full_response = ""

             text = self._format_chat_template(prompt)
             try:
+                max_input_length = 2000 - 400
                 inputs = self.tokenizer(
                     text,
                     return_tensors="pt",
                     outputs = model.generate(
                         input_ids=inputs['input_ids'],
                         attention_mask=inputs.get('attention_mask', None),
+                        max_new_tokens=250,
                         do_sample=True,
                         temperature=0.7,
                         top_p=0.9,
                     return_tensors="pt",
                     padding=True,
                     truncation=True,
+                    max_length=2000
                 )
                 if 'input_ids' not in inputs:
                     yield "I encountered an error processing your request. Please try again."
             generation_kwargs = {
                 "input_ids": inputs['input_ids'],
                 "attention_mask": inputs.get('attention_mask', None),
+                "max_new_tokens": 250,
                 "do_sample": True,
                 "temperature": 0.7,
                 "top_p": 0.9,
     # Add user message to history
     history.append({"role": "user", "content": message})
+    # yield to show user message
+    yield history, ""
     # Add empty assistant message that will be updated
     history.append({"role": "assistant", "content": ""})
+    #  Yield again to show empty assistant bubble
+    yield history, ""
     try:
         # Generate streaming response
         full_response = ""