Update app.py
app.py CHANGED
@@ -43,13 +43,17 @@ model, tokenizer = load_model()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def get_prediction(prompt):
-
+    # Log the received prompt
+    st.write(f"Received prompt: {prompt}")
 
-    #
-
+    # Add special tokens (if the model is expecting them)
+    prompt_with_special_tokens = f"{prompt}<|eot_id|><|start_header_id|>"
+
+    # Tokenize the input with the added special tokens
+    inputs = tokenizer.encode(prompt_with_special_tokens, return_tensors="pt").to(device)
     st.write(f"Tokenized input: {inputs}")  # Log the tokenized inputs
 
-    #
+    # Ensure model is on the correct device (CUDA or CPU)
     model.to(device)
 
     # Generate output from the model
@@ -64,6 +68,20 @@ def get_prediction(prompt):
         num_return_sequences=1  # Only generate 1 sequence
     )
 
+    # Log the raw output from the model
+    st.write(f"Raw output: {output}")
+
+    # Decode the output to readable text
+    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
+    st.write(f"Decoded output: {decoded}")  # Log the decoded output
+
+    # Ensure the output is properly formatted
+    if "<|eot_id|>" in decoded:
+        # If expected token is found, split the output
+        decoded = decoded.split("<|eot_id|>")[-1].strip()
+
+    return decoded
+
     st.write(f"Output: {output}")  # Log the raw output from the model
     # Decode the output to readable text
     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -129,3 +147,4 @@ with tab2:
     csv_output = df.to_csv(index=False).encode("utf-8")
     st.download_button("📤 Download Predictions", data=csv_output, file_name="predictions.csv")
 
+
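The hand-built prompt_with_special_tokens string splices in <|eot_id|> and <|start_header_id|>, which are Llama-3-style chat-template tokens. If the tokenizer returned by load_model() ships a chat template, the same step can be expressed with transformers' apply_chat_template instead of string concatenation. A minimal sketch, assuming a chat-capable tokenizer (this is not what the commit does):

# Sketch only: build the prompt via the tokenizer's chat template rather than
# concatenating <|eot_id|>/<|start_header_id|> by hand. Assumes the tokenizer
# defines a chat template (e.g. a Llama-3-style instruct model).
messages = [{"role": "user", "content": prompt}]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header so the model starts its reply
    return_tensors="pt",
).to(device)

Using the template keeps the header names and newlines consistent with what the model was trained on, which is easy to get wrong by hand.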
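After this change get_prediction returns at return decoded, so the older st.write(f"Output: {output}") and tokenizer.decode(...) lines left below it are unreachable. For reference, a consolidated sketch of how the function now behaves; the model.generate arguments other than num_return_sequences=1 are truncated in the diff, so the values shown here (max_new_tokens, pad_token_id) are assumptions:

def get_prediction(prompt):
    # Log the received prompt
    st.write(f"Received prompt: {prompt}")

    # Add the special tokens the model expects, then tokenize
    prompt_with_special_tokens = f"{prompt}<|eot_id|><|start_header_id|>"
    inputs = tokenizer.encode(prompt_with_special_tokens, return_tensors="pt").to(device)
    st.write(f"Tokenized input: {inputs}")

    # Ensure model is on the correct device (CUDA or CPU)
    model.to(device)

    # Generate output from the model (max_new_tokens and pad_token_id are
    # assumed values; the generate(...) call is truncated in the diff)
    output = model.generate(
        inputs,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,  # Only generate 1 sequence
    )
    st.write(f"Raw output: {output}")

    # Decode the output to readable text
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    st.write(f"Decoded output: {decoded}")

    # Keep only the text after the last <|eot_id|> marker, if present
    if "<|eot_id|>" in decoded:
        decoded = decoded.split("<|eot_id|>")[-1].strip()

    return decoded

Note that with skip_special_tokens=True the decode step will usually have stripped <|eot_id|> already (it is a registered special token on Llama-3-style tokenizers), so the split branch acts mostly as a safety net.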