Spaces:

SallySims
/

AnthroBot

Sleeping

App Files Files Community

SallySims committed on Apr 21, 2025

Commit

ecb5b5d

verified ·

1 Parent(s): fb83f3f

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -37

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 ## Deploying on HuggingFace
 import streamlit as st
 import pandas as pd
 import torch
@@ -39,55 +40,46 @@ def load_model():
         raise e
 model, tokenizer = load_model()
 # Prediction function
 device = "cuda" if torch.cuda.is_available() else "cpu"
 def get_prediction(prompt):
-    # Log the received prompt
     st.write(f"Received prompt: {prompt}")
-    # Add special tokens (if the model is expecting them)
-    prompt_with_special_tokens = f"{prompt}<|eot_id|><|start_header_id|>"
-    # Tokenize the input with the added special tokens
-    inputs = tokenizer.encode(prompt_with_special_tokens, return_tensors="pt").to(device)
-    st.write(f"Tokenized input: {inputs}")  # Log the tokenized inputs
-    # Ensure model is on the correct device (CUDA or CPU)
-    model.to(device)
-    # Generate output from the model
     output = model.generate(
-        inputs,
-        max_length=200,  # Set a reasonable max length for output
-        max_new_tokens=150,  # Limit output to avoid too long generations
-        temperature=0.7,  # Control randomness
-        top_p=0.95,  # Top-p sampling for diversity
-        do_sample=True,  # Enable sampling (for more diverse answers)
-        pad_token_id=tokenizer.eos_token_id,  # Ensure padding is handled
-        num_return_sequences=1  # Only generate 1 sequence
     )
-    # Log the raw output from the model
-    st.write(f"Raw output: {output}")
-    # Decode the output to readable text
-    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-    st.write(f"Decoded output: {decoded}")  # Log the decoded output
-    # Ensure the output is properly formatted
-    if "<|eot_id|>" in decoded:
-        # If expected token is found, split the output
-        decoded = decoded.split("<|eot_id|>")[-1].strip()
-    return decoded
     st.write(f"Output: {output}")  # Log the raw output from the model
-    # Decode the output to readable text
     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-    st.write(f"Decoded output: {decoded}")  # Log the decoded output
-    return decoded.strip()
 # UI Header
@@ -147,4 +139,3 @@ with tab2:
                 csv_output = df.to_csv(index=False).encode("utf-8")
                 st.download_button("📤 Download Predictions", data=csv_output, file_name="predictions.csv")

 ## Deploying on HuggingFace
+## Deploying on HuggingFace
 import streamlit as st
 import pandas as pd
 import torch
         raise e
 model, tokenizer = load_model()
+# Prediction function
 # Prediction function
 device = "cuda" if torch.cuda.is_available() else "cpu"
 def get_prediction(prompt):
     """Generate a model reply for a single user prompt.

     The prompt is wrapped as one ``user`` chat message, rendered through the
     tokenizer's chat template, and passed to ``model.generate`` with sampling
     enabled. Intermediate values are echoed to the Streamlit page with
     ``st.write``.

     Args:
         prompt: The user's text to send to the model.

     Returns:
         The decoded text of the first generated sequence, with special
         tokens removed.
     """
     st.write(f"Received prompt: {prompt}")
     # Create a message structure
     messages = [{"role": "user", "content": prompt}]
     # Tokenize the input. return_dict=True is required so that the result is
     # a mapping (input_ids / attention_mask); without it the call returns a
     # bare tensor of ids and indexing it by "input_ids" fails.
     inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
         add_generation_prompt=True,  # This is needed for generation
         return_tensors="pt",
         return_dict=True,
     ).to(device)
     # Log the tokenized input
     st.write(f"Tokenized input: {inputs}")
     # Initialize TextStreamer for real-time streaming
     # NOTE(review): TextStreamer presumably prints to stdout (the Space logs),
     # not to the Streamlit page - confirm this is the intended destination.
     text_streamer = TextStreamer(tokenizer)
     # Generate output using the model with streaming. Unpacking the encoding
     # forwards the attention mask along with the input ids.
     output = model.generate(
         **inputs,
         max_new_tokens=150,  # Limit the number of tokens
         temperature=0.7,  # Control randomness of output
         top_p=0.95,  # Sampling parameter
         do_sample=True,  # Ensure sampling for diverse output
         streamer=text_streamer,  # Use the TextStreamer for output
     )
     st.write(f"Output: {output}")  # Log the raw output from the model
     # Decode the output
     # NOTE(review): output[0] likely still contains the prompt tokens ahead
     # of the generated ones - verify whether callers expect the reply only.
     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
     # Log decoded output
     st.write(f"Decoded output: {decoded}")
     return decoded
 # UI Header
                 csv_output = df.to_csv(index=False).encode("utf-8")
                 st.download_button("📤 Download Predictions", data=csv_output, file_name="predictions.csv")