Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,9 +2,9 @@ import streamlit as st
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
import torch
|
| 4 |
|
| 5 |
-
# Load the model and tokenizer
|
| 6 |
model_name = "Tom158/Nutri_Assist"
|
| 7 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 9 |
|
| 10 |
# Set pad token if not already set
|
|
@@ -16,25 +16,17 @@ st.title("Nutrition Chatbot")
|
|
| 16 |
user_input = st.text_input("Ask me about nutrition:")
|
| 17 |
|
| 18 |
if user_input:
|
| 19 |
-
#
|
| 20 |
-
inputs = tokenizer.encode_plus(user_input, return_tensors="pt", padding=True, truncation=True)
|
| 21 |
input_ids = inputs['input_ids']
|
| 22 |
attention_mask = inputs['attention_mask']
|
| 23 |
-
|
| 24 |
-
# Print the input tensors for debugging
|
| 25 |
-
st.write("Input IDs:", input_ids)
|
| 26 |
-
st.write("Attention Mask:", attention_mask)
|
| 27 |
-
|
| 28 |
# Generate output with attention mask and pad token ID
|
| 29 |
try:
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=150,
|
| 33 |
temperature=0.7, top_k=50, num_return_sequences=1)
|
| 34 |
-
|
| 35 |
-
# Debugging model raw output (just the token ids)
|
| 36 |
-
st.write("Model Output (Raw Token IDs):", outputs)
|
| 37 |
-
|
| 38 |
# Decode the output and display
|
| 39 |
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 40 |
st.write("Decoded Answer:", decoded_output)
|
|
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
import torch
|
| 4 |
|
| 5 |
+
# Load the model and tokenizer with memory optimizations.
model_name = "Tom158/Nutri_Assist"


# Streamlit re-executes this script on every widget interaction; caching the
# loader keeps a single copy of the weights in memory instead of reloading
# (and re-allocating) the checkpoint each time the user submits a question.
@st.cache_resource
def _load_model_and_tokenizer(name):
    """Return the ``(model, tokenizer)`` pair for the Hub checkpoint ``name``.

    The model is loaded with ``torch_dtype=torch.float16`` and
    ``device_map="auto"``, exactly as the uncached code did.
    """
    loaded_model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.float16, device_map="auto")
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(model_name)
|
| 9 |
|
| 10 |
# Set pad token if not already set
|
|
|
|
| 16 |
user_input = st.text_input("Ask me about nutrition:")
|
| 17 |
|
| 18 |
if user_input:
|
| 19 |
+
# Truncate input and convert to tensors
|
| 20 |
+
# Call the tokenizer directly: encode_plus is the deprecated spelling of the same operation.
inputs = tokenizer(user_input, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
| 21 |
input_ids = inputs['input_ids']
|
| 22 |
attention_mask = inputs['attention_mask']
|
| 23 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Generate output with attention mask and pad token ID
|
| 25 |
try:
|
| 26 |
+
# Limit output length to save memory
|
| 27 |
+
# - Inputs are moved to the model's device because device_map="auto" may
#   have placed the weights somewhere other than the CPU.
# - max_new_tokens bounds only the generated continuation; max_length also
#   counts the prompt, which may already be up to 512 tokens long.
# - do_sample=True is required for temperature/top_k to have any effect;
#   without it generation is greedy and those settings are ignored.
# - pad_token_id is passed explicitly so generate() does not have to guess it.
outputs = model.generate(
    input_ids.to(model.device),
    attention_mask=attention_mask.to(model.device),
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    num_return_sequences=1,
    pad_token_id=tokenizer.pad_token_id,
)
|
| 29 |
+
|
|
|
|
|
|
|
|
|
|
| 30 |
# Decode the output and display
|
| 31 |
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 32 |
st.write("Decoded Answer:", decoded_output)
|