Spaces:

larrysim
/

demo

Sleeping

App Files Files Community

larrysim committed on Aug 24, 2025

Commit

b86722f

verified ·

1 Parent(s): e077248

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -158

app.py CHANGED Viewed

@@ -1,165 +1,28 @@
 import streamlit as st
-import tensorflow as tf
-from tensorflow.keras.models import load_model
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-import numpy as np
-import pickle
-import re
-import os
-# Set Streamlit configuration (instead of using .streamlit/config.toml)
-st.set_page_config(
-    page_title="Next Word Predictor",
-    page_icon="🔮",
-    layout="centered"
-)
-# Set other Streamlit configurations
-st.set_option('server.headless', True)
-st.set_option('server.port', 8501)
-st.set_option('server.enableCORS', False)
-st.set_option('server.enableXsrfProtection', False)
-# Custom CSS for styling
-st.markdown("""
-    <style>
-    .main {
-        background-color: #f5f5f5;
-    }
-    .stTextInput>div>div>input {
-        background-color: #ffffff;
-        color: #000000;
-    }
-    .prediction-box {
-        background-color: #e6f7ff;
-        padding: 15px;
-        border-radius: 10px;
-        border-left: 5px solid #1890ff;
-        margin-top: 20px;
-    }
-    </style>
-    """, unsafe_allow_html=True)
 @st.cache_resource
-def load_models():
-    """Load the model and tokenizer with caching"""
-    try:
-        # Check if files exist
-        if not os.path.exists('nextword_lstm_model.h5'):
-            st.error("Model file not found!")
-            return None, None
-        if not os.path.exists('tokenizer.pkl'):
-            st.error("Tokenizer file not found!")
-            return None, None
-        # Load model with custom objects if needed
-        model = load_model('nextword_lstm_model.h5', compile=False)
-        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
-        # Load tokenizer
-        with open('tokenizer.pkl', 'rb') as f:
-            tokenizer = pickle.load(f)
-        st.success("Model and tokenizer loaded successfully!")
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        return None, None
-def predict_next_word(model, tokenizer, seed_text, max_seq_len):
-    """Predict the next word given a seed text"""
-    try:
-        # Clean and preprocess the input text
-        seed_text = re.sub(r'[^\w\s]', '', seed_text.lower()).strip()
-        if not seed_text:
-            return "Please enter some text"
-        # Convert text to sequence
-        token_list = tokenizer.texts_to_sequences([seed_text])
-        if not token_list or not token_list[0]:
-            return "Please enter more meaningful text"
-        token_list = token_list[0]
-        # Pad sequences
-        token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
-        # Make prediction
-        predicted = model.predict(token_list, verbose=0)
-        predicted_word_index = np.argmax(predicted, axis=-1)[0]
-        # Find the word corresponding to the predicted index
-        for word, index in tokenizer.word_index.items():
-            if index == predicted_word_index:
-                return word.capitalize()
-        return "No prediction available"
-    except Exception as e:
-        return f"Error in prediction: {str(e)}"
-def main():
-    st.title("🔮 Next Word Predictor")
-    st.markdown("Enter some text and I'll predict the next word using an LSTM model trained on a large corpus.")
-    # Debug: Show files in directory
-    st.sidebar.write("Debug Info:")
-    st.sidebar.write("Files in directory:", os.listdir('.'))
-    # Load model and tokenizer
-    with st.spinner("Loading model..."):
-        model, tokenizer = load_models()
-    if model is None or tokenizer is None:
-        st.error("Failed to load the model. Please check if model files are available.")
-        return
-    # Calculate max sequence length (you might want to set this based on your training)
-    max_seq_len = 20  # Adjust based on your model's training parameters
-    # Input section
-    st.subheader("Enter your text")
-    seed_text = st.text_input(
-        "Start typing...",
-        placeholder="Type something like 'I am going to'",
-        key="text_input"
-    )
-    # Prediction button
-    if st.button("Predict Next Word", type="primary"):
-        if seed_text.strip():
-            with st.spinner("Predicting..."):
-                next_word = predict_next_word(model, tokenizer, seed_text, max_seq_len)
-                # Display result
-                st.markdown(f"""
-                <div class="prediction-box">
-                    <h3>Prediction</h3>
-                    <p style="font-size: 20px; margin-bottom: 0;"><strong>{seed_text} <span style="color: #1890ff;">{next_word}</span></strong></p>
-                </div>
-                """, unsafe_allow_html=True)
-        else:
-            st.warning("Please enter some text first!")
-    # Information section
-    st.markdown("---")
-    st.subheader("About")
-    st.markdown("""
-    This app uses an LSTM neural network trained on a large text corpus to predict the next word in a sequence.
-    **How it works:**
-    - The model was trained on 20,000 text samples
-    - Uses word embeddings and LSTM layers
-    - Predicts the most likely next word based on context
-    **Try phrases like:**
-    - "I am going to"
-    - "The weather is"
-    - "Machine learning is"
-    """)
-if __name__ == "__main__":
-    main()

 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Load model & tokenizer
 @st.cache_resource
+def load_model():  # Returns (tokenizer, model); decorated with st.cache_resource, presumably so the load is not repeated on every Streamlit rerun.
+    tokenizer = AutoTokenizer.from_pretrained(".")  # "." is a relative path: tokenizer files are read from the current directory
+    model = AutoModelForCausalLM.from_pretrained(".")  # model weights/config are read from the same directory as the tokenizer
+    return tokenizer, model
+tokenizer, model = load_model()
+st.title("📝 Next Word Prediction App")
+st.write("Type a sentence and let the model suggest the next word!")
+# User input; prediction runs only when the button is clicked and the text box is non-empty
+text = st.text_input("Enter your sentence:", "")
+if st.button("Predict Next Word") and text:
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=1)
+    # Slice by prompt token count, not len(text): the decoded string may not reproduce the prompt exactly.
+    prompt_len = inputs["input_ids"].shape[-1]
+    predicted_next = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
+    st.success(f"**Predicted next word:** {predicted_next}")