Preethamreddy799 committed on
Commit
4682e8b
·
1 Parent(s): 3720c80

new update

Browse files
Files changed (1) hide show
  1. app.py +62 -46
app.py CHANGED
@@ -1,16 +1,61 @@
1
  import streamlit as st
2
  import numpy as np
3
- from tensorflow.keras.preprocessing.text import Tokenizer
4
- from tensorflow.keras.preprocessing.sequence import pad_sequences
5
  from tensorflow.keras.models import load_model
6
  from huggingface_hub import hf_hub_download
 
 
 
 
7
 
8
- # Load model from Hugging Face
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def load_model_test_steps():
10
  repo_id = 'Preethamreddy799/NLP_MODEL'
11
- filename = 'model_test_steps.h5' # Assuming the model is in HDF5 format
12
 
13
- # Download the model from Hugging Face repository
14
  cached_model_path = hf_hub_download(repo_id=repo_id, filename=filename)
15
 
16
  # Load the model from the cached path
@@ -19,45 +64,13 @@ def load_model_test_steps():
19
  print(f"Model loaded successfully from {cached_model_path}")
20
  return model
21
 
22
- model = load_model_test_steps()
23
-
24
- # Initialize Tokenizer (Should match training tokenizer)
25
- tokenizer = Tokenizer(num_words=1000)
26
-
27
- # Function to preprocess text data
28
- def preprocess_text(input_text):
29
- # Convert text to lowercase
30
- input_text = input_text.lower()
31
-
32
- # Remove punctuation (Optional, depending on model)
33
- input_text = ''.join([char for char in input_text if char.isalnum() or char.isspace()])
34
-
35
- # Fit tokenizer (you should ideally fit the tokenizer during training and save it)
36
- tokenizer.fit_on_texts([input_text])
37
-
38
- # Convert the input text to a sequence of integers
39
- sequence = tokenizer.texts_to_sequences([input_text])
40
-
41
- # Pad the sequence to ensure uniform input size
42
- input_features = pad_sequences(sequence, maxlen=100) # Ensure length matches the expected input (100)
43
-
44
- # Reshape input to match model input shape: (batch_size, time_steps, features)
45
- input_features = np.reshape(input_features, (input_features.shape[0], input_features.shape[1], 1))
46
-
47
- return input_features
48
-
49
- # Function to generate test steps
50
- def generate_test_steps(acceptance_criteria):
51
- # Preprocess the input text
52
- input_features = preprocess_text(f"{acceptance_criteria}")
53
-
54
- # Generate prediction
55
- predicted_steps = model.predict(input_features)
56
- return predicted_steps
57
 
58
  # Streamlit App
59
  st.title("Test Case Steps Generator")
60
- st.write("This app generates test steps based on Test Case Acceptance Criteria")
61
 
62
  # Input section
63
  acceptance_criteria = st.text_area("Enter Test Case Acceptance Criteria")
@@ -65,14 +78,17 @@ acceptance_criteria = st.text_area("Enter Test Case Acceptance Criteria")
65
  # Generate Test Steps
66
  if st.button("Generate Test Steps"):
67
  if acceptance_criteria:
68
- if model:
69
- # Call the function to generate predictions
70
- test_steps = generate_test_steps(acceptance_criteria)
 
 
 
71
 
72
  # Display the results
73
  st.subheader("Generated Test Steps")
74
- st.write(test_steps)
75
  else:
76
  st.error("Model not loaded. Please check the model repository and file.")
77
  else:
78
- st.warning("Please fill Acceptance Criteria")
 
1
  import streamlit as st
2
  import numpy as np
 
 
3
  from tensorflow.keras.models import load_model
4
  from huggingface_hub import hf_hub_download
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem import WordNetLemmatizer
7
+ import gensim
8
+ import json
9
 
10
+ # Load stop words and lemmatizer
11
+ stop_words = set(stopwords.words('english'))
12
+ lemmatizer = WordNetLemmatizer()
13
+
14
# Function to preprocess input text (matching training pipeline)
def preprocess_text(input_text, word2vec_model, max_timesteps=100):
    """Convert raw text into a (1, max_timesteps, vector_size) embedding tensor.

    Mirrors the training pipeline visible here: lowercase -> whitespace
    tokenize -> stop-word removal -> verb lemmatization -> Word2Vec lookup,
    then pad/truncate to a fixed number of time steps.

    Args:
        input_text: Raw acceptance-criteria text.
        word2vec_model: Trained gensim Word2Vec model; must expose ``.wv``
            (membership + vector lookup) and ``.vector_size``.
        max_timesteps: Fixed sequence length expected by the downstream LSTM.
            Defaults to 100, matching the previously hard-coded value.

    Returns:
        numpy.ndarray of shape (1, max_timesteps, word2vec_model.vector_size).

    NOTE(review): relies on module-level ``stop_words``/``lemmatizer``; the
    nltk 'stopwords' and 'wordnet' corpora must be downloaded in the
    deployment environment or the module-level setup will raise — confirm.
    """
    vector_size = word2vec_model.vector_size

    # Lowercase and whitespace-tokenize (same simple split as before).
    tokens = input_text.lower().split()

    # Drop stop words, then lemmatize the survivors as verbs (pos='v'),
    # matching the original filter-then-lemmatize order.
    tokens = [
        lemmatizer.lemmatize(token, pos='v')
        for token in tokens
        if token not in stop_words
    ]

    # Embedding lookup; out-of-vocabulary tokens map to a zero vector.
    zero_vec = np.zeros(vector_size)
    embeddings = [
        word2vec_model.wv[token] if token in word2vec_model.wv else zero_vec
        for token in tokens
    ]

    # Truncate, then pad with zero vectors, to exactly max_timesteps steps.
    # Handles empty input cleanly (all-zero sequence).
    embeddings = embeddings[:max_timesteps]
    embeddings.extend([zero_vec] * (max_timesteps - len(embeddings)))

    # Shape expected by the LSTM: (batch_size=1, time_steps, features).
    return np.array(embeddings).reshape((1, max_timesteps, vector_size))
47
+
48
# Load Word2Vec model
def load_word2vec_model(word2vec_path=None):
    """Load the trained gensim Word2Vec model used for text embeddings.

    Fix: the path was hard-coded to a developer-machine absolute path, which
    cannot exist on a deployed environment (e.g. a Hugging Face Space). The
    path can now be passed explicitly or set via the WORD2VEC_MODEL_PATH
    environment variable; the old path remains the final fallback so existing
    local runs keep working.

    Args:
        word2vec_path: Optional filesystem path to the saved Word2Vec model.

    Returns:
        The loaded gensim.models.Word2Vec instance.
    """
    import os  # local import keeps this fix self-contained

    if word2vec_path is None:
        word2vec_path = os.environ.get(
            'WORD2VEC_MODEL_PATH',
            '/Users/preethamreddygollapalli/Downloads/word2vec_model.bin',
        )
    return gensim.models.Word2Vec.load(word2vec_path)
52
+
53
+ # Load LSTM model from Hugging Face
54
  def load_model_test_steps():
55
  repo_id = 'Preethamreddy799/NLP_MODEL'
56
+ filename = 'model_test_steps.h5' # Update with actual file name
57
 
58
+ # Download the model from Hugging Face
59
  cached_model_path = hf_hub_download(repo_id=repo_id, filename=filename)
60
 
61
  # Load the model from the cached path
 
64
  print(f"Model loaded successfully from {cached_model_path}")
65
  return model
66
 
67
+ # Initialize models
68
+ word2vec_model = load_word2vec_model()
69
+ lstm_model = load_model_test_steps()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  # Streamlit App
72
  st.title("Test Case Steps Generator")
73
+ st.write("This app generates test steps based on Test Case Acceptance Criteria.")
74
 
75
  # Input section
76
  acceptance_criteria = st.text_area("Enter Test Case Acceptance Criteria")
 
78
  # Generate Test Steps
79
  if st.button("Generate Test Steps"):
80
  if acceptance_criteria:
81
+ if lstm_model:
82
+ # Preprocess input text
83
+ input_features = preprocess_text(acceptance_criteria, word2vec_model)
84
+
85
+ # Generate prediction
86
+ predicted_steps = lstm_model.predict(input_features)
87
 
88
  # Display the results
89
  st.subheader("Generated Test Steps")
90
+ st.write(predicted_steps)
91
  else:
92
  st.error("Model not loaded. Please check the model repository and file.")
93
  else:
94
+ st.warning("Please fill Acceptance Criteria.")