Preethamreddy799 committed on
Commit
d40f814
·
1 Parent(s): 935cbd6

Derive the LSTM time_steps dynamically from the model instead of hard-coding 100, and add explanatory comments to preprocess_text

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -18,26 +18,38 @@ lemmatizer = WordNetLemmatizer()
18
 
19
# Function to preprocess input text
def preprocess_text(input_text, word2vec_model, max_timesteps=100):
    """Convert raw text into a fixed-size embedding tensor for the LSTM.

    Parameters
    ----------
    input_text : str
        Raw user text.
    word2vec_model :
        Word2Vec-style model exposing ``.wv`` (supports ``in`` and ``[]``
        lookup per token) and ``.vector_size``.
    max_timesteps : int, optional
        Sequence length to truncate/pad to. Defaults to 100, preserving the
        original hard-coded behavior; parameterized so callers can match a
        different model's expected input length.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, max_timesteps, vector_size)``.
    """
    # Normalize case, then split on whitespace (no punctuation stripping here).
    input_text = input_text.lower()
    tokens = input_text.split()

    # Drop stop words, then lemmatize as verbs.
    # NOTE(review): `stop_words` and `lemmatizer` are module-level globals
    # defined earlier in app.py — confirm they are initialized before use.
    tokens = [token for token in tokens if token not in stop_words]
    tokens = [lemmatizer.lemmatize(token, pos='v') for token in tokens]

    # One embedding per token; out-of-vocabulary tokens map to zero vectors.
    embeddings = []
    for token in tokens:
        if token in word2vec_model.wv:
            embeddings.append(word2vec_model.wv[token])
        else:
            embeddings.append(np.zeros(word2vec_model.vector_size))

    # Truncate or zero-pad so every input has exactly max_timesteps rows.
    # Empty input is handled naturally: it is padded to all zeros.
    if len(embeddings) > max_timesteps:
        embeddings = embeddings[:max_timesteps]
    else:
        padding = [np.zeros(word2vec_model.vector_size)] * (max_timesteps - len(embeddings))
        embeddings.extend(padding)

    # Add a batch dimension of 1: shape (1, max_timesteps, vector_size).
    input_features = np.array(embeddings).reshape((1, max_timesteps, word2vec_model.vector_size))
    return input_features
42
 
43
  # Load Word2Vec model
 
18
 
19
# Function to preprocess input text
def preprocess_text(input_text, word2vec_model, time_steps=None):
    """Convert raw text into a fixed-size embedding tensor for the LSTM.

    Parameters
    ----------
    input_text : str
        Raw user text.
    word2vec_model :
        Word2Vec-style model exposing ``.wv`` (supports ``in`` and ``[]``
        lookup per token) and ``.vector_size``.
    time_steps : int, optional
        Sequence length to truncate/pad to. When None (the default, which
        preserves the original behavior) it is read from the global
        ``lstm_model.input_shape[1]``. Parameterized so the function has no
        hard dependency on the global model and can be tested in isolation.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, time_steps, vector_size)``.
    """
    # Convert to lowercase and tokenize on whitespace
    # (no punctuation stripping here).
    input_text = input_text.lower()
    tokens = input_text.split()

    # Remove stop words, then lemmatize as verbs.
    # NOTE(review): `stop_words` and `lemmatizer` are module-level globals
    # defined earlier in app.py — confirm they are initialized before use.
    tokens = [token for token in tokens if token not in stop_words]
    tokens = [lemmatizer.lemmatize(token, pos='v') for token in tokens]

    # Generate Word2Vec embeddings for tokens; out-of-vocabulary tokens
    # map to zero vectors so sequence positions are preserved.
    embeddings = []
    for token in tokens:
        if token in word2vec_model.wv:
            embeddings.append(word2vec_model.wv[token])
        else:
            embeddings.append(np.zeros(word2vec_model.vector_size))  # Handle OOV words

    # Pad or truncate embeddings to match the model's time_steps.
    if time_steps is None:
        # NOTE(review): assumes lstm_model.input_shape[1] is a concrete int;
        # a variable-length model (shape[1] is None) would break the reshape
        # below — confirm the model was built with a fixed sequence length.
        time_steps = lstm_model.input_shape[1]
    vector_size = word2vec_model.vector_size

    if len(embeddings) > time_steps:
        embeddings = embeddings[:time_steps]
    else:
        padding = [np.zeros(vector_size)] * (time_steps - len(embeddings))
        embeddings.extend(padding)

    # Convert to NumPy array with shape (1, time_steps, vector_size).
    input_features = np.array(embeddings).reshape((1, time_steps, vector_size))
    return input_features
54
 
55
  # Load Word2Vec model