Preethamreddy799 committed on
Commit
4634560
·
1 Parent(s): d40f814

new update

Browse files
Files changed (1) hide show
  1. app.py +7 -19
app.py CHANGED
@@ -18,38 +18,26 @@ lemmatizer = WordNetLemmatizer()
18
 
19
  # Function to preprocess input text
20
def preprocess_text(input_text, word2vec_model):
    """Turn a raw text string into LSTM-ready Word2Vec features.

    Pipeline: lowercase -> whitespace tokenize -> stop-word filter ->
    verb lemmatization -> per-token Word2Vec lookup (zero vector for
    out-of-vocabulary words) -> pad/truncate to the LSTM's expected
    sequence length.

    Args:
        input_text: Raw text string to preprocess.
        word2vec_model: Trained gensim Word2Vec model; `.wv` and
            `.vector_size` are the only attributes read.

    Returns:
        np.ndarray of shape (1, time_steps, vector_size), where
        time_steps is read dynamically from `lstm_model.input_shape[1]`.
    """
    # Normalize and tokenize. `stop_words` and `lemmatizer` are
    # module-level objects defined earlier in the file.
    words = input_text.lower().split()
    words = [w for w in words if w not in stop_words]
    words = [lemmatizer.lemmatize(w, pos='v') for w in words]

    # Per-token embedding lookup; OOV words become zero vectors so the
    # sequence keeps its length instead of silently shrinking.
    embeddings = []
    for w in words:
        if w in word2vec_model.wv:
            embeddings.append(word2vec_model.wv[w])
        else:
            embeddings.append(np.zeros(word2vec_model.vector_size))

    # Sequence length the LSTM was built for, taken from the model itself.
    time_steps = lstm_model.input_shape[1]
    vector_size = word2vec_model.vector_size

    if len(embeddings) > time_steps:
        embeddings = embeddings[:time_steps]
    else:
        embeddings.extend([np.zeros(vector_size)] * (time_steps - len(embeddings)))

    # Single-sample batch: shape (1, time_steps, vector_size).
    return np.array(embeddings).reshape((1, time_steps, vector_size))
54
 
55
  # Load Word2Vec model
 
18
 
19
  # Function to preprocess input text
20
def preprocess_text(input_text, word2vec_model, max_timesteps=100):
    """Convert raw text into a fixed-shape Word2Vec feature tensor.

    Lowercases and whitespace-tokenizes *input_text*, drops stop words,
    verb-lemmatizes the remaining tokens, and looks each token up in
    *word2vec_model*. Out-of-vocabulary tokens map to zero vectors, and
    the sequence is truncated or zero-padded to *max_timesteps*.

    Args:
        input_text: Raw text string to preprocess.
        word2vec_model: Trained gensim Word2Vec model; `.wv` and
            `.vector_size` are the only attributes read.
        max_timesteps: Fixed sequence length expected by the downstream
            model. Default 100 matches the previously hard-coded value,
            so existing callers are unaffected.

    Returns:
        np.ndarray of shape (1, max_timesteps, word2vec_model.vector_size).
    """
    # Normalize and tokenize. `stop_words` and `lemmatizer` are
    # module-level objects defined earlier in the file.
    tokens = input_text.lower().split()
    tokens = [token for token in tokens if token not in stop_words]
    tokens = [lemmatizer.lemmatize(token, pos='v') for token in tokens]

    # Hoist the attribute lookup; it is used in three places below.
    vector_size = word2vec_model.vector_size

    # Zero vector stands in for out-of-vocabulary tokens so the sequence
    # keeps its alignment instead of silently shrinking.
    embeddings = [
        word2vec_model.wv[token] if token in word2vec_model.wv
        else np.zeros(vector_size)
        for token in tokens
    ]

    # Truncate long sequences; zero-pad short (or empty) ones so the
    # result always has exactly max_timesteps rows.
    if len(embeddings) > max_timesteps:
        embeddings = embeddings[:max_timesteps]
    else:
        embeddings.extend([np.zeros(vector_size)] * (max_timesteps - len(embeddings)))

    # Single-sample batch for the downstream model.
    return np.array(embeddings).reshape((1, max_timesteps, vector_size))
42
 
43
  # Load Word2Vec model