Blessmore committed on
Commit
e4ee49a
·
verified ·
1 Parent(s): aff0f0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py CHANGED
@@ -173,3 +173,157 @@ def main():
173
  epochs=100,
174
  bucket=2000000,
175
  min_n=3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  epochs=100,
174
  bucket=2000000,
175
  min_n=3,
176
+ max_n=6
177
+ )
178
+ end_time = time.time()
179
+
180
+ # Calculate the elapsed time
181
+ elapsed_time = end_time - start_time
182
+ st.write("Time taken: {:.2f} minutes".format(elapsed_time / 60))
183
+
184
+ st.write("Model trained successfully!")
185
+
186
+ # Zip the model files in memory
187
+ zip_buffer = zip_model(model)
188
+
189
+ # Provide download link
190
+ st.download_button(
191
+ label="Download Model",
192
+ data=zip_buffer,
193
+ file_name="fasttext_model.zip",
194
+ mime="application/zip"
195
+ )
196
+ except Exception as e:
197
+ st.error(f"An error occurred: {str(e)}")
198
+ st.error("Check the server logs for more details.")
199
+
200
+ elif option == "Generate Embeddings":
201
+ st.header("Generate Embeddings with Pretrained FastText Model")
202
+
203
+ # Specify the path to the model folder
204
+ model_folder = "Fast_text_50_dim"
205
+
206
+ # Load the model from the specified folder
207
+ model = load_fasttext_model(model_folder)
208
+
209
+ st.subheader("Generate Word Embedding")
210
+ word = st.text_input("Enter a word:")
211
+ if word:
212
+ embedding = generate_word_embedding(word, model)
213
+ if embedding is not None:
214
+ st.write(f"Embedding for '{word}':", embedding)
215
+ else:
216
+ st.write(f"'{word}' not in vocabulary")
217
+
218
+ st.subheader("Find Similar Words")
219
+ word_for_similar = st.text_input("Enter a word to find similar words:")
220
+ if word_for_similar:
221
+ similar_words = find_similar_words(word_for_similar, model)
222
+ if similar_words:
223
+ st.write("Similar words:")
224
+ for word, similarity in similar_words:
225
+ st.write(f"{word}: {similarity}")
226
+ else:
227
+ st.write(f"No similar words found for '{word_for_similar}'")
228
+
229
+ st.subheader("Generate Embeddings for Words in a Sentence")
230
+ sentence = st.text_input("Enter a sentence:")
231
+ if sentence:
232
+ word_embeddings = generate_embeddings_for_sentence(sentence, model, r'\b\w+\b')
233
+ if word_embeddings:
234
+ for idx, embedding in enumerate(word_embeddings):
235
+ st.write(f"Word {idx+1} embedding:", embedding)
236
+ else:
237
+ st.write("No embeddings could be generated for the words in the sentence.")
238
+
239
+ st.subheader("Generate Embedding for a Sentence")
240
+ sentence_for_embedding = st.text_input("Enter a sentence to generate its embedding:")
241
+ if sentence_for_embedding:
242
+ sentence_embedding = generate_sentence_embedding(sentence_for_embedding, model, r'\b\w+\b')
243
+ if sentence_embedding is not None:
244
+ st.write("Sentence embedding:", sentence_embedding)
245
+ else:
246
+ st.write("No embedding could be generated for the sentence.")
247
+
248
+ st.subheader("Find Most Similar Sentence Pairs")
249
+ uploaded_sentences_file = st.file_uploader("Upload a text file with sentences (one per line)", type=["txt"])
250
+ if uploaded_sentences_file:
251
+ sentences = uploaded_sentences_file.read().decode('utf-8').splitlines()
252
+ sentence_embeddings = generate_sentence_embeddings(sentences, model, r'\b\w+\b')
253
+ sentence_pairs = []
254
+ for i in range(len(sentences)):
255
+ for j in range(i + 1, len(sentences)):
256
+ if sentence_embeddings[i] is not None and sentence_embeddings[j] is not None:
257
+ similarity = cosine_similarity([sentence_embeddings[i]], [sentence_embeddings[j]])[0][0]
258
+ sentence_pairs.append((sentences[i], sentences[j], similarity))
259
+ sentence_pairs = sorted(sentence_pairs, key=lambda x: x[2], reverse=True)
260
+ st.write("Most similar sentence pairs:")
261
+ for sent1, sent2, sim in sentence_pairs[:5]:
262
+ st.write(f"Sentence 1: {sent1}")
263
+ st.write(f"Sentence 2: {sent2}")
264
+ st.write(f"Similarity: {sim}")
265
+ st.write("-----")
266
+
267
+ # Sub-sidebar under "Generate Embeddings" option
268
+ if option == "Generate Embeddings":
269
+ st.sidebar.title("Embeddings Operations")
270
+ st.sidebar.subheader("Generate Word Embedding")
271
+ word_operation = st.sidebar.text_input("Enter a word for embedding:")
272
+ if word_operation:
273
+ word_embedding = generate_word_embedding(word_operation, model)
274
+ if word_embedding is not None:
275
+ st.sidebar.write(f"Embedding for '{word_operation}':", word_embedding)
276
+ else:
277
+ st.sidebar.write(f"'{word_operation}' not in vocabulary")
278
+
279
+ st.sidebar.subheader("Find Similar Words")
280
+ similar_word_operation = st.sidebar.text_input("Enter a word to find similar words:")
281
+ if similar_word_operation:
282
+ similar_words = find_similar_words(similar_word_operation, model)
283
+ if similar_words:
284
+ st.sidebar.write("Similar words:")
285
+ for word, similarity in similar_words:
286
+ st.sidebar.write(f"{word}: {similarity}")
287
+ else:
288
+ st.sidebar.write(f"No similar words found for '{similar_word_operation}'")
289
+
290
+ st.sidebar.subheader("Generate Embeddings for Words in a Sentence")
291
+ sentence_operation = st.sidebar.text_input("Enter a sentence for word embeddings:")
292
+ if sentence_operation:
293
+ word_embeddings = generate_embeddings_for_sentence(sentence_operation, model, r'\b\w+\b')
294
+ if word_embeddings:
295
+ for idx, embedding in enumerate(word_embeddings):
296
+ st.sidebar.write(f"Word {idx+1} embedding:", embedding)
297
+ else:
298
+ st.sidebar.write("No embeddings could be generated for the words in the sentence.")
299
+
300
+ st.sidebar.subheader("Generate Embedding for a Sentence")
301
+ sentence_embedding_operation = st.sidebar.text_input("Enter a sentence for its embedding:")
302
+ if sentence_embedding_operation:
303
+ sentence_emb = generate_sentence_embedding(sentence_embedding_operation, model, r'\b\w+\b')
304
+ if sentence_emb is not None:
305
+ st.sidebar.write("Sentence embedding:", sentence_emb)
306
+ else:
307
+ st.sidebar.write("No embedding could be generated for the sentence.")
308
+
309
+ st.sidebar.subheader("Find Most Similar Sentence Pairs")
310
+ uploaded_sentences_file_operation = st.sidebar.file_uploader("Upload a text file with sentences (one per line)", type=["txt"])
311
+ if uploaded_sentences_file_operation:
312
+ sentences_operation = uploaded_sentences_file_operation.read().decode('utf-8').splitlines()
313
+ sentence_embeddings_operation = generate_sentence_embeddings(sentences_operation, model, r'\b\w+\b')
314
+ sentence_pairs_operation = []
315
+ for i in range(len(sentences_operation)):
316
+ for j in range(i + 1, len(sentences_operation)):
317
+ if sentence_embeddings_operation[i] is not None and sentence_embeddings_operation[j] is not None:
318
+ similarity = cosine_similarity([sentence_embeddings_operation[i]], [sentence_embeddings_operation[j]])[0][0]
319
+ sentence_pairs_operation.append((sentences_operation[i], sentences_operation[j], similarity))
320
+ sentence_pairs_operation = sorted(sentence_pairs_operation, key=lambda x: x[2], reverse=True)
321
+ st.sidebar.write("Most similar sentence pairs:")
322
+ for sent1, sent2, sim in sentence_pairs_operation[:5]:
323
+ st.sidebar.write(f"Sentence 1: {sent1}")
324
+ st.sidebar.write(f"Sentence 2: {sent2}")
325
+ st.sidebar.write(f"Similarity: {sim}")
326
+ st.sidebar.write("-----")
327
+
328
+ if __name__ == "__main__":
329
+ main()