Spaces:

Blessmore
/

fasttext_embedding_Pipeline

Build error

App Files Files Community

Blessmore committed on May 23, 2024

Commit

e4ee49a

verified ·

1 Parent(s): aff0f0f

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -0

app.py CHANGED Viewed

@@ -173,3 +173,157 @@ def main():
                         epochs=100,
                         bucket=2000000,
                         min_n=3,

                         epochs=100,
                         bucket=2000000,
                         min_n=3,
+                        max_n=6
+                    )
+                    end_time = time.time()
+                    # Calculate the elapsed time
+                    elapsed_time = end_time - start_time
+                    st.write("Time taken: {:.2f} minutes".format(elapsed_time / 60))
+                    st.write("Model trained successfully!")
+                    # Zip the model files in memory
+                    zip_buffer = zip_model(model)
+                    # Provide download link
+                    st.download_button(
+                        label="Download Model",
+                        data=zip_buffer,
+                        file_name="fasttext_model.zip",
+                        mime="application/zip"
+                    )
+                except Exception as e:
+                    st.error(f"An error occurred: {str(e)}")
+                    st.error("Check the server logs for more details.")
+    elif option == "Generate Embeddings":
+        st.header("Generate Embeddings with Pretrained FastText Model")
+        # Specify the path to the model folder
+        model_folder = "Fast_text_50_dim"
+        # Load the model from the specified folder
+        model = load_fasttext_model(model_folder)
+        st.subheader("Generate Word Embedding")
+        word = st.text_input("Enter a word:")
+        if word:
+            embedding = generate_word_embedding(word, model)
+            if embedding is not None:
+                st.write(f"Embedding for '{word}':", embedding)
+            else:
+                st.write(f"'{word}' not in vocabulary")
+        st.subheader("Find Similar Words")
+        word_for_similar = st.text_input("Enter a word to find similar words:")
+        if word_for_similar:
+            similar_words = find_similar_words(word_for_similar, model)
+            if similar_words:
+                st.write("Similar words:")
+                for word, similarity in similar_words:
+                    st.write(f"{word}: {similarity}")
+            else:
+                st.write(f"No similar words found for '{word_for_similar}'")
+        st.subheader("Generate Embeddings for Words in a Sentence")
+        sentence = st.text_input("Enter a sentence:")
+        if sentence:
+            word_embeddings = generate_embeddings_for_sentence(sentence, model, r'\b\w+\b')
+            if word_embeddings:
+                for idx, embedding in enumerate(word_embeddings):
+                    st.write(f"Word {idx+1} embedding:", embedding)
+            else:
+                st.write("No embeddings could be generated for the words in the sentence.")
+        st.subheader("Generate Embedding for a Sentence")
+        sentence_for_embedding = st.text_input("Enter a sentence to generate its embedding:")
+        if sentence_for_embedding:
+            sentence_embedding = generate_sentence_embedding(sentence_for_embedding, model, r'\b\w+\b')
+            if sentence_embedding is not None:
+                st.write("Sentence embedding:", sentence_embedding)
+            else:
+                st.write("No embedding could be generated for the sentence.")
+        st.subheader("Find Most Similar Sentence Pairs")
+        uploaded_sentences_file = st.file_uploader("Upload a text file with sentences (one per line)", type=["txt"])
+        if uploaded_sentences_file:
+            sentences = uploaded_sentences_file.read().decode('utf-8').splitlines()
+            sentence_embeddings = generate_sentence_embeddings(sentences, model, r'\b\w+\b')
+            sentence_pairs = []
+            for i in range(len(sentences)):
+                for j in range(i + 1, len(sentences)):
+                    if sentence_embeddings[i] is not None and sentence_embeddings[j] is not None:
+                        similarity = cosine_similarity([sentence_embeddings[i]], [sentence_embeddings[j]])[0][0]
+                        sentence_pairs.append((sentences[i], sentences[j], similarity))
+            sentence_pairs = sorted(sentence_pairs, key=lambda x: x[2], reverse=True)
+            st.write("Most similar sentence pairs:")
+            for sent1, sent2, sim in sentence_pairs[:5]:
+                st.write(f"Sentence 1: {sent1}")
+                st.write(f"Sentence 2: {sent2}")
+                st.write(f"Similarity: {sim}")
+                st.write("-----")
+    # Sub-sidebar under "Generate Embeddings" option
+    if option == "Generate Embeddings":
+        st.sidebar.title("Embeddings Operations")
+        st.sidebar.subheader("Generate Word Embedding")
+        word_operation = st.sidebar.text_input("Enter a word for embedding:")
+        if word_operation:
+            word_embedding = generate_word_embedding(word_operation, model)
+            if word_embedding is not None:
+                st.sidebar.write(f"Embedding for '{word_operation}':", word_embedding)
+            else:
+                st.sidebar.write(f"'{word_operation}' not in vocabulary")
+        st.sidebar.subheader("Find Similar Words")
+        similar_word_operation = st.sidebar.text_input("Enter a word to find similar words:")
+        if similar_word_operation:
+            similar_words = find_similar_words(similar_word_operation, model)
+            if similar_words:
+                st.sidebar.write("Similar words:")
+                for word, similarity in similar_words:
+                    st.sidebar.write(f"{word}: {similarity}")
+            else:
+                st.sidebar.write(f"No similar words found for '{similar_word_operation}'")
+        st.sidebar.subheader("Generate Embeddings for Words in a Sentence")
+        sentence_operation = st.sidebar.text_input("Enter a sentence for word embeddings:")
+        if sentence_operation:
+            word_embeddings = generate_embeddings_for_sentence(sentence_operation, model, r'\b\w+\b')
+            if word_embeddings:
+                for idx, embedding in enumerate(word_embeddings):
+                    st.sidebar.write(f"Word {idx+1} embedding:", embedding)
+            else:
+                st.sidebar.write("No embeddings could be generated for the words in the sentence.")
+        st.sidebar.subheader("Generate Embedding for a Sentence")
+        sentence_embedding_operation = st.sidebar.text_input("Enter a sentence for its embedding:")
+        if sentence_embedding_operation:
+            sentence_emb = generate_sentence_embedding(sentence_embedding_operation, model, r'\b\w+\b')
+            if sentence_emb is not None:
+                st.sidebar.write("Sentence embedding:", sentence_emb)
+            else:
+                st.sidebar.write("No embedding could be generated for the sentence.")
+        st.sidebar.subheader("Find Most Similar Sentence Pairs")
+        uploaded_sentences_file_operation = st.sidebar.file_uploader("Upload a text file with sentences (one per line)", type=["txt"])
+        if uploaded_sentences_file_operation:
+            sentences_operation = uploaded_sentences_file_operation.read().decode('utf-8').splitlines()
+            sentence_embeddings_operation = generate_sentence_embeddings(sentences_operation, model, r'\b\w+\b')
+            sentence_pairs_operation = []
+            for i in range(len(sentences_operation)):
+                for j in range(i + 1, len(sentences_operation)):
+                    if sentence_embeddings_operation[i] is not None and sentence_embeddings_operation[j] is not None:
+                        similarity = cosine_similarity([sentence_embeddings_operation[i]], [sentence_embeddings_operation[j]])[0][0]
+                        sentence_pairs_operation.append((sentences_operation[i], sentences_operation[j], similarity))
+            sentence_pairs_operation = sorted(sentence_pairs_operation, key=lambda x: x[2], reverse=True)
+            st.sidebar.write("Most similar sentence pairs:")
+            for sent1, sent2, sim in sentence_pairs_operation[:5]:
+                st.sidebar.write(f"Sentence 1: {sent1}")
+                st.sidebar.write(f"Sentence 2: {sent2}")
+                st.sidebar.write(f"Similarity: {sim}")
+                st.sidebar.write("-----")
+if __name__ == "__main__":
+    main()