tatar2vec-demo

Sleeping

App Files Files Community

ArabovMK committed on Mar 18

Commit

deb7578

verified ·

1 Parent(s): f3c53e4

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -95

app.py CHANGED Viewed

@@ -12,10 +12,13 @@ from plotly.subplots import make_subplots
 import os
 from typing import List, Dict, Tuple, Optional
 import gc
 # Import for model loading from Hugging Face Hub
-from huggingface_hub import snapshot_download
-from gensim.models import FastText, Word2Vec
 # Page configuration
 st.set_page_config(
@@ -61,61 +64,104 @@ class Tatar2VecExplorer:
         # Model names and their paths in Hugging Face repo
         self.available_models = {
             "Word2Vec": {
-                "best": "w2v_cbow_100",      # Best overall for analogies
-                "alternative": "w2v_sg_100"   # Skip-gram alternative
             },
             "FastText": {
-                "best": "ft_cbow_100",        # Best FastText
-                "alternative": "ft_cbow_200"  # Larger FastText
             }
         }
         # Model paths in the Hugging Face repository
         self.model_configs = {
             "w2v_cbow_100": {
                 "subdir": "word2vec/cbow100",
-                "file": "w2v_cbow_100.model",
                 "type": "word2vec",
                 "dim": 100,
-                "description": "Word2Vec CBOW 100-dim - Best for analogies",
                 "analogy_accuracy": 0.60,
-                "semantic_similarity": 0.568
             },
             "w2v_cbow_200": {
                 "subdir": "word2vec/cbow200",
-                "file": "w2v_cbow_200.model",
                 "type": "word2vec",
                 "dim": 200,
-                "description": "Word2Vec CBOW 200-dim - Higher dimensionality",
                 "analogy_accuracy": None,
-                "semantic_similarity": None
             },
             "w2v_sg_100": {
                 "subdir": "word2vec/sg100",
-                "file": "w2v_sg_100.model",
                 "type": "word2vec",
                 "dim": 100,
-                "description": "Word2Vec Skip-gram 100-dim - Better for rare words",
                 "analogy_accuracy": None,
-                "semantic_similarity": None
             },
             "ft_cbow_100": {
                 "subdir": "fasttext/cbow100",
-                "file": "ft_cbow_100.model",
                 "type": "fasttext",
                 "dim": 100,
-                "description": "FastText CBOW 100-dim - Handles morphology",
                 "analogy_accuracy": 0.0,
-                "semantic_similarity": 0.582
             },
             "ft_cbow_200": {
                 "subdir": "fasttext/cbow200",
-                "file": "ft_cbow_200.model",
                 "type": "fasttext",
                 "dim": 200,
                 "description": "FastText CBOW 200-dim - Larger FastText model",
                 "analogy_accuracy": 0.0,
-                "semantic_similarity": None
             }
         }
@@ -134,44 +180,101 @@ class Tatar2VecExplorer:
             progress_bar = st.progress(0)
             status_text = st.empty()
-            status_text.text(f"Downloading {model_key} from Hugging Face...")
-            progress_bar.progress(20)
-            # Download only the specific model files
-            model_dir = snapshot_download(
-                repo_id=repo_id,
-                allow_patterns=[f"{config['subdir']}/*"],
-                ignore_patterns=["*.git*", "README.md", "*.txt"],
-                local_files_only=False
-            )
-            progress_bar.progress(60)
-            status_text.text(f"Files downloaded, loading model...")
-            # Full path to the model file
-            model_path = os.path.join(model_dir, config['subdir'], config['file'])
-            # Check if model file exists
-            if not os.path.exists(model_path):
-                # Try to find any .model file in the directory
-                model_dir_path = os.path.join(model_dir, config['subdir'])
-                if os.path.exists(model_dir_path):
-                    model_files = [f for f in os.listdir(model_dir_path) if f.endswith('.model')]
-                    if model_files:
-                        model_path = os.path.join(model_dir_path, model_files[0])
-                        status_text.text(f"Found model file: {model_files[0]}")
             progress_bar.progress(80)
-            # Load the model
             try:
-                if config['type'] == "fasttext":
-                    model = FastText.load(model_path)
                 else:
-                    model = Word2Vec.load(model_path)
                 progress_bar.progress(100)
-                status_text.text(f"✅ Successfully loaded {model_key}!")
                 # Clean up progress indicators after 2 seconds
                 import time
@@ -182,7 +285,9 @@ class Tatar2VecExplorer:
                 return model
             except Exception as e:
-                st.error(f"Error loading model from {model_path}: {str(e)}")
                 return None
         except Exception as e:
@@ -194,7 +299,7 @@ class Tatar2VecExplorer:
         names = {
             "w2v_cbow_100": "🥇 Word2Vec CBOW (100-dim)",
             "w2v_cbow_200": "📈 Word2Vec CBOW (200-dim)",
-            "w2v_sg_100": "🎯 Word2Vec Skip-gram (100-dim)",
             "ft_cbow_100": "⚡ FastText CBOW (100-dim)",
             "ft_cbow_200": "🚀 FastText CBOW (200-dim)"
         }
@@ -204,13 +309,20 @@ class Tatar2VecExplorer:
         """Get model information"""
         return self.model_configs.get(model_key, {})
     def find_similar_words(self, model, word: str, topn: int = 10):
         """Find semantically similar words"""
         try:
             if hasattr(model, 'wv'):
                 return model.wv.most_similar(word, topn=topn)
-            else:
                 return model.most_similar(word, topn=topn)
         except KeyError:
             return []
         except Exception as e:
@@ -222,8 +334,10 @@ class Tatar2VecExplorer:
         try:
             if hasattr(model, 'wv'):
                 return model.wv.most_similar(positive=positive, negative=negative, topn=topn)
-            else:
                 return model.most_similar(positive=positive, negative=negative, topn=topn)
         except Exception as e:
             st.error(f"Error performing analogy: {e}")
             return []
@@ -233,8 +347,12 @@ class Tatar2VecExplorer:
         try:
             if hasattr(model, 'wv'):
                 return model.wv[word]
-            else:
                 return model[word]
         except KeyError:
             return None
@@ -247,6 +365,10 @@ class Tatar2VecExplorer:
                 in_vocab = False
                 if hasattr(model, 'wv'):
                     in_vocab = word in model.wv.key_to_index
                 similar = self.find_similar_words(model, word, 3) if in_vocab else []
                 results.append({
@@ -261,12 +383,6 @@ class Tatar2VecExplorer:
                     'similar_words': []
                 })
         return results
-    def unload_model(self, model_key: str):
-        """Unload model to free memory"""
-        if model_key in self.loaded_models:
-            del self.loaded_models[model_key]
-            gc.collect()
 def create_performance_comparison():
     """Create model performance comparison charts"""
@@ -292,7 +408,7 @@ def create_performance_comparison():
             x=['Word2Vec CBOW 100', 'FastText CBOW 100'],
             y=analogy_scores,
             marker_color=['#1f77b4', '#d62728'],
-            text=[f"{score*100:.1f}%" if score > 0 else "0%" for score in analogy_scores],
             textposition='auto',
         ),
         row=1, col=1
@@ -312,7 +428,7 @@ def create_performance_comparison():
     )
     fig.update_layout(
-        title_text="Model Performance Comparison",
         showlegend=False,
         height=400,
         width=800
@@ -341,14 +457,38 @@ def main():
             index=0
         )
-        # Model variant selection
-        model_variant = st.radio(
-            "Model Variant:",
-            ["best", "alternative"],
-            format_func=lambda x: "🥇 Best Model (CBOW 100)" if x == "best" else "🥈 Alternative Model"
-        )
-        model_key = explorer.available_models[model_type][model_variant]
         # Model information section
         st.markdown("---")
@@ -359,11 +499,14 @@ def main():
             st.markdown(f"**{explorer.get_model_display_name(model_key)}**")
             st.caption(model_info.get('description', ''))
             col1, col2 = st.columns(2)
             with col1:
                 if model_info.get('analogy_accuracy') is not None:
                     acc = model_info['analogy_accuracy']
-                    st.metric("Analogy Accuracy", f"{acc*100:.1f}%" if acc > 0 else "N/A")
             with col2:
                 if model_info.get('semantic_similarity') is not None:
@@ -371,6 +514,10 @@ def main():
                     st.metric("Semantic Similarity", f"{sim:.3f}" if sim else "N/A")
             st.metric("Vector Dimension", model_info.get('dim', 'N/A'))
         # Quick search examples
         st.markdown("---")
@@ -378,9 +525,8 @@ def main():
         quick_words = ["татар", "Казан", "тел", "мәктәп", "китап", "уку", "язу", "бәйрәм"]
         selected_quick = st.selectbox("Example words:", quick_words)
-        if st.button("Quick Similarity Search"):
             st.session_state.quick_search = selected_quick
-            st.session_state.active_tab = "Word Search"
     # Main content area with tabs
     tab1, tab2, tab3, tab4 = st.tabs(["🔍 Word Search", "🧠 Analogies", "📊 Analysis", "ℹ️ About"])
@@ -403,7 +549,7 @@ def main():
         with col2:
             top_n = st.slider("Number of similar words:", 5, 20, 10)
-        if st.button("Find Similar Words", type="primary") or (search_word and search_word != default_word):
             if search_word.strip():
                 with st.spinner(f"Finding words similar to '{search_word}'..."):
                     model = explorer.load_model(model_key)
@@ -469,6 +615,10 @@ def main():
     with tab2:
         st.header("Word Analogies")
         st.info("""
         **Example:** Париж - Франция + Татарстан = Казан?
         (Paris - France + Tatarstan = Kazan?)
@@ -562,20 +712,25 @@ def main():
                 "Model": explorer.get_model_display_name(key),
                 "Type": "Word2Vec" if "w2v" in key else "FastText",
                 "Dimensions": config['dim'],
-                "Analogy Accuracy": f"{config['analogy_accuracy']*100:.1f}%" if config['analogy_accuracy'] else "N/A",
-                "Semantic Similarity": f"{config['semantic_similarity']:.3f}" if config['semantic_similarity'] else "N/A"
             })
         df_specs = pd.DataFrame(specs_data)
         st.dataframe(df_specs, use_container_width=True)
-        # OOV words testing
         st.subheader("🔤 OOV (Out-of-Vocabulary) Testing")
-        st.info("""
-        **FastText models** can handle words not seen during training thanks to subword information.
-        Word2Vec models cannot generate vectors for OOV words.
-        """)
         oov_words = st.text_area(
             "Enter words for OOV testing (one per line):",
@@ -637,10 +792,8 @@ def main():
         ### 📁 Model Files Structure:
-        Each model consists of three files:
-        - `*.model` - Main model file
-        - `*.model.syn1neg.npy` - Weights file
-        - `*.model.wv.vectors.npy` - Word vectors file
         ### 📜 Certificate:
@@ -653,21 +806,22 @@ def main():
         ### 🚀 Usage Example:
         ```python
-        from huggingface_hub import snapshot_download
-        from gensim.models import Word2Vec
-        # Download the model
-        model_path = snapshot_download(
             repo_id="TatarNLPWorld/Tatar2Vec",
-            allow_patterns="word2vec/cbow100/*"
         )
-        # Load the model
-        model = Word2Vec.load("word2vec/cbow100/w2v_cbow_100.model")
-        # Find similar words
-        similar = model.wv.most_similar("татар")
-        print(similar)
         ```
         ### 📝 License:

 import os
 from typing import List, Dict, Tuple, Optional
 import gc
+import tempfile
+import shutil
 # Import for model loading from Hugging Face Hub
+from huggingface_hub import snapshot_download, hf_hub_download
+from gensim.models import FastText, Word2Vec, KeyedVectors
+import gensim
 # Page configuration
 st.set_page_config(
         # Model names and their paths in Hugging Face repo
         self.available_models = {
             "Word2Vec": {
+                "cbow_100": "w2v_cbow_100",      # CBOW 100-dim
+                "sg_100": "w2v_sg_100",          # Skip-gram 100-dim
+                "cbow_200": "w2v_cbow_200"       # CBOW 200-dim
             },
             "FastText": {
+                "cbow_100": "ft_cbow_100",        # FastText CBOW 100-dim
+                "cbow_200": "ft_cbow_200"         # FastText CBOW 200-dim
             }
         }
+        # Human-readable names for variants
+        self.variant_names = {
+            "cbow_100": "🥇 CBOW (100-dim) - Best for analogies",
+            "sg_100": "🎯 Skip-gram (100-dim) - Better for rare words",
+            "cbow_200": "📈 CBOW (200-dim) - Higher dimensionality"
+        }
         # Model paths in the Hugging Face repository
         self.model_configs = {
             "w2v_cbow_100": {
                 "subdir": "word2vec/cbow100",
+                "has_main_file": True,
+                "main_file": "w2v_cbow_100.model",
+                "files": [
+                    "w2v_cbow_100.model",
+                    "w2v_cbow_100.model.syn1neg.npy",
+                    "w2v_cbow_100.model.wv.vectors.npy"
+                ],
                 "type": "word2vec",
                 "dim": 100,
+                "description": "Word2Vec CBOW 100-dim - Best for analogies (60% accuracy)",
                 "analogy_accuracy": 0.60,
+                "semantic_similarity": 0.568,
+                "variant": "cbow_100"
             },
             "w2v_cbow_200": {
                 "subdir": "word2vec/cbow200",
+                "has_main_file": True,
+                "main_file": "w2v_cbow_200.model",
+                "files": [
+                    "w2v_cbow_200.model",
+                    "w2v_cbow_200.model.syn1neg.npy",
+                    "w2v_cbow_200.model.wv.vectors.npy"
+                ],
                 "type": "word2vec",
                 "dim": 200,
+                "description": "Word2Vec CBOW 200-dim - Higher dimensionality, more expressive",
                 "analogy_accuracy": None,
+                "semantic_similarity": None,
+                "variant": "cbow_200"
             },
             "w2v_sg_100": {
                 "subdir": "word2vec/sg100",
+                "has_main_file": False,  # No main .model file
+                "main_file": None,
+                "files": [
+                    "w2v_sg_100.model.syn1neg.npy",
+                    "w2v_sg_100.model.wv.vectors.npy"
+                ],
                 "type": "word2vec",
                 "dim": 100,
+                "description": "Word2Vec Skip-gram 100-dim - Better for rare words (only vectors available)",
                 "analogy_accuracy": None,
+                "semantic_similarity": None,
+                "variant": "sg_100",
+                "note": "Only word vectors available, full model with training weights not included"
             },
             "ft_cbow_100": {
                 "subdir": "fasttext/cbow100",
+                "has_main_file": True,
+                "main_file": "ft_cbow_100.model",
+                "files": [
+                    "ft_cbow_100.model",
+                    "ft_cbow_100.model.syn1neg.npy",
+                    "ft_cbow_100.model.wv.vectors.npy"
+                ],
                 "type": "fasttext",
                 "dim": 100,
+                "description": "FastText CBOW 100-dim - Handles morphology, good for OOV words",
                 "analogy_accuracy": 0.0,
+                "semantic_similarity": 0.582,
+                "variant": "cbow_100"
             },
             "ft_cbow_200": {
                 "subdir": "fasttext/cbow200",
+                "has_main_file": True,
+                "main_file": "ft_cbow_200.model",
+                "files": [
+                    "ft_cbow_200.model",
+                    "ft_cbow_200.model.syn1neg.npy",
+                    "ft_cbow_200.model.wv.vectors.npy"
+                ],
                 "type": "fasttext",
                 "dim": 200,
                 "description": "FastText CBOW 200-dim - Larger FastText model",
                 "analogy_accuracy": 0.0,
+                "semantic_similarity": None,
+                "variant": "cbow_200"
             }
         }
             progress_bar = st.progress(0)
             status_text = st.empty()
+            # Create a temporary directory for this model
+            temp_dir = tempfile.mkdtemp()
+            model_dir = os.path.join(temp_dir, config['subdir'])
+            os.makedirs(model_dir, exist_ok=True)
+            status_text.text(f"Downloading {_self.get_model_display_name(model_key)} from Hugging Face...")
+            progress_bar.progress(10)
+            # Download all required files for the model
+            total_files = len(config['files'])
+            for i, filename in enumerate(config['files']):
+                file_path = os.path.join(config['subdir'], filename)
+                status_text.text(f"Downloading {filename}... ({i+1}/{total_files})")
+                try:
+                    # Download the file
+                    downloaded_path = hf_hub_download(
+                        repo_id=repo_id,
+                        filename=file_path,
+                        repo_type="model",
+                        local_dir=temp_dir,
+                        local_dir_use_symlinks=False
+                    )
+                    # Update progress
+                    progress = 10 + (i + 1) * 60 // total_files
+                    progress_bar.progress(progress)
+                except Exception as e:
+                    st.warning(f"Note: {filename} may be downloaded differently: {e}")
+                    continue
             progress_bar.progress(80)
+            status_text.text("Files downloaded, loading model...")
+            # Load the model based on available files
             try:
+                if config['has_main_file'] and config['main_file']:
+                    # Full model with main file
+                    model_path = os.path.join(temp_dir, config['subdir'], config['main_file'])
+                    if os.path.exists(model_path):
+                        if config['type'] == "fasttext":
+                            model = FastText.load(model_path)
+                        else:
+                            model = Word2Vec.load(model_path)
+                    else:
+                        # Try to find any .model file
+                        model_files = [f for f in os.listdir(os.path.join(temp_dir, config['subdir']))
+                                     if f.endswith('.model')]
+                        if model_files:
+                            model_path = os.path.join(temp_dir, config['subdir'], model_files[0])
+                            if config['type'] == "fasttext":
+                                model = FastText.load(model_path)
+                            else:
+                                model = Word2Vec.load(model_path)
+                        else:
+                            # If no model file, try to load just the vectors
+                            status_text.text("Loading word vectors only...")
+                            vectors_file = None
+                            for file in config['files']:
+                                if 'vectors' in file:
+                                    vectors_file = os.path.join(temp_dir, config['subdir'], file)
+                                    break
+                            if vectors_file and os.path.exists(vectors_file):
+                                # Create a KeyedVectors instance
+                                model = KeyedVectors.load(vectors_file)
+                                # Add a dummy train method to maintain compatibility
+                                model.train = lambda *args, **kwargs: None
+                            else:
+                                raise Exception("No model or vectors file found")
                 else:
+                    # Model with only vectors (like sg100)
+                    status_text.text("Loading word vectors only (Skip-gram model)...")
+                    vectors_file = None
+                    for file in config['files']:
+                        if 'vectors' in file:
+                            vectors_file = os.path.join(temp_dir, config['subdir'], file)
+                            break
+                    if vectors_file and os.path.exists(vectors_file):
+                        # Create a KeyedVectors instance
+                        model = KeyedVectors.load(vectors_file)
+                        # Add a dummy train method to maintain compatibility
+                        model.train = lambda *args, **kwargs: None
+                        # Add warning about limited functionality
+                        st.info("⚠️ Skip-gram model loaded in vectors-only mode. Some training features are not available.")
+                    else:
+                        raise Exception("No vectors file found for Skip-gram model")
                 progress_bar.progress(100)
+                status_text.text(f"✅ Successfully loaded {_self.get_model_display_name(model_key)}!")
+                # Store temp dir to clean up later if needed
+                model._temp_dir = temp_dir
                 # Clean up progress indicators after 2 seconds
                 import time
                 return model
             except Exception as e:
+                st.error(f"Error loading model: {str(e)}")
+                # Clean up temp dir
+                shutil.rmtree(temp_dir, ignore_errors=True)
                 return None
         except Exception as e:
         names = {
             "w2v_cbow_100": "🥇 Word2Vec CBOW (100-dim)",
             "w2v_cbow_200": "📈 Word2Vec CBOW (200-dim)",
+            "w2v_sg_100": "🎯 Word2Vec Skip-gram (100-dim) [Vectors Only]",
             "ft_cbow_100": "⚡ FastText CBOW (100-dim)",
             "ft_cbow_200": "🚀 FastText CBOW (200-dim)"
         }
         """Get model information"""
         return self.model_configs.get(model_key, {})
+    def get_variant_name(self, variant_key: str) -> str:
+        """Get human-readable variant name"""
+        return self.variant_names.get(variant_key, variant_key)
     def find_similar_words(self, model, word: str, topn: int = 10):
         """Find semantically similar words"""
         try:
+            # Handle both Word2Vec/FastText models and KeyedVectors
             if hasattr(model, 'wv'):
                 return model.wv.most_similar(word, topn=topn)
+            elif hasattr(model, 'most_similar'):
                 return model.most_similar(word, topn=topn)
+            else:
+                return []
         except KeyError:
             return []
         except Exception as e:
         try:
             if hasattr(model, 'wv'):
                 return model.wv.most_similar(positive=positive, negative=negative, topn=topn)
+            elif hasattr(model, 'most_similar'):
                 return model.most_similar(positive=positive, negative=negative, topn=topn)
+            else:
+                return []
         except Exception as e:
             st.error(f"Error performing analogy: {e}")
             return []
         try:
             if hasattr(model, 'wv'):
                 return model.wv[word]
+            elif hasattr(model, 'get_vector'):
+                return model.get_vector(word)
+            elif hasattr(model, '__getitem__'):
                 return model[word]
+            else:
+                return None
         except KeyError:
             return None
                 in_vocab = False
                 if hasattr(model, 'wv'):
                     in_vocab = word in model.wv.key_to_index
+                elif hasattr(model, 'key_to_index'):
+                    in_vocab = word in model.key_to_index
+                elif hasattr(model, 'vocab'):
+                    in_vocab = word in model.vocab
                 similar = self.find_similar_words(model, word, 3) if in_vocab else []
                 results.append({
                     'similar_words': []
                 })
         return results
 def create_performance_comparison():
     """Create model performance comparison charts"""
             x=['Word2Vec CBOW 100', 'FastText CBOW 100'],
             y=analogy_scores,
             marker_color=['#1f77b4', '#d62728'],
+            text=[f"{score*100:.1f}%" if score and score > 0 else "0%" for score in analogy_scores],
             textposition='auto',
         ),
         row=1, col=1
     )
     fig.update_layout(
+        title_text="Model Performance Comparison (Best Models)",
         showlegend=False,
         height=400,
         width=800
             index=0
         )
+        st.markdown("---")
+        st.subheader("Model Variant:")
+        # Model variant selection based on type
+        if model_type == "Word2Vec":
+            # Three variants for Word2Vec
+            variant_options = ["cbow_100", "sg_100", "cbow_200"]
+            selected_variant = st.radio(
+                "Select Word2Vec variant:",
+                options=variant_options,
+                format_func=lambda x: explorer.get_variant_name(x),
+                index=0  # Default to CBOW 100
+            )
+            # Show note for Skip-gram
+            if selected_variant == "sg_100":
+                st.info("ℹ️ Skip-gram model is available in vectors-only mode")
+        else:  # FastText
+            # Two variants for FastText
+            variant_options = ["cbow_100", "cbow_200"]
+            selected_variant = st.radio(
+                "Select FastText variant:",
+                options=variant_options,
+                format_func=lambda x: "⚡ CBOW (100-dim)" if x == "cbow_100" else "🚀 CBOW (200-dim)",
+                index=0
+            )
+        # Get model key based on type and variant
+        model_key = explorer.available_models[model_type][selected_variant]
         # Model information section
         st.markdown("---")
             st.markdown(f"**{explorer.get_model_display_name(model_key)}**")
             st.caption(model_info.get('description', ''))
+            if 'note' in model_info:
+                st.caption(f"*Note: {model_info['note']}*")
             col1, col2 = st.columns(2)
             with col1:
                 if model_info.get('analogy_accuracy') is not None:
                     acc = model_info['analogy_accuracy']
+                    st.metric("Analogy Accuracy", f"{acc*100:.1f}%" if acc and acc > 0 else "N/A")
             with col2:
                 if model_info.get('semantic_similarity') is not None:
                     st.metric("Semantic Similarity", f"{sim:.3f}" if sim else "N/A")
             st.metric("Vector Dimension", model_info.get('dim', 'N/A'))
+            # Show file info
+            file_count = len(model_info.get('files', []))
+            st.caption(f"📁 {file_count} file(s) in model")
         # Quick search examples
         st.markdown("---")
         quick_words = ["татар", "Казан", "тел", "мәктәп", "китап", "уку", "язу", "бәйрәм"]
         selected_quick = st.selectbox("Example words:", quick_words)
+        if st.button("Quick Similarity Search", use_container_width=True):
             st.session_state.quick_search = selected_quick
     # Main content area with tabs
     tab1, tab2, tab3, tab4 = st.tabs(["🔍 Word Search", "🧠 Analogies", "📊 Analysis", "ℹ️ About"])
         with col2:
             top_n = st.slider("Number of similar words:", 5, 20, 10)
+        if st.button("Find Similar Words", type="primary", use_container_width=True):
             if search_word.strip():
                 with st.spinner(f"Finding words similar to '{search_word}'..."):
                     model = explorer.load_model(model_key)
     with tab2:
         st.header("Word Analogies")
+        # Check if model supports analogies (Skip-gram in vectors mode might have limitations)
+        if model_key == "w2v_sg_100":
+            st.warning("⚠️ Skip-gram model is in vectors-only mode. Analogies might not work perfectly.")
         st.info("""
         **Example:** Париж - Франция + Татарстан = Казан?
         (Paris - France + Tatarstan = Kazan?)
                 "Model": explorer.get_model_display_name(key),
                 "Type": "Word2Vec" if "w2v" in key else "FastText",
                 "Dimensions": config['dim'],
+                "Files": len(config['files']),
+                "Analogy Accuracy": f"{config['analogy_accuracy']*100:.1f}%" if config.get('analogy_accuracy') else "N/A",
+                "Semantic Similarity": f"{config['semantic_similarity']:.3f}" if config.get('semantic_similarity') else "N/A"
             })
         df_specs = pd.DataFrame(specs_data)
         st.dataframe(df_specs, use_container_width=True)
+        # OOV words testing (only for FastText)
         st.subheader("🔤 OOV (Out-of-Vocabulary) Testing")
+        if model_type == "FastText":
+            st.info("""
+            **FastText models** can handle words not seen during training thanks to subword information.
+            """)
+        else:
+            st.info("""
+            **Word2Vec models** cannot generate vectors for OOV words. Only words in vocabulary will show results.
+            """)
         oov_words = st.text_area(
             "Enter words for OOV testing (one per line):",
         ### 📁 Model Files Structure:
+        - **CBOW models**: 3 files (`.model`, `.syn1neg.npy`, `.wv.vectors.npy`)
+        - **Skip-gram model**: 2 files (`.syn1neg.npy`, `.wv.vectors.npy`) - vectors only
         ### 📜 Certificate:
         ### 🚀 Usage Example:
         ```python
+        from huggingface_hub import hf_hub_download
+        from gensim.models import Word2Vec, KeyedVectors
+        # For CBOW models with full model
+        model_path = hf_hub_download(
             repo_id="TatarNLPWorld/Tatar2Vec",
+            filename="word2vec/cbow100/w2v_cbow_100.model"
         )
+        model = Word2Vec.load(model_path)
+        # For Skip-gram with vectors only
+        vectors_path = hf_hub_download(
+            repo_id="TatarNLPWorld/Tatar2Vec",
+            filename="word2vec/sg100/w2v_sg_100.model.wv.vectors.npy"
+        )
+        vectors = KeyedVectors.load(vectors_path)
         ```
         ### 📝 License: