Spaces:

heerjtdev
/

LSTM-CRF_Train

Sleeping

App Files Files Community

aagamjtdev committed on Oct 28, 2025

Commit

9b410c3

1 Parent(s): 005126e

Gradio App

Browse files

Files changed (1) hide show

app.py +743 -0

app.py ADDED Viewed

	@@ -0,0 +1,743 @@

+# import os
+# import shutil
+# import tempfile
+# import gradio as gr
+# from huggingface_hub import hf_hub_download, upload_file, HfApi
+# import subprocess
+# import sys
+#
+# # Configuration
+# OUTPUT_DIR = "output2_data"
+# MODEL_FILE = "model_enhanced.pt"
+# VOCAB_FILE = "vocabs_enhanced.pkl"
+# REPO_ID = os.environ.get("SPACE_ID", "heerjtdev/LSTM_CRF")  # Replace with your repo ID
+# HF_TOKEN = os.environ.get("HF_TOKEN")  # Set this as a secret in your Space settings
+#
+#
+# def download_existing_models():
+#     """Download existing model files from the Hugging Face Hub if available."""
+#     try:
+#         api = HfApi()
+#         files = api.list_repo_files(REPO_ID, token=HF_TOKEN)
+#
+#         os.makedirs(OUTPUT_DIR, exist_ok=True)
+#
+#         downloaded_files = []
+#         if MODEL_FILE in files:
+#             model_path = hf_hub_download(
+#                 repo_id=REPO_ID,
+#                 filename=MODEL_FILE,
+#                 token=HF_TOKEN,
+#                 local_dir=OUTPUT_DIR,
+#                 local_dir_use_symlinks=False
+#             )
+#             downloaded_files.append(MODEL_FILE)
+#
+#         if VOCAB_FILE in files:
+#             vocab_path = hf_hub_download(
+#                 repo_id=REPO_ID,
+#                 filename=VOCAB_FILE,
+#                 token=HF_TOKEN,
+#                 local_dir=OUTPUT_DIR,
+#                 local_dir_use_symlinks=False
+#             )
+#             downloaded_files.append(VOCAB_FILE)
+#
+#         if downloaded_files:
+#             return f"✅ Downloaded existing files: {', '.join(downloaded_files)}"
+#         else:
+#             return "ℹ️ No existing model files found in repository."
+#     except Exception as e:
+#         return f"⚠️ Could not download existing models: {str(e)}"
+#
+#
+# def train_model(dataset_file, progress=gr.Progress()):
+#     """Train the model with the uploaded dataset."""
+#     if dataset_file is None:
+#         return "❌ Please upload a dataset file!", None, None
+#
+#     try:
+#         # Step 1: Download existing models (if any)
+#         progress(0.1, desc="Checking for existing models...")
+#         download_status = download_existing_models()
+#         yield f"📥 {download_status}\n", None, None
+#
+#         # Step 2: Save uploaded file
+#         progress(0.2, desc="Processing dataset...")
+#         dataset_path = dataset_file.name
+#         yield f"📥 {download_status}\n📂 Dataset uploaded: {os.path.basename(dataset_path)}\n", None, None
+#
+#         # Step 3: Import and run training
+#         progress(0.3, desc="Starting training...")
+#         yield f"📥 {download_status}\n📂 Dataset uploaded: {os.path.basename(dataset_path)}\n🚀 Training started...\n", None, None
+#
+#         # Import the training function
+#         try:
+#             # Import your training script (assumes it's named train_model.py)
+#             import train_model as tm
+#
+#             # Run training
+#             progress(0.4, desc="Training in progress...")
+#             tm.train_from_json(dataset_path)
+#
+#             yield f"📥 {download_status}\n📂 Dataset uploaded: {os.path.basename(dataset_path)}\n✅ Training completed!\n", None, None
+#
+#         except ImportError:
+#             # If direct import fails, try running as subprocess
+#             progress(0.4, desc="Training in progress...")
+#             result = subprocess.run(
+#                 [sys.executable, "train_model.py", dataset_path],
+#                 capture_output=True,
+#                 text=True
+#             )
+#
+#             if result.returncode != 0:
+#                 yield f"❌ Training failed:\n{result.stderr}", None, None
+#                 return
+#
+#             yield f"📥 {download_status}\n📂 Dataset uploaded: {os.path.basename(dataset_path)}\n✅ Training completed!\n", None, None
+#
+#         # Step 4: Upload trained models to Hub
+#         progress(0.8, desc="Uploading models to Hub...")
+#         model_path = os.path.join(OUTPUT_DIR, MODEL_FILE)
+#         vocab_path = os.path.join(OUTPUT_DIR, VOCAB_FILE)
+#
+#         upload_status = []
+#         if os.path.exists(model_path):
+#             upload_file(
+#                 path_or_fileobj=model_path,
+#                 path_in_repo=MODEL_FILE,
+#                 repo_id=REPO_ID,
+#                 token=HF_TOKEN
+#             )
+#             upload_status.append(MODEL_FILE)
+#
+#         if os.path.exists(vocab_path):
+#             upload_file(
+#                 path_or_fileobj=vocab_path,
+#                 path_in_repo=VOCAB_FILE,
+#                 repo_id=REPO_ID,
+#                 token=HF_TOKEN
+#             )
+#             upload_status.append(VOCAB_FILE)
+#
+#         # Step 5: Copy to temp directory for download
+#         progress(0.9, desc="Preparing downloads...")
+#         temp_dir = tempfile.mkdtemp()
+#
+#         model_download = None
+#         vocab_download = None
+#
+#         if os.path.exists(model_path):
+#             temp_model = os.path.join(temp_dir, MODEL_FILE)
+#             shutil.copy2(model_path, temp_model)
+#             model_download = temp_model
+#
+#         if os.path.exists(vocab_path):
+#             temp_vocab = os.path.join(temp_dir, VOCAB_FILE)
+#             shutil.copy2(vocab_path, temp_vocab)
+#             vocab_download = temp_vocab
+#
+#         progress(1.0, desc="Complete!")
+#
+#         final_message = (
+#             f"📥 {download_status}\n"
+#             f"📂 Dataset uploaded: {os.path.basename(dataset_path)}\n"
+#             f"✅ Training completed!\n"
+#             f"☁️ Uploaded to Hub: {', '.join(upload_status)}\n"
+#             f"📦 Files ready for download!"
+#         )
+#
+#         yield final_message, model_download, vocab_download
+#
+#     except Exception as e:
+#         yield f"❌ Error during training: {str(e)}", None, None
+#
+#
+# def download_models_from_hub():
+#     """Download the latest models from the Hugging Face Hub."""
+#     try:
+#         os.makedirs(OUTPUT_DIR, exist_ok=True)
+#
+#         # Download model
+#         model_path = hf_hub_download(
+#             repo_id=REPO_ID,
+#             filename=MODEL_FILE,
+#             token=HF_TOKEN,
+#             local_dir=OUTPUT_DIR,
+#             local_dir_use_symlinks=False,
+#             force_download=True
+#         )
+#
+#         # Download vocab
+#         vocab_path = hf_hub_download(
+#             repo_id=REPO_ID,
+#             filename=VOCAB_FILE,
+#             token=HF_TOKEN,
+#             local_dir=OUTPUT_DIR,
+#             local_dir_use_symlinks=False,
+#             force_download=True
+#         )
+#
+#         # Copy to temp for download
+#         temp_dir = tempfile.mkdtemp()
+#         temp_model = os.path.join(temp_dir, MODEL_FILE)
+#         temp_vocab = os.path.join(temp_dir, VOCAB_FILE)
+#
+#         shutil.copy2(model_path, temp_model)
+#         shutil.copy2(vocab_path, temp_vocab)
+#
+#         return (
+#             "✅ Successfully downloaded models from Hugging Face Hub!",
+#             temp_model,
+#             temp_vocab
+#         )
+#     except Exception as e:
+#         return f"❌ Error downloading models: {str(e)}", None, None
+#
+#
+# # Create Gradio interface
+# with gr.Blocks(title="MCQ Structure Extraction - Model Training", theme=gr.themes.Soft()) as demo:
+#     gr.Markdown(
+#         """
+#         # 🎓 MCQ Structure Extraction - Model Training
+#
+#         Train a BiLSTM-CRF model with deep layout understanding for extracting structured information from MCQ documents.
+#
+#         ## 📋 Instructions:
+#         1. **Upload Dataset**: Provide your unified JSON file containing tokens, bounding boxes, and labels
+#         2. **Train Model**: Click "Start Training" and wait for completion (this may take a while)
+#         3. **Download Models**: Once training is complete, download the trained model and vocabulary files
+#
+#         ## 📥 Or Download Existing Models:
+#         If you just want to download the latest trained models from the repository, use the "Download from Hub" button.
+#         """
+#     )
+#
+#     with gr.Tab("Train New Model"):
+#         with gr.Row():
+#             with gr.Column():
+#                 dataset_input = gr.File(
+#                     label="Upload Training Dataset (JSON)",
+#                     file_types=[".json"],
+#                     type="filepath"
+#                 )
+#                 train_button = gr.Button("🚀 Start Training", variant="primary", size="lg")
+#
+#             with gr.Column():
+#                 status_output = gr.Textbox(
+#                     label="Training Status",
+#                     lines=8,
+#                     interactive=False
+#                 )
+#
+#         with gr.Row():
+#             model_output = gr.File(label="📥 Download Trained Model (.pt)")
+#             vocab_output = gr.File(label="📥 Download Vocabulary (.pkl)")
+#
+#         train_button.click(
+#             fn=train_model,
+#             inputs=[dataset_input],
+#             outputs=[status_output, model_output, vocab_output]
+#         )
+#
+#     with gr.Tab("Download from Hub"):
+#         gr.Markdown(
+#             """
+#             Download the latest trained models directly from the Hugging Face Hub.
+#             This is useful if you want to use pre-trained models without training from scratch.
+#             """
+#         )
+#
+#         download_button = gr.Button("☁️ Download from Hugging Face Hub", variant="primary", size="lg")
+#
+#         download_status = gr.Textbox(
+#             label="Download Status",
+#             lines=3,
+#             interactive=False
+#         )
+#
+#         with gr.Row():
+#             hub_model_output = gr.File(label="📥 Model File (.pt)")
+#             hub_vocab_output = gr.File(label="📥 Vocabulary File (.pkl)")
+#
+#         download_button.click(
+#             fn=download_models_from_hub,
+#             outputs=[download_status, hub_model_output, hub_vocab_output]
+#         )
+#
+#     gr.Markdown(
+#         """
+#         ---
+#         ### ⚙️ Model Configuration:
+#         - **Architecture**: BiLSTM-CRF with spatial attention
+#         - **Features**: Word embeddings, character CNN, bounding box encoding, spatial & context features
+#         - **Output**: 13 entity labels (Questions, Options, Answers, Images, Section Headings, Passages)
+#
+#         ### 📊 Training Details:
+#         - Batch Size: 8
+#         - Epochs: 10 (with early stopping)
+#         - Learning Rate: 5e-4 (with OneCycleLR scheduler)
+#         - Optimizer: AdamW with weight decay
+#
+#         **Note**: Training requires a GPU for reasonable speed. CPU training is supported but will be significantly slower.
+#         """
+#     )
+#
+# # Launch the app
+# if __name__ == "__main__":
+#     demo.launch()
+import os
+import shutil
+import tempfile
+import gradio as gr
+from huggingface_hub import hf_hub_download, upload_file, HfApi
+import sys
+# Put this file's own directory first on sys.path so `import train_model` finds the sibling module
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+# Configuration: local working directory and the artifact file names shared by every function below
+OUTPUT_DIR = "output_data"  # Hub downloads land here; trained files are looked up here after training
+MODEL_FILE = "model_enhanced.pt"  # used both as the local file name and as the path inside the Hub repo
+VOCAB_FILE = "vocabs_enhanced.pkl"  # same dual use as MODEL_FILE
+CHECKPOINT_FILE = "checkpoint_enhanced.pt"  # downloaded/uploaded when present, but never offered as a UI download
+# IMPORTANT: Update this with your actual Hugging Face repository ID. NOTE(review): SPACE_ID presumably names a Space, yet the Hub calls below pass no repo_type - confirm they target the intended repo
+REPO_ID = os.environ.get("SPACE_ID", "heerjtdev/LSTM_CRF")  # SPACE_ID env var wins; the literal is only the fallback - replace with your repo ID
+HF_TOKEN = os.environ.get("HF_TOKEN")  # None when unset; set this as a secret in your Space settings
+def download_existing_models():
+    """Download existing model files from the Hugging Face Hub if available."""
+    try:
+        api = HfApi()
+        files = api.list_repo_files(REPO_ID, token=HF_TOKEN)
+        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        downloaded_files = []
+        # Download model file
+        if MODEL_FILE in files:
+            print(f"📥 Downloading {MODEL_FILE} from Hub...")
+            model_path = hf_hub_download(
+                repo_id=REPO_ID,
+                filename=MODEL_FILE,
+                token=HF_TOKEN,
+                local_dir=OUTPUT_DIR,
+                force_download=True  # Always get latest version
+            )
+            downloaded_files.append(MODEL_FILE)
+            print(f"✅ Downloaded {MODEL_FILE}")
+        # Download vocab file
+        if VOCAB_FILE in files:
+            print(f"📥 Downloading {VOCAB_FILE} from Hub...")
+            vocab_path = hf_hub_download(
+                repo_id=REPO_ID,
+                filename=VOCAB_FILE,
+                token=HF_TOKEN,
+                local_dir=OUTPUT_DIR,
+                force_download=True  # Always get latest version
+            )
+            downloaded_files.append(VOCAB_FILE)
+            print(f"✅ Downloaded {VOCAB_FILE}")
+        # Download checkpoint file (optional, for resuming training)
+        if CHECKPOINT_FILE in files:
+            print(f"📥 Downloading {CHECKPOINT_FILE} from Hub...")
+            checkpoint_path = hf_hub_download(
+                repo_id=REPO_ID,
+                filename=CHECKPOINT_FILE,
+                token=HF_TOKEN,
+                local_dir=OUTPUT_DIR,
+                force_download=True
+            )
+            downloaded_files.append(CHECKPOINT_FILE)
+            print(f"✅ Downloaded {CHECKPOINT_FILE}")
+        if downloaded_files:
+            return f"✅ Downloaded from Hub: {', '.join(downloaded_files)}"
+        else:
+            return "ℹ️ No existing model files found in repository. Starting fresh."
+    except Exception as e:
+        error_msg = f"⚠️ Could not download existing models: {str(e)}"
+        print(error_msg)
+        return error_msg
+def train_model(dataset_file, progress=gr.Progress()):
+    """Train the model with the uploaded dataset."""
+    if dataset_file is None:
+        return "❌ Please upload a dataset file!", None, None
+    try:
+        # Step 1: Download existing models from Hub (if any) BEFORE training starts
+        progress(0.05, desc="Checking Hugging Face Hub for existing models...")
+        download_status = download_existing_models()
+        status_log = f"{download_status}\n\n"
+        yield status_log, None, None
+        # Step 2: Save uploaded file
+        progress(0.1, desc="Processing uploaded dataset...")
+        dataset_path = dataset_file.name
+        status_log += f"📂 Dataset uploaded: {os.path.basename(dataset_path)}\n\n"
+        yield status_log, None, None
+        # Step 3: Import and run training
+        progress(0.15, desc="Initializing training...")
+        status_log += "🚀 Starting training...\n"
+        status_log += "📊 This may take a while. Training progress will appear in the terminal.\n\n"
+        yield status_log, None, None
+        # Import the training module
+        try:
+            import train_model as tm
+            print("=" * 80)
+            print("TRAINING STARTED")
+            print("=" * 80)
+            # Run training - this will handle model loading internally
+            progress(0.2, desc="Training in progress... (check terminal for details)")
+            tm.train_from_json(dataset_path)
+            print("=" * 80)
+            print("TRAINING COMPLETED")
+            print("=" * 80)
+            status_log += "✅ Training completed successfully!\n\n"
+            yield status_log, None, None
+        except ImportError as ie:
+            error_msg = f"❌ Failed to import training module: {str(ie)}\n"
+            error_msg += "Make sure train_model.py is in the same directory as app.py"
+            yield status_log + error_msg, None, None
+            return
+        except Exception as train_error:
+            error_msg = f"❌ Training failed with error:\n{str(train_error)}\n"
+            yield status_log + error_msg, None, None
+            return
+        # Step 4: Verify files exist
+        progress(0.85, desc="Verifying trained model files...")
+        model_path = os.path.join(OUTPUT_DIR, MODEL_FILE)
+        vocab_path = os.path.join(OUTPUT_DIR, VOCAB_FILE)
+        checkpoint_path = os.path.join(OUTPUT_DIR, CHECKPOINT_FILE)
+        files_exist = []
+        if os.path.exists(model_path):
+            files_exist.append(MODEL_FILE)
+        if os.path.exists(vocab_path):
+            files_exist.append(VOCAB_FILE)
+        if not files_exist:
+            error_msg = "❌ Error: Model files were not created. Check training logs."
+            yield status_log + error_msg, None, None
+            return
+        status_log += f"✅ Found trained files: {', '.join(files_exist)}\n\n"
+        yield status_log, None, None
+        # Step 5: Upload to Hub
+        progress(0.9, desc="Uploading models to Hugging Face Hub...")
+        status_log += "☁️ Uploading to Hugging Face Hub...\n"
+        yield status_log, None, None
+        upload_status = []
+        if os.path.exists(model_path):
+            try:
+                upload_file(
+                    path_or_fileobj=model_path,
+                    path_in_repo=MODEL_FILE,
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN,
+                    commit_message="Update trained model"
+                )
+                upload_status.append(MODEL_FILE)
+                print(f"✅ Uploaded {MODEL_FILE} to Hub")
+            except Exception as e:
+                print(f"⚠️ Failed to upload {MODEL_FILE}: {e}")
+        if os.path.exists(vocab_path):
+            try:
+                upload_file(
+                    path_or_fileobj=vocab_path,
+                    path_in_repo=VOCAB_FILE,
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN,
+                    commit_message="Update vocabulary"
+                )
+                upload_status.append(VOCAB_FILE)
+                print(f"✅ Uploaded {VOCAB_FILE} to Hub")
+            except Exception as e:
+                print(f"⚠️ Failed to upload {VOCAB_FILE}: {e}")
+        # Also upload checkpoint for future resume capability
+        if os.path.exists(checkpoint_path):
+            try:
+                upload_file(
+                    path_or_fileobj=checkpoint_path,
+                    path_in_repo=CHECKPOINT_FILE,
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN,
+                    commit_message="Update checkpoint"
+                )
+                upload_status.append(CHECKPOINT_FILE)
+                print(f"✅ Uploaded {CHECKPOINT_FILE} to Hub")
+            except Exception as e:
+                print(f"⚠️ Failed to upload {CHECKPOINT_FILE}: {e}")
+        if upload_status:
+            status_log += f"✅ Uploaded to Hub: {', '.join(upload_status)}\n\n"
+        else:
+            status_log += "⚠️ Warning: No files were uploaded to Hub\n\n"
+        yield status_log, None, None
+        # Step 6: Copy to temp directory for download
+        progress(0.95, desc="Preparing download files...")
+        temp_dir = tempfile.mkdtemp()
+        model_download = None
+        vocab_download = None
+        if os.path.exists(model_path):
+            temp_model = os.path.join(temp_dir, MODEL_FILE)
+            shutil.copy2(model_path, temp_model)
+            model_download = temp_model
+            print(f"📦 Prepared {MODEL_FILE} for download")
+        if os.path.exists(vocab_path):
+            temp_vocab = os.path.join(temp_dir, VOCAB_FILE)
+            shutil.copy2(vocab_path, temp_vocab)
+            vocab_download = temp_vocab
+            print(f"📦 Prepared {VOCAB_FILE} for download")
+        progress(1.0, desc="Complete!")
+        status_log += "📦 Files ready for download below!\n"
+        status_log += "\n" + "=" * 50 + "\n"
+        status_log += "TRAINING COMPLETE - You can now download the model files\n"
+        status_log += "=" * 50
+        yield status_log, model_download, vocab_download
+    except Exception as e:
+        error_msg = f"❌ Unexpected error: {str(e)}\n"
+        import traceback
+        error_msg += f"\nTraceback:\n{traceback.format_exc()}"
+        yield error_msg, None, None
+def download_models_from_hub():
+    """Download the latest models from the Hugging Face Hub."""
+    try:
+        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        api = HfApi()
+        files = api.list_repo_files(REPO_ID, token=HF_TOKEN)
+        downloaded_files = []
+        # Download model
+        if MODEL_FILE in files:
+            print(f"📥 Downloading {MODEL_FILE} from Hub...")
+            model_path = hf_hub_download(
+                repo_id=REPO_ID,
+                filename=MODEL_FILE,
+                token=HF_TOKEN,
+                local_dir=OUTPUT_DIR,
+                force_download=True
+            )
+            downloaded_files.append(MODEL_FILE)
+        else:
+            return f"❌ {MODEL_FILE} not found in repository", None, None
+        # Download vocab
+        if VOCAB_FILE in files:
+            print(f"📥 Downloading {VOCAB_FILE} from Hub...")
+            vocab_path = hf_hub_download(
+                repo_id=REPO_ID,
+                filename=VOCAB_FILE,
+                token=HF_TOKEN,
+                local_dir=OUTPUT_DIR,
+                force_download=True
+            )
+            downloaded_files.append(VOCAB_FILE)
+        else:
+            return f"❌ {VOCAB_FILE} not found in repository", None, None
+        # Copy to temp for download
+        temp_dir = tempfile.mkdtemp()
+        temp_model = os.path.join(temp_dir, MODEL_FILE)
+        temp_vocab = os.path.join(temp_dir, VOCAB_FILE)
+        shutil.copy2(os.path.join(OUTPUT_DIR, MODEL_FILE), temp_model)
+        shutil.copy2(os.path.join(OUTPUT_DIR, VOCAB_FILE), temp_vocab)
+        success_msg = f"✅ Successfully downloaded from Hub:\n"
+        success_msg += f"   • {MODEL_FILE}\n"
+        success_msg += f"   • {VOCAB_FILE}\n\n"
+        success_msg += "📦 Files are ready to download below!"
+        return success_msg, temp_model, temp_vocab
+    except Exception as e:
+        error_msg = f"❌ Error downloading models: {str(e)}\n\n"
+        error_msg += f"Make sure:\n"
+        error_msg += f"1. REPO_ID is set correctly: {REPO_ID}\n"
+        error_msg += f"2. HF_TOKEN is set in Space secrets\n"
+        error_msg += f"3. Model files exist in the repository"
+        return error_msg, None, None
+# Create Gradio interface: a page header, a training tab, a Hub download tab and a static reference footer
+with gr.Blocks(title="MCQ Structure Extraction - Model Training", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # 🎓 MCQ Structure Extraction - Model Training
+        Train a BiLSTM-CRF model with deep layout understanding for extracting structured information from MCQ documents.
+        ## 📋 Instructions:
+        1. **Upload Dataset**: Provide your unified JSON file containing tokens, bounding boxes, and labels
+        2. **Train Model**: Click "Start Training" and wait for completion (this may take a while)
+        3. **Download Models**: Once training is complete, download the trained model and vocabulary files
+        ## 📥 Or Download Existing Models:
+        If you just want to download the latest trained models from the repository, use the "Download from Hub" tab.
+        ---
+        """
+    )
+    with gr.Tab("🚀 Train New Model"):  # upload a dataset, start training, collect the resulting files
+        gr.Markdown(
+            """
+            ### Training Process:
+            The app will automatically:
+            1. ✅ Download any existing models from Hugging Face Hub (for resuming training)
+            2. 🎯 Train the model on your uploaded dataset
+            3. ☁️ Upload the trained models back to the Hub
+            4. 📥 Provide download links for the trained files
+            **Note**: Training progress details appear in the terminal/logs. The status box shows major milestones.
+            """
+        )
+        with gr.Row():
+            with gr.Column():  # left column: dataset upload and the start button
+                dataset_input = gr.File(
+                    label="📂 Upload Training Dataset (JSON)",
+                    file_types=[".json"],
+                    type="filepath"
+                )
+                train_button = gr.Button("🚀 Start Training", variant="primary", size="lg")
+            with gr.Column():  # right column: read-only status log written by train_model
+                status_output = gr.Textbox(
+                    label="📊 Training Status",
+                    lines=12,
+                    interactive=False,
+                    show_copy_button=True
+                )
+        gr.Markdown("### 📦 Download Trained Models")
+        with gr.Row():
+            model_output = gr.File(label="💾 Model File (.pt)")
+            vocab_output = gr.File(label="📚 Vocabulary File (.pkl)")
+        train_button.click(  # train_model yields (status, model file, vocab file) in this output order
+            fn=train_model,
+            inputs=[dataset_input],
+            outputs=[status_output, model_output, vocab_output]
+        )
+    with gr.Tab("☁️ Download from Hub"):  # fetch the files already stored in the repository, no training
+        gr.Markdown(
+            """
+            ### Download Pre-trained Models
+            Download the latest trained models directly from your Hugging Face repository.
+            This is useful if:
+            - You want to use pre-trained models without training
+            - You need to download models trained in a previous session
+            - You want to get the latest version from the Hub
+            The downloaded files can be used for inference with your MCQ extraction pipeline.
+            """
+        )
+        download_button = gr.Button("☁️ Download Latest Models from Hub", variant="primary", size="lg")
+        download_status = gr.Textbox(
+            label="Download Status",
+            lines=6,
+            interactive=False,
+            show_copy_button=True
+        )
+        gr.Markdown("### 📦 Downloaded Files")
+        with gr.Row():
+            hub_model_output = gr.File(label="💾 Model File (.pt)")
+            hub_vocab_output = gr.File(label="📚 Vocabulary File (.pkl)")
+        download_button.click(  # no inputs; download_models_from_hub returns (status, model file, vocab file)
+            fn=download_models_from_hub,
+            outputs=[download_status, hub_model_output, hub_vocab_output]
+        )
+    gr.Markdown(  # static reference text; none of the figures below are read from code in this file
+        """
+        ---
+        ### ⚙️ Model Configuration:
+        **Architecture:**
+        - BiLSTM-CRF with spatial attention mechanism
+        - Word embeddings + Character-level CNN
+        - Bounding box encoding with MLP
+        - Spatial & context feature extraction
+        - Learnable positional embeddings
+        **Features Used:**
+        - Token text (word-level and character-level)
+        - Bounding box coordinates (normalized)
+        - Spatial features: vertical spacing, alignment, dimensions (11 features)
+        - Context features: surrounding question/option markers (8 features)
+        **Output Labels (13 total):**
+        - Questions, Options, Answers, Images, Section Headings, Passages (BIO tagging)
+        **Training Parameters:**
+        - Batch Size: 8
+        - Epochs: 10 (with early stopping after 10 epochs without improvement)
+        - Learning Rate: 5e-4 (AdamW optimizer with OneCycleLR scheduler)
+        - Hidden Size: 768
+        - Total Parameters: ~15.6M
+        **Hardware Requirements:**
+        - GPU recommended for reasonable training speed
+        - CPU training supported but significantly slower
+        ---
+        ### 🔧 Setup Notes:
+        **Environment Variables Required:**
+        - `SPACE_ID`: Your Hugging Face Space/Repo ID (auto-set in Spaces)
+        - `HF_TOKEN`: Your Hugging Face write token (set as a secret)
+        **Model Persistence:**
+        - Models are automatically saved to `output_data/` directory
+        - Best model is uploaded to Hugging Face Hub after each improvement
+        - Training can be resumed from checkpoints
+        """
+    )
+# Launch the app only when this file is executed as a script (not when it is imported)
+if __name__ == "__main__":
+    demo.launch()