Or4cl3-2 committed on
Commit
ef57b75
·
verified ·
1 Parent(s): dffb711

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +437 -7
app.py CHANGED
@@ -419,7 +419,13 @@ class ArchitechAgent:
419
  synthetic_data = json.load(f)
420
  texts = [item["text"] for item in synthetic_data]
421
  else:
422
- texts = [t.strip() for t in training_data.split("\n\n") if t.strip()]
 
 
 
 
 
 
423
 
424
  if not texts:
425
  raise ModelTrainingError("No training data available!")
@@ -797,7 +803,263 @@ def add_model_management_tab():
797
  """)
798
 
799
  # This function should be called in create_gradio_interface()
800
- # Add it right before the "About" tab# # ==================== GRADIO INTERFACE ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
  def create_gradio_interface():
803
  agent = ArchitechAgent()
@@ -840,7 +1102,58 @@ def create_gradio_interface():
840
  task_desc_train = gr.Textbox(label="Task Description", lines=2)
841
  model_name = gr.Textbox(label="Model Name", placeholder="my-awesome-model")
842
  hf_token = gr.Textbox(label="HuggingFace Token", type="password")
843
- use_synthetic = gr.Checkbox(label="Use Synthetic Data", value=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844
 
845
  with gr.Accordion("⚙️ Advanced", open=False):
846
  base_model = gr.Dropdown(
@@ -855,11 +1168,37 @@ def create_gradio_interface():
855
  with gr.Column():
856
  train_output = gr.Markdown()
857
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  train_btn.click(
859
- fn=agent.train_custom_model,
860
- inputs=[task_desc_train, gr.State(""), model_name, hf_token,
861
- base_model, use_synthetic, gr.State("general"),
862
- gr.State(100), learning_rate, num_epochs, batch_size],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  outputs=train_output
864
  )
865
 
@@ -897,6 +1236,97 @@ def create_gradio_interface():
897
  outputs=test_output
898
  )
899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  # Model Management Tab
901
  with gr.Tab("💾 Model Management"):
902
  gr.Markdown("""
 
419
  synthetic_data = json.load(f)
420
  texts = [item["text"] for item in synthetic_data]
421
  else:
422
+ # Check if training_data is a file path or raw text
423
+ if training_data.strip().endswith('.json') and os.path.exists(training_data.strip()):
424
+ # Load from file
425
+ texts = dataset_manager.load_dataset_for_training(training_data.strip())
426
+ else:
427
+ # Parse as raw text
428
+ texts = [t.strip() for t in training_data.split("\n\n") if t.strip()]
429
 
430
  if not texts:
431
  raise ModelTrainingError("No training data available!")
 
803
  """)
804
 
805
  # This function should be called in create_gradio_interface()
806
+ # Add it right before the "About" tab# ==================== DATASET MANAGER ====================
807
+
808
class DatasetManager:
    """Manage locally stored synthetic training datasets.

    Datasets are plain JSON files (a list of ``{"text": ...}`` objects)
    kept in ``./synthetic_datasets``; the directory is created on first use.
    """

    def __init__(self):
        # All datasets live under one well-known directory next to the app.
        self.datasets_dir = Path("./synthetic_datasets")
        self.datasets_dir.mkdir(exist_ok=True)

    def list_available_datasets(self) -> List[Tuple[str, str]]:
        """Return ``(filename, path)`` pairs for every ``*.json`` dataset on disk."""
        datasets = []
        if self.datasets_dir.exists():
            for file in self.datasets_dir.glob("*.json"):
                datasets.append((file.name, str(file)))
        return datasets

    def get_dataset_preview(self, dataset_path: str) -> str:
        """Return a Markdown preview (example count + first 3 examples) of a dataset.

        Failures (missing file, malformed JSON) are reported as a message
        string rather than raised, because the result is rendered directly
        in the UI.
        """
        try:
            # Explicit encoding: JSON is UTF-8 by spec; without it, reads fail
            # or mojibake on platforms whose default codec isn't UTF-8.
            with open(dataset_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if not data:
                return "Dataset is empty"

            preview = f"**Dataset:** `{Path(dataset_path).name}`\n\n"
            preview += f"**Total Examples:** {len(data)}\n\n"
            preview += "**First 3 Examples:**\n\n"

            for i, example in enumerate(data[:3], 1):
                preview += f"**Example {i}:**\n```\n{example.get('text', 'No text field')}\n```\n\n"

            return preview
        except Exception as e:
            return f"Error loading dataset: {str(e)}"

    def load_dataset_for_training(self, dataset_path: str) -> List[str]:
        """Load the ``text`` field of every example; entries without one are skipped."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return [item["text"] for item in data if "text" in item]
846
+
847
# Shared module-level manager instance used by the training UI handlers.
dataset_manager = DatasetManager()
848
+
849
+ # ==================== REPOSITORY CHAT SYSTEM ====================
850
+
851
class RepositoryChat:
    """Conversational manager for a user's HuggingFace repositories.

    Holds a per-process session (token + username) validated via
    ``auth_manager`` and answers simple keyword-matched commands
    ("list my models", "delete model ...", "help", ...) by calling the
    HuggingFace Hub API. Responses are Markdown strings for the Gradio UI.
    """

    def __init__(self):
        self.hf_api = HfApi()              # Hub client; token is passed per call
        self.chat_history = []             # alternating {"role": ..., "content": ...} dicts
        self.current_user_token = None     # set by initialize_session on success
        self.current_username = None       # set by initialize_session on success

    def initialize_session(self, hf_token: str) -> Tuple[bool, str]:
        """Initialize chat session with HF token.

        Returns ``(is_valid, message)`` from the validator; on success the
        token/username are stored and the chat history is reset.
        NOTE(review): on a failed validation any previously stored session
        is left untouched — confirm that is intended.
        """
        is_valid, message, username = auth_manager.validate_hf_token(hf_token)
        if is_valid:
            self.current_user_token = hf_token
            self.current_username = username
            self.chat_history = []
        return is_valid, message

    @handle_errors("repository_chat")
    def list_user_models(self) -> str:
        """List all models in user's HuggingFace account as a Markdown summary."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session with your HuggingFace token first!")

        try:
            models = self.hf_api.list_models(author=self.current_username, token=self.current_user_token)
            model_list = list(models)

            if not model_list:
                return f"📭 No models found in {self.current_username}'s account"

            result = f"## 🤖 Your Models ({len(model_list)})\n\n"

            for model in model_list[:20]:  # Limit to 20 for display
                model_id = model.modelId
                # downloads/likes may be absent on some API objects; default to 0
                downloads = getattr(model, 'downloads', 0)
                likes = getattr(model, 'likes', 0)
                result += f"- **{model_id}**\n"
                result += f" - Downloads: {downloads} | Likes: {likes}\n"
                result += f" - [View on Hub](https://huggingface.co/{model_id})\n\n"

            return result
        except Exception as e:
            # API/network errors are reported inline rather than raised (UI text)
            return f"Error fetching models: {str(e)}"

    @handle_errors("repository_chat")
    def list_user_datasets(self) -> str:
        """List all datasets in user's HuggingFace account as a Markdown summary."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        try:
            datasets = self.hf_api.list_datasets(author=self.current_username, token=self.current_user_token)
            dataset_list = list(datasets)

            if not dataset_list:
                return f"📭 No datasets found in {self.current_username}'s account"

            result = f"## 📊 Your Datasets ({len(dataset_list)})\n\n"

            for dataset in dataset_list[:20]:  # same 20-item display cap as models
                dataset_id = dataset.id
                downloads = getattr(dataset, 'downloads', 0)
                result += f"- **{dataset_id}**\n"
                result += f" - Downloads: {downloads}\n"
                result += f" - [View on Hub](https://huggingface.co/datasets/{dataset_id})\n\n"

            return result
        except Exception as e:
            return f"Error fetching datasets: {str(e)}"

    @handle_errors("repository_chat")
    def get_model_info(self, model_id: str) -> str:
        """Get detailed information about a specific model.

        A bare name (no ``/``) is qualified with the session username.
        """
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        try:
            # Add username if not in model_id
            if "/" not in model_id and self.current_username:
                model_id = f"{self.current_username}/{model_id}"

            model_info = self.hf_api.model_info(model_id, token=self.current_user_token)

            result = f"## 🤖 Model: {model_id}\n\n"
            result += f"**Model ID:** {model_info.modelId}\n"
            result += f"**Downloads:** {getattr(model_info, 'downloads', 0)}\n"
            result += f"**Likes:** {getattr(model_info, 'likes', 0)}\n"
            result += f"**Created:** {getattr(model_info, 'created_at', 'Unknown')}\n"
            result += f"**Last Modified:** {getattr(model_info, 'last_modified', 'Unknown')}\n\n"

            if hasattr(model_info, 'tags') and model_info.tags:
                result += f"**Tags:** {', '.join(model_info.tags[:10])}\n\n"

            result += f"**🔗 [View on HuggingFace](https://huggingface.co/{model_id})**\n"

            return result
        except Exception as e:
            return f"Error fetching model info: {str(e)}"

    @handle_errors("repository_chat")
    def delete_repo(self, repo_id: str, repo_type: str = "model") -> str:
        """Delete a repository (model or dataset). Irreversible on the Hub."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        # Add username if not in repo_id
        if "/" not in repo_id and self.current_username:
            repo_id = f"{self.current_username}/{repo_id}"

        try:
            self.hf_api.delete_repo(
                repo_id=repo_id,
                token=self.current_user_token,
                repo_type=repo_type
            )
            return f"✅ Successfully deleted {repo_type}: {repo_id}"
        except Exception as e:
            return f"❌ Error deleting {repo_type}: {str(e)}"

    @handle_errors("repository_chat")
    def chat_with_repos(self, user_message: str) -> str:
        """Conversational interface for repository management.

        Intent is detected with simple keyword matching on the lowercased
        message; both the user message and the bot response are appended
        to ``chat_history``.
        """
        if not self.current_user_token:
            return "⚠️ Please initialize your session with a HuggingFace token first!"

        # Add to history
        self.chat_history.append({"role": "user", "content": user_message})

        # Parse intent
        message_lower = user_message.lower()

        response = ""

        # List models
        if any(word in message_lower for word in ["list models", "show models", "my models", "what models"]):
            response = self.list_user_models()

        # List datasets
        elif any(word in message_lower for word in ["list datasets", "show datasets", "my datasets", "what datasets"]):
            response = self.list_user_datasets()

        # Model info
        elif any(word in message_lower for word in ["info about", "details about", "tell me about", "information on"]):
            # Extract model name (simple extraction): assumes the model name
            # is the last word of the message, punctuation stripped
            words = user_message.split()
            if len(words) > 2:
                potential_model = words[-1].strip("?.,!")
                response = self.get_model_info(potential_model)
            else:
                response = "Please specify which model you want info about. Example: 'info about my-model-name'"

        # Delete model — never deletes directly from chat; points at the
        # explicit Delete Repository UI section instead
        elif "delete" in message_lower and "model" in message_lower:
            words = user_message.split()
            if len(words) > 2:
                model_name = words[-1].strip("?.,!")
                response = f"⚠️ Are you sure you want to delete model '{model_name}'? This action cannot be undone!\n\n"
                response += "To confirm, use the Delete Repository section below."
            else:
                response = "Please specify which model to delete. Example: 'delete model my-model-name'"

        # General help
        elif any(word in message_lower for word in ["help", "what can you do", "commands"]):
            response = """## 🤖 Architech Repository Assistant

I can help you manage your HuggingFace repositories! Here's what I can do:

**📋 Listing:**
- "List my models" - Show all your models
- "Show my datasets" - Show all your datasets

**ℹ️ Information:**
- "Info about [model-name]" - Get details about a specific model
- "Tell me about [model-name]" - Model statistics and info

**🗑️ Management:**
- Use the Delete Repository section to remove models/datasets

**💡 Tips:**
- I have access to your HuggingFace account
- I can see all your public and private repos
- All actions respect your permissions

Try asking: "List my models" or "Show my datasets"!"""

        # Default response
        else:
            response = f"""I'm not sure what you want to do.

**Quick Commands:**
- "List my models"
- "Show my datasets"
- "Info about [model-name]"
- "Help" for full command list

What would you like to do?"""

        # Add to history
        self.chat_history.append({"role": "assistant", "content": response})

        return response

    def get_chat_history_display(self) -> List[Tuple[str, str]]:
        """Format chat history for Gradio ChatBot: (user, assistant) pairs.

        Relies on history being strictly alternating user/assistant entries;
        a trailing unpaired message is dropped from the display.
        """
        history = []
        for i in range(0, len(self.chat_history), 2):
            if i + 1 < len(self.chat_history):
                user_msg = self.chat_history[i]["content"]
                bot_msg = self.chat_history[i + 1]["content"]
                history.append((user_msg, bot_msg))
        return history
1061
+
1062
# Shared module-level chat instance used by the Gradio UI handlers.
repo_chat = RepositoryChat()
# ==================== GRADIO INTERFACE ====================
1063
 
1064
  def create_gradio_interface():
1065
  agent = ArchitechAgent()
 
1102
  task_desc_train = gr.Textbox(label="Task Description", lines=2)
1103
  model_name = gr.Textbox(label="Model Name", placeholder="my-awesome-model")
1104
  hf_token = gr.Textbox(label="HuggingFace Token", type="password")
1105
+ use_synthetic = gr.Checkbox(label="Generate New Synthetic Data", value=True)
1106
+
1107
+ with gr.Group(visible=False) as dataset_group:
1108
+ gr.Markdown("### 📊 Select Existing Dataset")
1109
+ dataset_dropdown = gr.Dropdown(
1110
+ label="Choose Dataset",
1111
+ choices=[],
1112
+ interactive=True
1113
+ )
1114
+ refresh_datasets_btn = gr.Button("🔄 Refresh Datasets", size="sm")
1115
+ dataset_preview = gr.Markdown()
1116
+
1117
+ def refresh_dataset_list():
1118
+ datasets = dataset_manager.list_available_datasets()
1119
+ choices = [name for name, path in datasets]
1120
+ return gr.Dropdown(choices=choices)
1121
+
1122
+ def show_dataset_preview(dataset_name):
1123
+ if dataset_name:
1124
+ datasets = dataset_manager.list_available_datasets()
1125
+ for name, path in datasets:
1126
+ if name == dataset_name:
1127
+ return dataset_manager.get_dataset_preview(path)
1128
+ return "Select a dataset to preview"
1129
+
1130
+ refresh_datasets_btn.click(
1131
+ fn=refresh_dataset_list,
1132
+ outputs=dataset_dropdown
1133
+ )
1134
+
1135
+ dataset_dropdown.change(
1136
+ fn=show_dataset_preview,
1137
+ inputs=dataset_dropdown,
1138
+ outputs=dataset_preview
1139
+ )
1140
+
1141
+ with gr.Group(visible=False) as custom_data_group:
1142
+ training_data_input = gr.Textbox(
1143
+ label="Training Data (one example per line) OR Dataset Path",
1144
+ placeholder="Human: Hello\nAssistant: Hi!\n\nOR: ./synthetic_datasets/synthetic_general_conversational_20260126.json",
1145
+ lines=8
1146
+ )
1147
+
1148
+ # Toggle visibility
1149
+ def toggle_data_source(use_synth):
1150
+ return gr.update(visible=not use_synth), gr.update(visible=not use_synth)
1151
+
1152
+ use_synthetic.change(
1153
+ fn=toggle_data_source,
1154
+ inputs=use_synthetic,
1155
+ outputs=[dataset_group, custom_data_group]
1156
+ )
1157
 
1158
  with gr.Accordion("⚙️ Advanced", open=False):
1159
  base_model = gr.Dropdown(
 
1168
  with gr.Column():
1169
  train_output = gr.Markdown()
1170
 
1171
+ def prepare_training_data(use_synth, dataset_name, custom_data):
1172
+ """Prepare training data based on selection"""
1173
+ if use_synth:
1174
+ return "" # Will generate new data
1175
+ elif dataset_name:
1176
+ # Use selected dataset
1177
+ datasets = dataset_manager.list_available_datasets()
1178
+ for name, path in datasets:
1179
+ if name == dataset_name:
1180
+ return path
1181
+ return custom_data
1182
+
1183
  train_btn.click(
1184
+ fn=lambda task, dataset_name, custom, model, token, base, synth, lr, epochs, batch: agent.train_custom_model(
1185
+ task,
1186
+ prepare_training_data(synth, dataset_name, custom),
1187
+ model,
1188
+ token,
1189
+ base,
1190
+ synth,
1191
+ gr.State("general"),
1192
+ gr.State(100),
1193
+ lr,
1194
+ epochs,
1195
+ batch
1196
+ ),
1197
+ inputs=[
1198
+ task_desc_train, dataset_dropdown, training_data_input,
1199
+ model_name, hf_token, base_model, use_synthetic,
1200
+ learning_rate, num_epochs, batch_size
1201
+ ],
1202
  outputs=train_output
1203
  )
1204
 
 
1236
  outputs=test_output
1237
  )
1238
 
1239
+ # Repository Chat Tab
1240
+ with gr.Tab("💬 Repository Chat"):
1241
+ gr.Markdown("""
1242
+ ### Chat with Your HuggingFace Repositories
1243
+ Manage your models and datasets conversationally!
1244
+ """)
1245
+
1246
+ with gr.Row():
1247
+ with gr.Column():
1248
+ repo_token = gr.Textbox(
1249
+ label="HuggingFace Token",
1250
+ type="password",
1251
+ placeholder="hf_..."
1252
+ )
1253
+ init_btn = gr.Button("🔐 Initialize Session", variant="primary")
1254
+ init_output = gr.Markdown()
1255
+
1256
+ init_btn.click(
1257
+ fn=lambda token: repo_chat.initialize_session(token)[1],
1258
+ inputs=repo_token,
1259
+ outputs=init_output
1260
+ )
1261
+
1262
+ gr.Markdown("---")
1263
+
1264
+ with gr.Row():
1265
+ with gr.Column(scale=2):
1266
+ chatbot = gr.Chatbot(
1267
+ label="Repository Assistant",
1268
+ height=400
1269
+ )
1270
+
1271
+ with gr.Row():
1272
+ chat_input = gr.Textbox(
1273
+ label="Message",
1274
+ placeholder="Try: 'List my models' or 'Show my datasets'",
1275
+ scale=4
1276
+ )
1277
+ send_btn = gr.Button("Send", variant="primary", scale=1)
1278
+
1279
+ gr.Markdown("""
1280
+ **Quick Commands:**
1281
+ - "List my models" - Show all your models
1282
+ - "Show my datasets" - Show all your datasets
1283
+ - "Info about [model-name]" - Get model details
1284
+ - "Help" - See all commands
1285
+ """)
1286
+
1287
+ with gr.Column(scale=1):
1288
+ gr.Markdown("### 🗑️ Delete Repository")
1289
+
1290
+ delete_repo_id = gr.Textbox(
1291
+ label="Repository ID",
1292
+ placeholder="username/model-name"
1293
+ )
1294
+
1295
+ delete_repo_type = gr.Radio(
1296
+ choices=["model", "dataset"],
1297
+ label="Type",
1298
+ value="model"
1299
+ )
1300
+
1301
+ delete_repo_btn = gr.Button("🗑️ Delete", variant="stop")
1302
+ delete_repo_output = gr.Markdown()
1303
+
1304
+ delete_repo_btn.click(
1305
+ fn=repo_chat.delete_repo,
1306
+ inputs=[delete_repo_id, delete_repo_type],
1307
+ outputs=delete_repo_output
1308
+ )
1309
+
1310
+ def chat_respond(message, history):
1311
+ if not message.strip():
1312
+ return history, ""
1313
+
1314
+ bot_response = repo_chat.chat_with_repos(message)
1315
+ history.append((message, bot_response))
1316
+ return history, ""
1317
+
1318
+ send_btn.click(
1319
+ fn=chat_respond,
1320
+ inputs=[chat_input, chatbot],
1321
+ outputs=[chatbot, chat_input]
1322
+ )
1323
+
1324
+ chat_input.submit(
1325
+ fn=chat_respond,
1326
+ inputs=[chat_input, chatbot],
1327
+ outputs=[chatbot, chat_input]
1328
+ )
1329
+
1330
  # Model Management Tab
1331
  with gr.Tab("💾 Model Management"):
1332
  gr.Markdown("""