Or4cl3-2 committed on
Commit
ef57b75
·
verified ·
1 Parent(s): dffb711

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +437 -7
app.py CHANGED
@@ -419,7 +419,13 @@ class ArchitechAgent:
419
  synthetic_data = json.load(f)
420
  texts = [item["text"] for item in synthetic_data]
421
  else:
422
- texts = [t.strip() for t in training_data.split("\n\n") if t.strip()]
 
 
 
 
 
 
423
 
424
  if not texts:
425
  raise ModelTrainingError("No training data available!")
@@ -797,7 +803,263 @@ def add_model_management_tab():
797
  """)
798
 
799
  # This function should be called in create_gradio_interface()
800
- # Add it right before the "About" tab# # ==================== GRADIO INTERFACE ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
  def create_gradio_interface():
803
  agent = ArchitechAgent()
@@ -840,7 +1102,58 @@ def create_gradio_interface():
840
  task_desc_train = gr.Textbox(label="Task Description", lines=2)
841
  model_name = gr.Textbox(label="Model Name", placeholder="my-awesome-model")
842
  hf_token = gr.Textbox(label="HuggingFace Token", type="password")
843
- use_synthetic = gr.Checkbox(label="Use Synthetic Data", value=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844
 
845
  with gr.Accordion("⚙️ Advanced", open=False):
846
  base_model = gr.Dropdown(
@@ -855,11 +1168,37 @@ def create_gradio_interface():
855
  with gr.Column():
856
  train_output = gr.Markdown()
857
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  train_btn.click(
859
- fn=agent.train_custom_model,
860
- inputs=[task_desc_train, gr.State(""), model_name, hf_token,
861
- base_model, use_synthetic, gr.State("general"),
862
- gr.State(100), learning_rate, num_epochs, batch_size],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  outputs=train_output
864
  )
865
 
@@ -897,6 +1236,97 @@ def create_gradio_interface():
897
  outputs=test_output
898
  )
899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  # Model Management Tab
901
  with gr.Tab("💾 Model Management"):
902
  gr.Markdown("""
 
419
  synthetic_data = json.load(f)
420
  texts = [item["text"] for item in synthetic_data]
421
  else:
422
+ # Check if training_data is a file path or raw text
423
+ if training_data.strip().endswith('.json') and os.path.exists(training_data.strip()):
424
+ # Load from file
425
+ texts = dataset_manager.load_dataset_for_training(training_data.strip())
426
+ else:
427
+ # Parse as raw text
428
+ texts = [t.strip() for t in training_data.split("\n\n") if t.strip()]
429
 
430
  if not texts:
431
  raise ModelTrainingError("No training data available!")
 
803
  """)
804
 
805
  # This function should be called in create_gradio_interface()
806
+ # Add it right before the "About" tab# ==================== DATASET MANAGER ====================
807
+
808
class DatasetManager:
    """Manage locally stored synthetic training datasets.

    Datasets are plain JSON files (a list of ``{"text": ...}`` objects)
    kept in ``./synthetic_datasets``; the directory is created on first use.
    """

    def __init__(self):
        # All datasets live under one well-known directory next to the app.
        self.datasets_dir = Path("./synthetic_datasets")
        self.datasets_dir.mkdir(exist_ok=True)

    def list_available_datasets(self) -> List[Tuple[str, str]]:
        """Return ``(filename, path)`` pairs for every ``*.json`` dataset on disk."""
        datasets = []
        if self.datasets_dir.exists():
            for file in self.datasets_dir.glob("*.json"):
                datasets.append((file.name, str(file)))
        return datasets

    def get_dataset_preview(self, dataset_path: str) -> str:
        """Return a Markdown preview (example count + first 3 examples) of a dataset.

        Failures (missing file, malformed JSON) are reported as a message
        string rather than raised, because the result is rendered directly
        in the UI.
        """
        try:
            # Explicit encoding: JSON is UTF-8 by spec; without it, reads fail
            # or mojibake on platforms whose default codec isn't UTF-8.
            with open(dataset_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if not data:
                return "Dataset is empty"

            preview = f"**Dataset:** `{Path(dataset_path).name}`\n\n"
            preview += f"**Total Examples:** {len(data)}\n\n"
            preview += "**First 3 Examples:**\n\n"

            for i, example in enumerate(data[:3], 1):
                preview += f"**Example {i}:**\n```\n{example.get('text', 'No text field')}\n```\n\n"

            return preview
        except Exception as e:
            return f"Error loading dataset: {str(e)}"

    def load_dataset_for_training(self, dataset_path: str) -> List[str]:
        """Load the ``text`` field of every example; entries without one are skipped."""
        with open(dataset_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return [item["text"] for item in data if "text" in item]
846
+
847
# Shared module-level manager instance used by the training UI handlers.
dataset_manager = DatasetManager()
848
+
849
+ # ==================== REPOSITORY CHAT SYSTEM ====================
850
+
851
class RepositoryChat:
    """Conversational manager for a user's HuggingFace repositories.

    Holds a per-process session (token + username) validated via
    ``auth_manager`` and answers simple keyword-matched commands
    ("list my models", "delete model ...", "help", ...) by calling the
    HuggingFace Hub API. Responses are Markdown strings for the Gradio UI.
    """

    def __init__(self):
        self.hf_api = HfApi()              # Hub client; token is passed per call
        self.chat_history = []             # alternating {"role": ..., "content": ...} dicts
        self.current_user_token = None     # set by initialize_session on success
        self.current_username = None       # set by initialize_session on success

    def initialize_session(self, hf_token: str) -> Tuple[bool, str]:
        """Initialize chat session with HF token.

        Returns ``(is_valid, message)`` from the validator; on success the
        token/username are stored and the chat history is reset.
        NOTE(review): on a failed validation any previously stored session
        is left untouched — confirm that is intended.
        """
        is_valid, message, username = auth_manager.validate_hf_token(hf_token)
        if is_valid:
            self.current_user_token = hf_token
            self.current_username = username
            self.chat_history = []
        return is_valid, message

    @handle_errors("repository_chat")
    def list_user_models(self) -> str:
        """List all models in user's HuggingFace account as a Markdown summary."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session with your HuggingFace token first!")

        try:
            models = self.hf_api.list_models(author=self.current_username, token=self.current_user_token)
            model_list = list(models)

            if not model_list:
                return f"📭 No models found in {self.current_username}'s account"

            result = f"## 🤖 Your Models ({len(model_list)})\n\n"

            for model in model_list[:20]:  # Limit to 20 for display
                model_id = model.modelId
                # downloads/likes may be absent on some API objects; default to 0
                downloads = getattr(model, 'downloads', 0)
                likes = getattr(model, 'likes', 0)
                result += f"- **{model_id}**\n"
                result += f" - Downloads: {downloads} | Likes: {likes}\n"
                result += f" - [View on Hub](https://huggingface.co/{model_id})\n\n"

            return result
        except Exception as e:
            # API/network errors are reported inline rather than raised (UI text)
            return f"Error fetching models: {str(e)}"

    @handle_errors("repository_chat")
    def list_user_datasets(self) -> str:
        """List all datasets in user's HuggingFace account as a Markdown summary."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        try:
            datasets = self.hf_api.list_datasets(author=self.current_username, token=self.current_user_token)
            dataset_list = list(datasets)

            if not dataset_list:
                return f"📭 No datasets found in {self.current_username}'s account"

            result = f"## 📊 Your Datasets ({len(dataset_list)})\n\n"

            for dataset in dataset_list[:20]:  # same 20-item display cap as models
                dataset_id = dataset.id
                downloads = getattr(dataset, 'downloads', 0)
                result += f"- **{dataset_id}**\n"
                result += f" - Downloads: {downloads}\n"
                result += f" - [View on Hub](https://huggingface.co/datasets/{dataset_id})\n\n"

            return result
        except Exception as e:
            return f"Error fetching datasets: {str(e)}"

    @handle_errors("repository_chat")
    def get_model_info(self, model_id: str) -> str:
        """Get detailed information about a specific model.

        A bare name (no ``/``) is qualified with the session username.
        """
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        try:
            # Add username if not in model_id
            if "/" not in model_id and self.current_username:
                model_id = f"{self.current_username}/{model_id}"

            model_info = self.hf_api.model_info(model_id, token=self.current_user_token)

            result = f"## 🤖 Model: {model_id}\n\n"
            result += f"**Model ID:** {model_info.modelId}\n"
            result += f"**Downloads:** {getattr(model_info, 'downloads', 0)}\n"
            result += f"**Likes:** {getattr(model_info, 'likes', 0)}\n"
            result += f"**Created:** {getattr(model_info, 'created_at', 'Unknown')}\n"
            result += f"**Last Modified:** {getattr(model_info, 'last_modified', 'Unknown')}\n\n"

            if hasattr(model_info, 'tags') and model_info.tags:
                result += f"**Tags:** {', '.join(model_info.tags[:10])}\n\n"

            result += f"**🔗 [View on HuggingFace](https://huggingface.co/{model_id})**\n"

            return result
        except Exception as e:
            return f"Error fetching model info: {str(e)}"

    @handle_errors("repository_chat")
    def delete_repo(self, repo_id: str, repo_type: str = "model") -> str:
        """Delete a repository (model or dataset). Irreversible on the Hub."""
        if not self.current_user_token:
            raise ArchitechError("Please initialize session first!")

        # Add username if not in repo_id
        if "/" not in repo_id and self.current_username:
            repo_id = f"{self.current_username}/{repo_id}"

        try:
            self.hf_api.delete_repo(
                repo_id=repo_id,
                token=self.current_user_token,
                repo_type=repo_type
            )
            return f"✅ Successfully deleted {repo_type}: {repo_id}"
        except Exception as e:
            return f"❌ Error deleting {repo_type}: {str(e)}"

    @handle_errors("repository_chat")
    def chat_with_repos(self, user_message: str) -> str:
        """Conversational interface for repository management.

        Intent is detected with simple keyword matching on the lowercased
        message; both the user message and the bot response are appended
        to ``chat_history``.
        """
        if not self.current_user_token:
            return "⚠️ Please initialize your session with a HuggingFace token first!"

        # Add to history
        self.chat_history.append({"role": "user", "content": user_message})

        # Parse intent
        message_lower = user_message.lower()

        response = ""

        # List models
        if any(word in message_lower for word in ["list models", "show models", "my models", "what models"]):
            response = self.list_user_models()

        # List datasets
        elif any(word in message_lower for word in ["list datasets", "show datasets", "my datasets", "what datasets"]):
            response = self.list_user_datasets()

        # Model info
        elif any(word in message_lower for word in ["info about", "details about", "tell me about", "information on"]):
            # Extract model name (simple extraction): assumes the model name
            # is the last word of the message, punctuation stripped
            words = user_message.split()
            if len(words) > 2:
                potential_model = words[-1].strip("?.,!")
                response = self.get_model_info(potential_model)
            else:
                response = "Please specify which model you want info about. Example: 'info about my-model-name'"

        # Delete model — never deletes directly from chat; points at the
        # explicit Delete Repository UI section instead
        elif "delete" in message_lower and "model" in message_lower:
            words = user_message.split()
            if len(words) > 2:
                model_name = words[-1].strip("?.,!")
                response = f"⚠️ Are you sure you want to delete model '{model_name}'? This action cannot be undone!\n\n"
                response += "To confirm, use the Delete Repository section below."
            else:
                response = "Please specify which model to delete. Example: 'delete model my-model-name'"

        # General help
        elif any(word in message_lower for word in ["help", "what can you do", "commands"]):
            response = """## 🤖 Architech Repository Assistant

I can help you manage your HuggingFace repositories! Here's what I can do:

**📋 Listing:**
- "List my models" - Show all your models
- "Show my datasets" - Show all your datasets

**ℹ️ Information:**
- "Info about [model-name]" - Get details about a specific model
- "Tell me about [model-name]" - Model statistics and info

**🗑️ Management:**
- Use the Delete Repository section to remove models/datasets

**💡 Tips:**
- I have access to your HuggingFace account
- I can see all your public and private repos
- All actions respect your permissions

Try asking: "List my models" or "Show my datasets"!"""

        # Default response
        else:
            response = f"""I'm not sure what you want to do.

**Quick Commands:**
- "List my models"
- "Show my datasets"
- "Info about [model-name]"
- "Help" for full command list

What would you like to do?"""

        # Add to history
        self.chat_history.append({"role": "assistant", "content": response})

        return response

    def get_chat_history_display(self) -> List[Tuple[str, str]]:
        """Format chat history for Gradio ChatBot: (user, assistant) pairs.

        Relies on history being strictly alternating user/assistant entries;
        a trailing unpaired message is dropped from the display.
        """
        history = []
        for i in range(0, len(self.chat_history), 2):
            if i + 1 < len(self.chat_history):
                user_msg = self.chat_history[i]["content"]
                bot_msg = self.chat_history[i + 1]["content"]
                history.append((user_msg, bot_msg))
        return history
1061
+
1062
# Shared module-level chat instance used by the Gradio UI handlers.
repo_chat = RepositoryChat()
# ==================== GRADIO INTERFACE ====================
1063
 
1064
  def create_gradio_interface():
1065
  agent = ArchitechAgent()
 
1102
  task_desc_train = gr.Textbox(label="Task Description", lines=2)
1103
  model_name = gr.Textbox(label="Model Name", placeholder="my-awesome-model")
1104
  hf_token = gr.Textbox(label="HuggingFace Token", type="password")
1105
+ use_synthetic = gr.Checkbox(label="Generate New Synthetic Data", value=True)
1106
+
1107
+ with gr.Group(visible=False) as dataset_group:
1108
+ gr.Markdown("### 📊 Select Existing Dataset")
1109
+ dataset_dropdown = gr.Dropdown(
1110
+ label="Choose Dataset",
1111
+ choices=[],
1112
+ interactive=True
1113
+ )
1114
+ refresh_datasets_btn = gr.Button("🔄 Refresh Datasets", size="sm")
1115
+ dataset_preview = gr.Markdown()
1116
+
1117
+ def refresh_dataset_list():
1118
+ datasets = dataset_manager.list_available_datasets()
1119
+ choices = [name for name, path in datasets]
1120
+ return gr.Dropdown(choices=choices)
1121
+
1122
+ def show_dataset_preview(dataset_name):
1123
+ if dataset_name:
1124
+ datasets = dataset_manager.list_available_datasets()
1125
+ for name, path in datasets:
1126
+ if name == dataset_name:
1127
+ return dataset_manager.get_dataset_preview(path)
1128
+ return "Select a dataset to preview"
1129
+
1130
+ refresh_datasets_btn.click(
1131
+ fn=refresh_dataset_list,
1132
+ outputs=dataset_dropdown
1133
+ )
1134
+
1135
+ dataset_dropdown.change(
1136
+ fn=show_dataset_preview,
1137
+ inputs=dataset_dropdown,
1138
+ outputs=dataset_preview
1139
+ )
1140
+
1141
+ with gr.Group(visible=False) as custom_data_group:
1142
+ training_data_input = gr.Textbox(
1143
+ label="Training Data (one example per line) OR Dataset Path",
1144
+ placeholder="Human: Hello\nAssistant: Hi!\n\nOR: ./synthetic_datasets/synthetic_general_conversational_20260126.json",
1145
+ lines=8
1146
+ )
1147
+
1148
+ # Toggle visibility
1149
+ def toggle_data_source(use_synth):
1150
+ return gr.update(visible=not use_synth), gr.update(visible=not use_synth)
1151
+
1152
+ use_synthetic.change(
1153
+ fn=toggle_data_source,
1154
+ inputs=use_synthetic,
1155
+ outputs=[dataset_group, custom_data_group]
1156
+ )
1157
 
1158
  with gr.Accordion("⚙️ Advanced", open=False):
1159
  base_model = gr.Dropdown(
 
1168
  with gr.Column():
1169
  train_output = gr.Markdown()
1170
 
1171
+ def prepare_training_data(use_synth, dataset_name, custom_data):
1172
+ """Prepare training data based on selection"""
1173
+ if use_synth:
1174
+ return "" # Will generate new data
1175
+ elif dataset_name:
1176
+ # Use selected dataset
1177
+ datasets = dataset_manager.list_available_datasets()
1178
+ for name, path in datasets:
1179
+ if name == dataset_name:
1180
+ return path
1181
+ return custom_data
1182
+
1183
  train_btn.click(
1184
+ fn=lambda task, dataset_name, custom, model, token, base, synth, lr, epochs, batch: agent.train_custom_model(
1185
+ task,
1186
+ prepare_training_data(synth, dataset_name, custom),
1187
+ model,
1188
+ token,
1189
+ base,
1190
+ synth,
1191
+ gr.State("general"),
1192
+ gr.State(100),
1193
+ lr,
1194
+ epochs,
1195
+ batch
1196
+ ),
1197
+ inputs=[
1198
+ task_desc_train, dataset_dropdown, training_data_input,
1199
+ model_name, hf_token, base_model, use_synthetic,
1200
+ learning_rate, num_epochs, batch_size
1201
+ ],
1202
  outputs=train_output
1203
  )
1204
 
 
1236
  outputs=test_output
1237
  )
1238
 
1239
+ # Repository Chat Tab
1240
+ with gr.Tab("💬 Repository Chat"):
1241
+ gr.Markdown("""
1242
+ ### Chat with Your HuggingFace Repositories
1243
+ Manage your models and datasets conversationally!
1244
+ """)
1245
+
1246
+ with gr.Row():
1247
+ with gr.Column():
1248
+ repo_token = gr.Textbox(
1249
+ label="HuggingFace Token",
1250
+ type="password",
1251
+ placeholder="hf_..."
1252
+ )
1253
+ init_btn = gr.Button("🔐 Initialize Session", variant="primary")
1254
+ init_output = gr.Markdown()
1255
+
1256
+ init_btn.click(
1257
+ fn=lambda token: repo_chat.initialize_session(token)[1],
1258
+ inputs=repo_token,
1259
+ outputs=init_output
1260
+ )
1261
+
1262
+ gr.Markdown("---")
1263
+
1264
+ with gr.Row():
1265
+ with gr.Column(scale=2):
1266
+ chatbot = gr.Chatbot(
1267
+ label="Repository Assistant",
1268
+ height=400
1269
+ )
1270
+
1271
+ with gr.Row():
1272
+ chat_input = gr.Textbox(
1273
+ label="Message",
1274
+ placeholder="Try: 'List my models' or 'Show my datasets'",
1275
+ scale=4
1276
+ )
1277
+ send_btn = gr.Button("Send", variant="primary", scale=1)
1278
+
1279
+ gr.Markdown("""
1280
+ **Quick Commands:**
1281
+ - "List my models" - Show all your models
1282
+ - "Show my datasets" - Show all your datasets
1283
+ - "Info about [model-name]" - Get model details
1284
+ - "Help" - See all commands
1285
+ """)
1286
+
1287
+ with gr.Column(scale=1):
1288
+ gr.Markdown("### 🗑️ Delete Repository")
1289
+
1290
+ delete_repo_id = gr.Textbox(
1291
+ label="Repository ID",
1292
+ placeholder="username/model-name"
1293
+ )
1294
+
1295
+ delete_repo_type = gr.Radio(
1296
+ choices=["model", "dataset"],
1297
+ label="Type",
1298
+ value="model"
1299
+ )
1300
+
1301
+ delete_repo_btn = gr.Button("🗑️ Delete", variant="stop")
1302
+ delete_repo_output = gr.Markdown()
1303
+
1304
+ delete_repo_btn.click(
1305
+ fn=repo_chat.delete_repo,
1306
+ inputs=[delete_repo_id, delete_repo_type],
1307
+ outputs=delete_repo_output
1308
+ )
1309
+
1310
+ def chat_respond(message, history):
1311
+ if not message.strip():
1312
+ return history, ""
1313
+
1314
+ bot_response = repo_chat.chat_with_repos(message)
1315
+ history.append((message, bot_response))
1316
+ return history, ""
1317
+
1318
+ send_btn.click(
1319
+ fn=chat_respond,
1320
+ inputs=[chat_input, chatbot],
1321
+ outputs=[chatbot, chat_input]
1322
+ )
1323
+
1324
+ chat_input.submit(
1325
+ fn=chat_respond,
1326
+ inputs=[chat_input, chatbot],
1327
+ outputs=[chatbot, chat_input]
1328
+ )
1329
+
1330
  # Model Management Tab
1331
  with gr.Tab("💾 Model Management"):
1332
  gr.Markdown("""