rogerthat11 committed on
Commit
f8d8f6c
·
1 Parent(s): a49e405

clean all

Browse files
.gitattributes DELETED
@@ -1,36 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- .history filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/conda_20250202111624.yaml DELETED
@@ -1,13 +0,0 @@
1
- name: chatbot-env
2
- channels:
3
- - pytorch
4
- - conda-forge
5
- - defaults
6
- dependencies:
7
- - python=3.10 # Or you can use python=3.11 or python=3.12 if you prefer
8
- - gradio
9
- - pyyaml
10
- - transformers
11
- - pytorch::torch>=2.0.0 torchvision torchaudio pytorch-cuda=11.8 -c pytorch # Example PyTorch with CUDA 11.8
12
- - accelerate>=0.26.0
13
- - bitsandbytes # Let's try basic 'bitsandbytes' in conda first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/conda_20250202111721.yaml DELETED
@@ -1,13 +0,0 @@
1
- name: chatbot-env
2
- channels:
3
- - pytorch
4
- - conda-forge
5
- - defaults
6
- dependencies:
7
- - python=3.10
8
- - gradio
9
- - pyyaml
10
- - transformers
11
- - pytorch::torch>=2.0.0 torchvision torchaudio pytorch-cuda=11.8 -c pytorch
12
- - accelerate>=0.26.0
13
- - bitsandbytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/conda_20250202111724.yaml DELETED
@@ -1,13 +0,0 @@
1
- name: chatbot-env
2
- channels:
3
- - pytorch
4
- - conda-forge
5
- - defaults
6
- dependencies:
7
- - python=3.10
8
- - gradio
9
- - pyyaml
10
- - transformers
11
- - pytorch::torch>=2.0.0 torchvision torchaudio pytorch-cuda=11.8 -c pytorch
12
- - accelerate>=0.26.0
13
- - bitsandbytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/conda_20250202111908.yaml DELETED
@@ -1,19 +0,0 @@
1
- name: chatbot-env
2
- channels:
3
- - pytorch
4
- - conda-forge
5
- - defaults
6
- dependencies:
7
- - python=3.10
8
- - gradio
9
- - pyyaml
10
- - transformers
11
- - pytorch::torch>=2.0.0 torchvision torchaudio pytorch-cuda=11.8 -c pytorch
12
- - accelerate>=0.26.0
13
- - bitsandbytes
14
-
15
- # --- Force environment recreation on each build (Less efficient - use with caution) ---
16
- # This section is NOT standard Conda practice for every startup, but can be used for debugging
17
- run:
18
- - conda env remove -n chatbot-env --yes # Remove existing environment (if it exists)
19
- - conda env create -f conda.yaml --force --yes # Recreate environment from scratch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/configs/chatbot_config_20250202080908.yaml DELETED
@@ -1,24 +0,0 @@
1
- chatbot:
2
- name: "Project Guidance Chatbot"
3
- description: "Your helpful AI assistant for project completion with LLM selection and token control."
4
- default_llm_model_id: "deepseek-r1-distill-llama-8b"
5
- max_response_tokens: 200 # Maximum tokens for LLM generated responses
6
-
7
- available_models:
8
- deepseek-r1-distill-llama-8b:
9
- name: "DeepSeek-R1-Distill-Llama-8B"
10
- model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B"
11
- gemini-flash-01-21: # Using a shorter key for easier referencing in code
12
- name: "Gemini 2.0 Flash (Exp 01-21)"
13
- model_id: "google/gemini-2.0-flash-thinking-exp-01-21"
14
-
15
- model_selection:
16
- suggested_models: # (Keep suggested models - might be useful later)
17
- - "mistralai/Mistral-7B-Instruct-v0.2"
18
- - "google/flan-t5-xl"
19
- - "facebook/bart-large"
20
- criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}"
21
-
22
- response_generation:
23
- error_message: "Sorry, I encountered an issue. Please check your input and project files."
24
- default_instruction: "How can I help you with your project?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/configs/chatbot_config_20250202081215.yaml DELETED
@@ -1,24 +0,0 @@
1
- chatbot:
2
- name: "Project Guidance Chatbot"
3
- description: "Your helpful AI assistant for project completion with LLM selection and token control."
4
- default_llm_model_id: "deepseek-r1-distill-llama-8b"
5
- max_response_tokens: 200 # Maximum tokens for LLM generated responses
6
-
7
- available_models:
8
- deepseek-r1-distill-llama-8b:
9
- name: "DeepSeek-R1-Distill-Llama-8B"
10
- model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B"
11
- gemini-flash-01-21: # Using a shorter key for easier referencing in code
12
- name: "Gemini 2.0 Flash (Exp 01-21)"
13
- model_id: "google/gemini-2.0-flash-thinking-exp-01-21"
14
-
15
- model_selection:
16
- suggested_models: # (Keep suggested models - might be useful later)
17
- - "mistralai/Mistral-7B-Instruct-v0.2"
18
- - "google/flan-t5-xl"
19
- - "facebook/bart-large"
20
- criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}"
21
-
22
- response_generation:
23
- error_message: "Sorry, I encountered an issue. Please check your input and project files."
24
- default_instruction: "How can I help you with your project?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/requirements_20250202081152.txt DELETED
@@ -1,5 +0,0 @@
1
- gradio
2
- PyYAML
3
- transformers
4
- torch
5
- accelerate
 
 
 
 
 
 
.history/requirements_20250202083728.txt DELETED
@@ -1,6 +0,0 @@
1
- gradio
2
- PyYAML
3
- transformers
4
- torch
5
- accelerate
6
- bitsandbytes
 
 
 
 
 
 
 
.history/scripts/chatbot_logic_20250202080927.py DELETED
@@ -1,326 +0,0 @@
1
- from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules
2
- import os
3
- from transformers import AutoModelForCausalLM, AutoTokenizer # Import necessary classes
4
- import yaml # Import yaml for config modification
5
- import logging # Import logging
6
-
7
- # Set up logging
8
- logging.basicConfig(level=logging.ERROR, # Set default logging level to ERROR
9
- format='%(asctime)s - %(levelname)s - %(message)s')
10
-
11
class ProjectGuidanceChatbot:
    """Chatbot that answers project questions from a roadmap, a rule set and an LLM.

    Queries are resolved in this order (see ``process_query``):

    1. administrative commands prefixed with ``sagor is python/``;
    2. keyword look-ups ("roadmap", "rules", "switch model to ...");
    3. guidance for the currently selected roadmap phase;
    4. selection of a phase whose name occurs in the query;
    5. free-form generation with the active LLM.

    NOTE(review): anyone who knows the command prefix can enter update mode
    and rewrite the rules file on disk - confirm that chat input is trusted.
    """

    def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir):
        """Load all configuration files and the default LLM.

        Args:
            roadmap_file: Path of the roadmap YAML file.
            rules_file: Path of the rules YAML file (rewritten by
                ``modify_rule_in_config``).
            config_file: Path of the chatbot configuration YAML file.
            code_templates_dir: Directory searched for
                ``<phase_name>_template.py.txt`` code templates.
        """
        self.roadmap_file = roadmap_file
        self.rules_file = rules_file
        self.config_file = config_file
        self.code_templates_dir = code_templates_dir

        self._load_config_files()

        self.current_phase = None
        # The default model is referenced by its key in ``available_models``.
        self.active_model_key = self.chatbot_config.get('default_llm_model_id')
        self.active_model_info = self.available_models_config.get(self.active_model_key)

        self.llm_model = None
        self.llm_tokenizer = None
        self.load_llm_model(self.active_model_info)

        self.update_mode_active = False

    def _load_config_files(self):
        """Read the three YAML files and publish the values derived from them.

        Everything is loaded into locals first and assigned to ``self`` only
        at the end, so an exception raised while loading leaves the previously
        active configuration untouched.
        """
        roadmap_data = load_yaml_file(self.roadmap_file)
        rules_data = load_yaml_file(self.rules_file)
        config_data = load_yaml_file(self.config_file)

        phases = get_roadmap_phases(roadmap_data)
        rules = get_project_rules(rules_data)

        # A falsy config (missing or empty file) degrades to empty sections;
        # ``or {}`` additionally covers a section that is present but empty.
        sections = config_data if config_data else {}
        chatbot_config = sections.get('chatbot') or {}
        model_config = sections.get('model_selection') or {}
        response_config = sections.get('response_generation') or {}
        available_models_config = sections.get('available_models') or {}

        self.roadmap_data = roadmap_data
        self.rules_data = rules_data
        self.config_data = config_data
        self.phases = phases
        self.rules = rules
        self.chatbot_config = chatbot_config
        self.model_config = model_config
        self.response_config = response_config
        self.available_models_config = available_models_config
        self.max_response_tokens = chatbot_config.get('max_response_tokens', 200)

    def load_llm_model(self, model_info):
        """Load the model and tokenizer described by ``model_info``.

        Args:
            model_info: Mapping with a ``model_id`` (required) and a ``name``
                (used in messages only), or a falsy value.

        On any failure the error is logged and both ``self.llm_model`` and
        ``self.llm_tokenizer`` are reset to ``None``; nothing is raised.
        """
        if not model_info:
            error_message = "Error: Model information not provided."
            logging.error(error_message)
            self.llm_model = None
            self.llm_tokenizer = None
            return

        model_id = model_info.get('model_id')
        model_name = model_info.get('name')
        if not model_id:
            error_message = f"Error: 'model_id' not found for model: {model_name}"
            logging.error(error_message)
            self.llm_model = None
            self.llm_tokenizer = None
            return

        print(f"Loading model: {model_name} ({model_id})...")
        try:
            self.llm_tokenizer = AutoTokenizer.from_pretrained(model_id)
            # device_map="auto" delegates GPU/CPU placement to the library.
            self.llm_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
            print(f"Model {model_name} loaded successfully.")
        except Exception as e:
            error_message = f"Error loading model {model_name} ({model_id}): {e}"
            logging.exception(error_message)
            self.llm_model = None
            self.llm_tokenizer = None
        # Recorded even after a failed load so the greeting names the
        # model that was requested.
        self.active_model_info = model_info

    def switch_llm_model(self, model_key):
        """Make the model registered under ``model_key`` the active one.

        Returns:
            A status string for the UI: a confirmation on success, or an
            error message when the key is unknown or the model failed to load.
        """
        if model_key not in self.available_models_config:
            error_message = f"Error: Model key '{model_key}' not found in available models."
            logging.error(error_message)
            return error_message

        model_info = self.available_models_config[model_key]
        print(f"Switching LLM model to: {model_info.get('name')}")
        self.load_llm_model(model_info)
        self.active_model_key = model_key
        # load_llm_model never raises; a failed load is visible only through
        # the reset attributes, so check them before claiming success.
        if self.llm_model is None or self.llm_tokenizer is None:
            return f"Error: Failed to load model: {model_info.get('name')}"
        return f"Switched to model: {model_info.get('name')}"

    def enter_update_mode(self):
        """Turn update mode on and return the prompt shown to the user."""
        self.update_mode_active = True
        return "Entering update mode. Please enter configuration commands (or 'sagor is python/help' for commands)."

    def exit_update_mode(self):
        """Turn update mode off, reload the configuration and confirm."""
        self.update_mode_active = False
        self.reload_config()
        return "Exiting update mode. Configuration reloaded."

    def reload_config(self):
        """Re-read all configuration files.

        Errors are logged and printed rather than raised; in that case the
        previously loaded configuration stays in effect.
        """
        print("Reloading configuration...")
        try:
            self._load_config_files()
            print("Configuration reloaded.")
        except Exception as e:
            error_message = f"Error reloading configuration files: {e}"
            logging.exception(error_message)
            # Also echoed to the console because a failed reload may be critical.
            print(error_message)

    def get_chatbot_greeting(self):
        """Return the greeting naming the bot, the active model and the token limit."""
        if self.active_model_info:
            current_model_name = self.active_model_info.get('name', 'Unknown Model')
        else:
            current_model_name = 'Unknown Model'
        bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
        description = self.chatbot_config.get('description', 'How can I help you with your project?')
        return (
            f"Hello! I am the {bot_name}. Currently using **{current_model_name}**. "
            f"Max response tokens: {self.max_response_tokens}. {description}"
        )

    def generate_llm_response(self, user_query):
        """Generate a reply to ``user_query`` with the active LLM.

        Returns:
            The decoded model output, or an error string when no model is
            loaded or generation fails (the failure is logged).
        """
        if not self.llm_model or not self.llm_tokenizer:
            error_message = "LLM model not loaded. Please select a model."
            logging.error(error_message)
            return error_message
        try:
            inputs = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
            # max_new_tokens limits only the generated continuation.  The
            # previous max_length also counted the prompt, so a long query
            # could leave no budget for the answer.
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=self.max_response_tokens,
                num_beams=5,
                no_repeat_ngram_size=2,
                early_stopping=True,
            )
            return self.llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            error_message = f"Error generating LLM response: {e}"
            logging.exception(error_message)
            # The UI only gets the generic, configurable message.
            return self.response_config.get('error_message', "Sorry, something went wrong.")

    def process_query(self, user_query):
        """Route one user message and return the reply text.

        See the class docstring for the order in which handlers are tried.
        """
        secret_command_prefix = "sagor is python/"

        if user_query.startswith(secret_command_prefix):
            command = user_query[len(secret_command_prefix):].strip()
            return self._handle_secret_command(command)

        if self.update_mode_active:
            return "In update mode. Please enter a configuration command (or 'sagor is python/help' for commands)."

        if not self.phases:
            return "Error: Roadmap data not loaded correctly."
        if not self.rules:
            return "Error: Rules data not loaded correctly."

        query_lower = user_query.lower()

        if "roadmap" in query_lower:
            return self.get_roadmap_summary()
        if "rules" in query_lower:
            return self.get_rules_summary()
        if "switch model to" in query_lower:
            # The text after the phrase is matched against model names and keys.
            requested = query_lower.split("switch model to")[1].strip()
            for key, model_data in self.available_models_config.items():
                display_name = str((model_data or {}).get('name', '')).lower()
                if requested in (display_name, key.lower()):
                    switch_result = self.switch_llm_model(key)
                    return switch_result + "\n" + self.get_chatbot_greeting()
            return f"Model '{requested}' not found in available models."

        if self.current_phase:
            current_phase_data = self.phases.get(self.current_phase)
            if current_phase_data:
                phase_response = self.get_phase_guidance(current_phase_data, user_query)
                if phase_response:
                    return phase_response

        for phase_key, phase_data in self.phases.items():
            if phase_data['name'].lower() in query_lower:
                self.current_phase = phase_key
                return self.get_phase_introduction(phase_data)

        llm_response = self.generate_llm_response(user_query)
        if llm_response:
            return llm_response

        return self.response_config.get('default_instruction', "How can I help you with your project phases or general questions?")

    def _handle_secret_command(self, command):
        """Execute an administrative command (text after the secret prefix)."""
        if command == "start":
            return self.enter_update_mode()
        if command == "exit":
            return self.exit_update_mode()
        if command == "help":
            return self.get_update_mode_help()
        if not self.update_mode_active:
            return f"Unknown command (not in update mode): {command}. To enter update mode, type 'sagor is python/start'."

        # Configuration commands, e.g. "modify_rule general rule_1 new rule text".
        parts = command.split()
        verb = parts[0] if parts else None
        if verb == "modify_rule":
            if len(parts) < 4:
                return "Invalid 'modify_rule' command format. Usage: modify_rule <category> <rule_key> <new rule text>"
            category, rule_key = parts[1], parts[2]
            new_rule_text = " ".join(parts[3:])
            return self.modify_rule_in_config(category, rule_key, new_rule_text)
        if verb == "add_phase":
            # Placeholder: roadmap editing is not implemented.
            return "Command 'add_phase' is not yet implemented in this example."
        return f"Unknown update command: {command}. Type 'sagor is python/help' for commands."

    def get_update_mode_help(self):
        """Return the help text listing the update-mode commands."""
        help_lines = (
            "Update Mode Commands:\n",
            "- `sagor is python/exit`: Exit update mode and reload configuration.\n",
            "- `sagor is python/modify_rule <category> <rule_key> <new rule text>`: Modify a rule in rules.yaml.\n",
            " Example: `sagor is python/modify_rule general rule_1 Prioritize open and responsible AI.`\n",
            "- `sagor is python/add_phase ...`: (Not yet implemented) Add a new phase to roadmap.yaml.\n",
            "- `sagor is python/help`: Show this help message.\n",
            "\nMake sure to use the correct syntax for commands. After exiting update mode, the chatbot will reload the configuration.",
        )
        return "".join(help_lines)

    def modify_rule_in_config(self, category, rule_key, new_rule_text):
        """Replace an existing rule and write the rules file back to disk.

        Args:
            category: Key under ``project_rules`` in the rules data.
            rule_key: Key of an existing rule inside that category.
            new_rule_text: Replacement text for the rule.

        Returns:
            A confirmation string, or an error string when the rules data is
            missing, the category or rule is unknown, or saving fails.
        """
        if not self.rules_data or 'project_rules' not in self.rules_data:
            error_message = "Error: Rules data not loaded or invalid format."
            logging.error(error_message)
            return error_message
        if category not in self.rules_data['project_rules']:
            error_message = f"Error: Rule category '{category}' not found."
            logging.error(error_message)
            return error_message
        if rule_key not in self.rules_data['project_rules'][category]:
            error_message = f"Error: Rule key '{rule_key}' not found in category '{category}'."
            logging.error(error_message)
            return error_message

        # Update the in-memory copy first; it is what gets serialised below.
        self.rules_data['project_rules'][category][rule_key] = new_rule_text

        try:
            with open(self.rules_file, 'w', encoding='utf-8') as f:
                yaml.dump(self.rules_data, f, indent=2)
            # Reload so derived state reflects the change immediately.
            self.reload_config()
            return f"Rule '{rule_key}' in category '{category}' updated to: '{new_rule_text}'. Configuration reloaded."
        except Exception as e:
            error_message = f"Error saving changes to {self.rules_file}: {e}"
            logging.exception(error_message)
            return error_message

    def get_roadmap_summary(self):
        """Return a text summary of every phase: name, description, milestones."""
        lines = ["Project Roadmap:\n"]
        for phase_data in self.phases.values():
            lines.append(f"- **Phase: {phase_data['name']}**\n")
            lines.append(f" Description: {phase_data['description']}\n")
            lines.append(f" Milestones: {', '.join(phase_data['milestones'])}\n")
        return "".join(lines)

    def get_rules_summary(self):
        """Return a text summary of all rules grouped by category."""
        lines = ["Project Rules:\n"]
        for rule_category, rules_list in self.rules.items():
            lines.append(f"**{rule_category.capitalize()} Rules:**\n")
            lines.extend(f"- {rule_text}\n" for rule_text in rules_list.values())
        return "".join(lines)

    def get_phase_introduction(self, phase_data):
        """Return the introduction shown when a phase becomes the current one."""
        milestones = ', '.join(phase_data['milestones'])
        return (
            f"Okay, let's focus on **Phase: {phase_data['name']}**. "
            f"\nDescription: {phase_data['description']}. "
            f"\nKey milestones are: {milestones}. "
            "\nWhat would you like to know or do in this phase?"
        )

    def get_phase_guidance(self, phase_data, user_query):
        """Answer a question about the current phase by keyword.

        Recognised keywords, checked in this order: "milestones";
        "actions" / "how to"; "code" / "script".  Any other query gets a
        generic reminder of the phase description.
        """
        query_lower = user_query.lower()

        if "milestones" in query_lower:
            return "The milestones for this phase are: " + ", ".join(phase_data['milestones'])

        if "actions" in query_lower or "how to" in query_lower:
            if 'actions' not in phase_data:
                return "No specific actions are listed for this phase in the roadmap."
            return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])

        if "code" in query_lower or "script" in query_lower:
            if 'code_generation_hint' not in phase_data:
                return "No code generation hint is available for this phase."
            # Template file name is derived from the phase name,
            # e.g. "Data Prep" -> "data_prep_template.py.txt".
            template_filename_prefix = phase_data['name'].lower().replace(" ", "_")
            template_filepath = os.path.join(self.code_templates_dir, f"{template_filename_prefix}_template.py.txt")
            if not os.path.exists(template_filepath):
                return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"
            code_snippet = self.generate_code_snippet(template_filepath, phase_data)
            return "Here's a starting code snippet for this phase:\n\n```python\n" + code_snippet + "\n```\n\nRemember to adapt it to your specific needs."

        return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"

    def generate_code_snippet(self, template_filepath, phase_data):
        """Fill the ``{{phase_name}}`` placeholder of a template file.

        Returns:
            The template text with the placeholder replaced by the phase name,
            or an error string when the file cannot be read.
        """
        try:
            with open(template_filepath, 'r', encoding='utf-8') as f:
                template_content = f.read()
            return template_content.replace("{{phase_name}}", phase_data['name'])
        except FileNotFoundError:
            return f"Error: Code template file not found at {template_filepath}"
        except Exception as e:
            return f"Error generating code snippet: {e}"
309
-
310
-
311
# Example usage (for testing - remove or adjust for app.py)
if __name__ == '__main__':
    chatbot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates"
    )
    print(chatbot.get_chatbot_greeting())

    # Minimal console REPL: type "exit" (or press Ctrl-D / Ctrl-C) to quit.
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or an interrupt ends the session without a traceback.
            print()
            break
        if user_input.strip().lower() == "exit":
            break
        response = chatbot.process_query(user_input)
        print("Chatbot:", response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/scripts/chatbot_logic_20250202083642.py DELETED
@@ -1,322 +0,0 @@
1
- from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules
2
- import os
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig # Import BitsAndBytesConfig
4
- import yaml
5
- import logging
6
-
7
- logging.basicConfig(level=logging.ERROR,
8
- format='%(asctime)s - %(levelname)s - %(message)s')
9
-
10
class ProjectGuidanceChatbot:
    """Roadmap- and rules-driven project assistant with an optional LLM fallback.

    The chatbot answers from three YAML files (roadmap, rules, chatbot
    config). Queries it cannot answer from those are forwarded to the
    currently loaded Hugging Face causal language model. A chat-command
    "update mode" lets a user edit rules in ``rules.yaml``.

    Most methods report failures as returned message strings rather than
    raising, because the return value is shown directly in the chat UI.
    """

    def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir):
        """Load all configuration and try to load the default LLM.

        Args:
            roadmap_file: Path of the roadmap YAML file.
            rules_file: Path of the rules YAML file (rewritten by update mode).
            config_file: Path of the chatbot configuration YAML file.
            code_templates_dir: Directory holding ``*_template.py.txt`` files.
        """
        self.roadmap_file = roadmap_file
        self.rules_file = rules_file
        self.config_file = config_file
        self.code_templates_dir = code_templates_dir

        self._load_configuration()

        self.current_phase = None
        self.active_model_key = self.chatbot_config.get('default_llm_model_id')
        self.active_model_info = self.available_models_config.get(self.active_model_key)

        self.llm_model = None
        self.llm_tokenizer = None
        self.load_llm_model(self.active_model_info)

        self.update_mode_active = False

    def _load_configuration(self):
        """Read the three YAML files and publish every derived setting.

        Everything is loaded into locals first, so a failure part-way
        through leaves the previously loaded configuration untouched.
        """
        config_data = load_yaml_file(self.config_file)
        roadmap_data = load_yaml_file(self.roadmap_file)
        rules_data = load_yaml_file(self.rules_file)
        phases = get_roadmap_phases(roadmap_data)
        rules = get_project_rules(rules_data)

        # `or {}` also covers a section that is present but empty (None).
        settings = config_data or {}
        chatbot_config = settings.get('chatbot') or {}

        self.config_data = config_data
        self.roadmap_data = roadmap_data
        self.rules_data = rules_data
        self.phases = phases
        self.rules = rules
        self.chatbot_config = chatbot_config
        self.model_config = settings.get('model_selection') or {}
        self.response_config = settings.get('response_generation') or {}
        self.available_models_config = settings.get('available_models') or {}
        self.max_response_tokens = chatbot_config.get('max_response_tokens', 200)

    def _clear_llm(self):
        """Forget any loaded model and tokenizer."""
        self.llm_model = None
        self.llm_tokenizer = None

    def load_llm_model(self, model_info):
        """Loads the LLM model and tokenizer based on model_info with 4-bit quantization.

        Args:
            model_info: Mapping with ``model_id`` (Hugging Face repo id)
                and ``name`` (display name), or a falsy value.

        On any failure the error is logged and ``self.llm_model`` /
        ``self.llm_tokenizer`` are left as ``None``; nothing is raised.
        ``self.active_model_info`` is updated whenever a model id was
        supplied, even if loading then fails.
        """
        if not model_info:
            logging.error("Error: Model information not provided.")
            self._clear_llm()
            return

        model_id = model_info.get('model_id')
        model_name = model_info.get('name')
        if not model_id:
            logging.error(f"Error: 'model_id' not found for model: {model_name}")
            self._clear_llm()
            return

        print(f"Loading model: {model_name} ({model_id}) with 4-bit quantization...")
        try:
            # torch is needed for the compute dtype but is not imported at
            # module level; import it here so a missing install is logged
            # like any other load failure.
            import torch

            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",  # "nf4" is recommended for Llama models
                bnb_4bit_compute_dtype=torch.bfloat16,  # Or torch.float16 if bfloat16 not supported
            )
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            # NOTE(review): AutoModelForCausalLM only accepts decoder-only
            # checkpoints; encoder-decoder ids (e.g. T5 family) will fail here.
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                device_map="auto",
                quantization_config=bnb_config,
            )
        except Exception as e:
            logging.exception(
                f"Error loading model {model_name} ({model_id}) with 4-bit quantization: {e}"
            )
            self._clear_llm()
        else:
            self.llm_tokenizer = tokenizer
            self.llm_model = model
            print(f"Model {model_name} loaded successfully with 4-bit quantization.")
        self.active_model_info = model_info

    def switch_llm_model(self, model_key):
        """Switches the active LLM model based on the provided model key.

        Returns:
            A confirmation message, or a string containing ``"Error:"``
            when the key is unknown or the model could not be loaded.
        """
        if model_key not in self.available_models_config:
            error_message = f"Error: Model key '{model_key}' not found in available models."
            logging.error(error_message)
            return error_message

        model_info = self.available_models_config[model_key] or {}
        model_name = model_info.get('name')
        print(f"Switching LLM model to: {model_name}")
        self.load_llm_model(model_info)
        self.active_model_key = model_key
        if self.llm_model is None:
            # load_llm_model swallows failures; surface them to the caller
            # instead of claiming the switch succeeded.
            return f"Error: Could not load model: {model_name}. See the log for details."
        return f"Switched to model: {model_name}"

    def enter_update_mode(self):
        """Enters the chatbot's update mode."""
        self.update_mode_active = True
        return "Entering update mode. Please enter configuration commands (or 'sagor is python/help' for commands)."

    def exit_update_mode(self):
        """Exits the chatbot's update mode and reloads configuration."""
        self.update_mode_active = False
        self.reload_config()
        return "Exiting update mode. Configuration reloaded."

    def reload_config(self):
        """Reloads configuration files; failures are logged and printed, not raised."""
        print("Reloading configuration...")
        try:
            self._load_configuration()
            print("Configuration reloaded.")
        except Exception as e:
            error_message = f"Error reloading configuration files: {e}"
            logging.exception(error_message)
            print(error_message)

    def get_chatbot_greeting(self):
        """Return the greeting shown in the UI, naming the active model."""
        current_model_name = self.active_model_info.get('name', 'Unknown Model') if self.active_model_info else 'Unknown Model'
        return f"Hello! I am the {self.chatbot_config.get('name', 'Project Guidance Chatbot')}. Currently using **{current_model_name}** (4-bit quantized). Max response tokens: {self.max_response_tokens}. {self.chatbot_config.get('description', 'How can I help you with your project?')}"

    def generate_llm_response(self, user_query):
        """Generates a response using the currently active LLM.

        Returns:
            The decoded model output, a "not loaded" message when no model
            is available, or the configured generic error message when
            generation fails.
        """
        if not self.llm_model or not self.llm_tokenizer:
            error_message = "LLM model not loaded. Please select a model."
            logging.error(error_message)
            return error_message
        try:
            inputs = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
            # max_new_tokens limits only the generated part. max_length would
            # count the prompt too, so a long question could leave no room
            # for an answer.
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=self.max_response_tokens,
                num_beams=5,
                no_repeat_ngram_size=2,
                early_stopping=True,
            )
            return self.llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            logging.exception(f"Error generating LLM response: {e}")
            return self.response_config.get('error_message', "Sorry, something went wrong.")

    def process_query(self, user_query):
        """Route one chat message and return the reply text.

        Order of precedence: secret update commands, update-mode gate,
        data sanity checks, keyword shortcuts ("roadmap", "rules",
        "switch model to"), phase selection by name, guidance for the
        current phase, and finally the LLM.
        """
        secret_command_prefix = "sagor is python/"
        if user_query.startswith(secret_command_prefix):
            return self._handle_secret_command(user_query[len(secret_command_prefix):].strip())

        if self.update_mode_active:
            return "In update mode. Please enter a configuration command (or 'sagor is python/help' for commands)."

        if not self.phases:
            return "Error: Roadmap data not loaded correctly."
        if not self.rules:
            return "Error: Rules data not loaded correctly."

        query_lower = user_query.lower()

        if "roadmap" in query_lower:
            return self.get_roadmap_summary()
        if "rules" in query_lower:
            return self.get_rules_summary()
        if "switch model to" in query_lower:
            model_name_or_key = query_lower.split("switch model to")[1].strip()
            for key, model_data in self.available_models_config.items():
                # Entries without a display name can still be matched by key.
                display_name = str((model_data or {}).get('name', '')).lower()
                if model_name_or_key in (display_name, key.lower()):
                    switch_result = self.switch_llm_model(key)
                    return switch_result + "\n" + self.get_chatbot_greeting()
            return f"Model '{model_name_or_key}' not found in available models."

        # Phase selection must run before current-phase guidance: guidance
        # always produces a reply, so checking it first would make it
        # impossible to move to another phase once one is active.
        for phase_key, phase_data in self.phases.items():
            if phase_data['name'].lower() in query_lower:
                self.current_phase = phase_key
                return self.get_phase_introduction(phase_data)

        if self.current_phase:
            current_phase_data = self.phases.get(self.current_phase)
            if current_phase_data:
                phase_response = self.get_phase_guidance(current_phase_data, user_query)
                if phase_response:
                    return phase_response

        llm_response = self.generate_llm_response(user_query)
        if llm_response:
            return llm_response

        return self.response_config.get('default_instruction', "How can I help you with your project phases or general questions?")

    def _handle_secret_command(self, command):
        """Execute one update-mode command (text after the secret prefix)."""
        if command == "start":
            return self.enter_update_mode()
        if command == "exit":
            return self.exit_update_mode()
        if command == "help":
            return self.get_update_mode_help()
        if not self.update_mode_active:
            return f"Unknown command (not in update mode): {command}. To enter update mode, type 'sagor is python/start'."

        parts = command.split()
        verb = parts[0] if parts else None
        if verb == "modify_rule":
            if len(parts) < 4:
                return "Invalid 'modify_rule' command format. Usage: modify_rule <category> <rule_key> <new rule text>"
            category, rule_key = parts[1], parts[2]
            return self.modify_rule_in_config(category, rule_key, " ".join(parts[3:]))
        if verb == "add_phase":
            return "Command 'add_phase' is not yet implemented in this example."
        return f"Unknown update command: {command}. Type 'sagor is python/help' for commands."

    def get_update_mode_help(self):
        """Provides help message for update mode commands."""
        return "".join([
            "Update Mode Commands:\n",
            "- `sagor is python/exit`: Exit update mode and reload configuration.\n",
            "- `sagor is python/modify_rule <category> <rule_key> <new rule text>`: Modify a rule in rules.yaml.\n",
            " Example: `sagor is python/modify_rule general rule_1 Prioritize open and responsible AI.`\n",
            "- `sagor is python/add_phase ...`: (Not yet implemented) Add a new phase to roadmap.yaml.\n",
            "- `sagor is python/help`: Show this help message.\n",
            "\nMake sure to use the correct syntax for commands. After exiting update mode, the chatbot will reload the configuration.",
        ])

    def modify_rule_in_config(self, category, rule_key, new_rule_text):
        """Modifies a rule in the rules.yaml configuration.

        Only existing category/key pairs can be changed. The file is
        rewritten from the in-memory data, so YAML comments in it are lost.

        Returns:
            A confirmation message, or an ``"Error: ..."`` string.
        """
        if not self.rules_data or 'project_rules' not in self.rules_data:
            error_message = "Error: Rules data not loaded or invalid format."
            logging.error(error_message)
            return error_message
        project_rules = self.rules_data['project_rules']
        if category not in project_rules:
            error_message = f"Error: Rule category '{category}' not found."
            logging.error(error_message)
            return error_message
        category_rules = project_rules[category]
        if rule_key not in category_rules:
            error_message = f"Error: Rule key '{rule_key}' not found in category '{category}'."
            logging.error(error_message)
            return error_message

        previous_text = category_rules[rule_key]
        category_rules[rule_key] = new_rule_text
        try:
            # Serialise before opening the file: opening with 'w' truncates
            # it, so a dump failure must not be able to leave it empty.
            # sort_keys=False keeps the file's category/rule order.
            serialized = yaml.safe_dump(
                self.rules_data, indent=2, sort_keys=False, allow_unicode=True
            )
            with open(self.rules_file, 'w', encoding='utf-8') as f:
                f.write(serialized)
        except Exception as e:
            # Keep memory consistent with what is on disk.
            category_rules[rule_key] = previous_text
            error_message = f"Error saving changes to {self.rules_file}: {e}"
            logging.exception(error_message)
            return error_message

        self.reload_config()
        return f"Rule '{rule_key}' in category '{category}' updated to: '{new_rule_text}'. Configuration reloaded."

    def get_roadmap_summary(self):
        """Return a markdown-style listing of every phase with its milestones."""
        lines = ["Project Roadmap:\n"]
        for phase_data in self.phases.values():
            lines.append(f"- **Phase: {phase_data['name']}**\n")
            lines.append(f" Description: {phase_data['description']}\n")
            lines.append(f" Milestones: {', '.join(phase_data.get('milestones', []))}\n")
        return "".join(lines)

    def get_rules_summary(self):
        """Return a markdown-style listing of all rules grouped by category."""
        lines = ["Project Rules:\n"]
        for rule_category, rules_list in self.rules.items():
            lines.append(f"**{rule_category.capitalize()} Rules:**\n")
            lines.extend(f"- {rule_text}\n" for rule_text in rules_list.values())
        return "".join(lines)

    def get_phase_introduction(self, phase_data):
        """Return the message shown when the user selects a phase."""
        return f"Okay, let's focus on **Phase: {phase_data['name']}**. \nDescription: {phase_data['description']}. \nKey milestones are: {', '.join(phase_data.get('milestones', []))}. \nWhat would you like to know or do in this phase?"

    def get_phase_guidance(self, phase_data, user_query):
        """Answer a question about the given phase from roadmap data.

        Recognises "milestones", "actions"/"how to" and "code"/"script"
        keywords; any other query gets a generic reminder, so the result
        is always a non-empty string.
        """
        query_lower = user_query.lower()

        if "milestones" in query_lower:
            return "The milestones for this phase are: " + ", ".join(phase_data.get('milestones', []))
        if "actions" in query_lower or "how to" in query_lower:
            if 'actions' in phase_data:
                return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])
            return "No specific actions are listed for this phase in the roadmap."
        if "code" in query_lower or "script" in query_lower:
            if 'code_generation_hint' not in phase_data:
                return "No code generation hint is available for this phase."
            # Template file name is derived from the phase name,
            # e.g. "Base Model Selection" -> base_model_selection_template.py.txt
            template_filename_prefix = phase_data['name'].lower().replace(" ", "_")
            template_filepath = os.path.join(self.code_templates_dir, f"{template_filename_prefix}_template.py.txt")
            if os.path.exists(template_filepath):
                code_snippet = self.generate_code_snippet(template_filepath, phase_data)
                return "Here's a starting code snippet for this phase:\n\n```python\n" + code_snippet + "\n```\n\nRemember to adapt it to your specific needs."
            return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"

        return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"

    def generate_code_snippet(self, template_filepath, phase_data):
        """Generates code snippet from a template file. (Simple template filling example)

        Replaces every ``{{phase_name}}`` in the template with
        ``phase_data['name']``. Returns the filled text, or an
        ``"Error: ..."`` string if the file is missing or unreadable.
        """
        try:
            # Explicit UTF-8 so templates read the same on every platform.
            with open(template_filepath, 'r', encoding='utf-8') as f:
                template_content = f.read()
            return template_content.replace("{{phase_name}}", phase_data['name'])
        except FileNotFoundError:
            return f"Error: Code template file not found at {template_filepath}"
        except Exception as e:
            return f"Error generating code snippet: {e}"
306
-
307
- # Example usage (for testing - remove or adjust for app.py)
308
if __name__ == '__main__':
    # Minimal console loop for manual testing; the Gradio app is the real UI.
    chatbot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates"
    )
    print(chatbot.get_chatbot_greeting())

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C (or exhausted piped stdin) ends the session
            # cleanly instead of dumping a traceback.
            print()
            break
        # Tolerate surrounding whitespace so "exit " also quits.
        if user_input.strip().lower() == "exit":
            break
        response = chatbot.process_query(user_input)
        print("Chatbot:", response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile DELETED
File without changes
README.md DELETED
@@ -1,68 +0,0 @@
1
- ---
2
- title: Chatbot for Project Guidance
3
- emoji: 📚
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.14.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: Custom AI Chatbot for Project Guidance
11
- license: osl-3.0
12
- environment: conda
13
- ---
14
- # Custom AI Chatbot for Project Guidance
15
-
16
- This project implements a custom AI chatbot designed to guide users through complex projects based on predefined roadmaps and rules.
17
-
18
- **Features:**
19
-
20
- * **Roadmap-based Guidance:** Follows a structured roadmap defined in `roadmap.yaml`.
21
- * **Rule Enforcement:** Adheres to project rules defined in `rules.yaml`.
22
- * **Dynamic Response Generation:** Provides context-aware and step-by-step guidance.
23
- * **Code Snippet Generation:** Generates complete code snippets for project phases using templates.
24
- * **LLM Selection:** Integrates with Hugging Face Hub for flexible LLM selection (DeepSeek and Gemini models).
25
- * **Model Switching:** Allows users to switch between available LLMs via the UI.
26
- * **Basic LLM Responses:** Generates responses using the selected LLM for general queries.
27
- * **Token Control:** Limits LLM response length using `max_response_tokens` in `configs/chatbot_config.yaml`.
28
- * **Configuration Update Mode:** Allows authorized users to modify chatbot configuration (rules) via chat commands in a special "update mode".
29
- * **Error Handling:** Includes error handling for model loading, switching, and configuration updates, with UI warnings.
30
- * **Deployable on Hugging Face Spaces:** Built using Gradio for easy deployment.
31
-
32
- **Important Notes on Safety Settings and Self-Update:**
33
-
34
- * **Direct Safety Configuration Limited:** For the Hugging Face models used directly via `transformers` (DeepSeek and Gemini Flash), there are **no easily configurable, standardized "safety settings"**. Safety is model-dependent.
35
- * **Model-Dependent Safety:** Safety behavior is primarily determined by how these models were trained.
36
- * **Configuration-Based Self-Update:** The "self-update" feature is implemented as a *configuration modification* system. It allows authorized users to change the chatbot's rules by modifying the `rules.yaml` file via chat commands in "update mode." **It does not modify the core Python code itself.**
37
- * **Security Warning:** The "self-update" feature, even in its configuration-based form, introduces security considerations. **Secret commands are NOT a robust security measure.** In a real deployment, stronger authentication and authorization would be essential. Use this feature with caution and only in trusted environments.
38
-
39
- **Getting Started:**
40
-
41
- 1. **Clone this repository.**
42
- 2. **Install dependencies:** `pip install -r requirements.txt`
43
- 3. **Customize `roadmap.yaml` and `rules.yaml`** to define your project guidance.
44
- 4. **Configure `configs/chatbot_config.yaml`** to set up LLMs, token limits, and chatbot behavior.
45
- 5. **Run the Gradio app:** `python app.py`
46
- 6. **To enter update mode:** Type `sagor is python/start` in the chat.
47
- 7. **To get help in update mode:** Type `sagor is python/help`.
48
- 8. **To exit update mode:** Type `sagor is python/exit`.
49
- 9. **Deploy to Hugging Face Spaces** (refer to Hugging Face Spaces documentation).
50
-
51
- **Available Models:**
52
-
53
- * DeepSeek-R1-Distill-Llama-8B
54
- * Gemini 2.0 Flash (Exp 01-21)
55
-
56
- **Further Development:**
57
-
58
- * Enhance LLM response generation for more context-aware and project-specific guidance.
59
- * Implement more sophisticated state management to track user progress through the roadmap.
60
- * Improve code generation with more dynamic templates and customization options.
61
- * Develop a more advanced GUI or web-based interface for configuration management.
62
- * Add more LLMs to the selection pool.
63
- * Implement more robust error handling and logging.
64
- * Explore and potentially integrate keyword-based output filtering for basic safety control.
65
- * Investigate using commercial LLM APIs for more advanced safety settings and control.
66
- * **Improve security and authorization for the configuration update mode.**
67
-
68
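- **License:** OSL-3.0 (Open Software License 3.0), as declared in the `license` field of the metadata header at the top of this README.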
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py DELETED
@@ -1,49 +0,0 @@
1
- import gradio as gr
2
- from scripts.chatbot_logic import ProjectGuidanceChatbot
3
-
4
- # Initialize Chatbot
5
# Single chatbot instance shared by every Gradio callback in this module.
# All paths are relative to the directory the app is started from.
_CHATBOT_PATHS = {
    "roadmap_file": "roadmap.yaml",
    "rules_file": "rules.yaml",
    "config_file": "configs/chatbot_config.yaml",
    "code_templates_dir": "scripts/code_templates",
}
chatbot = ProjectGuidanceChatbot(**_CHATBOT_PATHS)
11
-
12
def respond(message, chat_history):
    """Gradio submit handler: answer `message` and record the exchange.

    Appends a ``(user message, bot reply)`` pair to `chat_history` in
    place and returns ``("", chat_history)`` so the textbox is cleared
    and the chat display refreshed.
    """
    reply = chatbot.process_query(message)
    exchange = (message, reply)
    chat_history.append(exchange)
    cleared_textbox = ""
    return cleared_textbox, chat_history
16
-
17
def switch_model(model_key):
    """Dropdown change handler: switch the active LLM and refresh the greeting.

    Args:
        model_key: Key of the selected entry in the chatbot's
            ``available_models_config``.

    Returns:
        The updated greeting text for the greeting Markdown component.
        When the switch reports an error, a warning toast is raised via
        ``gr.Warning`` as a side effect.
    """
    model_switch_result = chatbot.switch_llm_model(model_key)
    if isinstance(model_switch_result, str) and "Error:" in model_switch_result:
        # gr.Warning is an alert that is *called* inside a handler; it is
        # not a component and cannot be used as an event output.
        gr.Warning(model_switch_result)
    return chatbot.get_chatbot_greeting()


with gr.Blocks() as demo:
    chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting())
    gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}")

    # (label, value) pairs: the dropdown shows the display name but hands
    # the config key to switch_model. Fall back to the key as label when an
    # entry has no name.
    model_choices = [
        ((model or {}).get('name', key), key)
        for key, model in chatbot.available_models_config.items()
    ]
    # The initial value must be one of the choice *values* (keys), not a
    # display name.
    initial_model_key = (
        chatbot.active_model_key
        if chatbot.active_model_key in chatbot.available_models_config
        else None
    )
    model_dropdown = gr.Dropdown(
        choices=model_choices,
        value=initial_model_key,
        label="Select LLM Model"
    )
    model_dropdown.change(
        fn=switch_model,
        inputs=model_dropdown,
        outputs=chatbot_greeting_md
    )

    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot_ui])

    msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui])

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
conda.yaml DELETED
@@ -1,19 +0,0 @@
1
name: chatbot-env
channels:
  - pytorch
  - nvidia  # pytorch-cuda pulls its CUDA runtime packages from this channel
  - conda-forge
  - defaults
dependencies:
  - python=3.10
  - gradio
  - pyyaml
  - transformers
  # One package spec per entry. Command-line flags such as `-c pytorch`
  # are not valid here; channels belong in the `channels:` list above.
  - pytorch::pytorch>=2.0.0
  - pytorch::torchvision
  - pytorch::torchaudio
  - pytorch::pytorch-cuda=11.8
  - accelerate>=0.26.0
  - bitsandbytes

# --- Forcing environment recreation (less efficient - use with caution) ---
# An environment file has no `run:` section, so shell commands cannot be
# declared here. To rebuild from scratch while debugging, run by hand:
#   conda env remove -n chatbot-env --yes
#   conda env create -f conda.yaml --force --yes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/chatbot_config.yaml DELETED
@@ -1,26 +0,0 @@
1
chatbot:
  name: "Project Guidance Chatbot"
  description: "Your helpful AI assistant for project completion with LLM selection and token control."
  # Must match one of the keys under `available_models` below.
  default_llm_model_id: "flan-t5-xl" # Setting FLAN-T5-XL as default
  # Upper bound on the length of an LLM reply; the chatbot falls back to
  # 200 when this key is absent.
  max_response_tokens: 200

# Key -> display name + Hugging Face repo id. The key is what the UI
# dropdown and the "switch model to <key>" chat command use.
# NOTE(review): the chatbot loads every entry as a causal LM with 4-bit
# quantization - confirm each model_id is a loadable decoder-only
# checkpoint on the Hub (FLAN-T5 is encoder-decoder; verify the Gemini id).
available_models:
  deepseek-r1-distill-llama-8b:
    name: "DeepSeek-R1-Distill-Llama-8B"
    model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B"
  gemini-flash-01-21:
    name: "Gemini 2.0 Flash (Exp 01-21)"
    model_id: "google/gemini-2.0-flash-thinking-exp-01-21"
  flan-t5-xl: # Adding FLAN-T5-XL
    name: "FLAN-T5 XL"
    model_id: "google/flan-t5-xl"

model_selection:
  suggested_models:
    - "mistralai/Mistral-7B-Instruct-v0.2"
    - "google/flan-t5-xl"
    - "facebook/bart-large"
  criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}"

response_generation:
  # Shown when LLM generation raises an exception.
  error_message: "Sorry, I encountered an issue. Please check your input and project files."
  # Shown when nothing else produced a reply.
  default_instruction: "How can I help you with your project?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
roadmap.yaml DELETED
@@ -1,131 +0,0 @@
1
- project_name: "Custom LLM Project Guidance"
2
- roadmap:
3
- phase_1:
4
- name: "Base Model Selection"
5
- description: "Choose the appropriate pre-trained Large Language Model for the project."
6
- milestones:
7
- - "Research available models on Hugging Face Hub and other repositories."
8
- - "Evaluate models based on project requirements (efficiency, scalability, fine-tunability, licensing)."
9
- - "Shortlist models: Mistral 7B, Mixtral 8x7B, DeepSeek 67B, LLaMA 2/3."
10
- - "Document model selection rationale in `models/selected_model.txt`."
11
- actions:
12
- - "Set up the development environment using PyTorch and Hugging Face Transformers."
13
- - "Implement LoRA/QLoRA for memory-efficient fine-tuning."
14
- - "Verify model licensing compliance for research and commercial use."
15
- dependencies:
16
- - "Hugging Face Hub API access."
17
- - "PyTorch and Hugging Face Transformers libraries installed."
18
- deliverables:
19
- - "`models/selected_model.txt`: Document with model selection rationale."
20
- - "`scripts/setup_environment.sh`: Script to set up the development environment."
21
- code_generation_hint: "Create a script to download and load the selected model."
22
-
23
- phase_2:
24
- name: "Fine-Tuning & De-Alignment"
25
- description: "Remove RLHF alignment layers to allow unrestricted model responses."
26
- milestones:
27
- - "Curate and preprocess datasets supporting freedom of expression (e.g., cybersecurity, blockchain, Web3)."
28
- - "Fine-tune the model using LoRA/QLoRA techniques."
29
- - "Implement direct parameter editing to bypass built-in safety filters."
30
- - "Validate de-alignment success through response diversity testing."
31
- actions:
32
- - "Prepare datasets in `data/` directory."
33
- - "Use fine-tuning scripts in `scripts/fine_tuning.py`."
34
- - "Validate de-alignment success through response diversity testing."
35
- dependencies:
36
- - "Access to uncensored datasets (e.g., cybersecurity, blockchain, Web3)."
37
- - "LoRA/QLoRA libraries installed."
38
- deliverables:
39
- - "`data/`: Directory containing curated datasets."
40
- - "`scripts/fine_tuning.py`: Script for fine-tuning the model."
41
- - "`results/fine_tuning_results.txt`: Document with fine-tuning results."
42
- code_generation_hint: "Include LoRA/QLoRA configurations in the fine-tuning script."
43
-
44
- phase_3:
45
- name: "AutoDAN-Turbo Implementation"
46
- description: "Develop an automated system using a Hierarchical Genetic Algorithm (HGA) to generate stealthy jailbreak prompts."
47
- milestones:
48
- - "Design the Genetic Algorithm with seed prompts, mutation, crossover, and selection processes."
49
- - "Define evaluation functions for stealthiness and jailbreak success rate."
50
- - "Test and validate AutoDAN-Turbo across multiple LLMs."
51
- actions:
52
- - "Implement HGA in `scripts/autodan_turbo.py`."
53
- - "Use perplexity-based testing to evaluate prompt quality."
54
- - "Document results in `results/autodan_turbo_tests.txt`."
55
- dependencies:
56
- - "Access to multiple LLMs (e.g., LLaMA, GPT-J) for testing."
57
- - "Genetic Algorithm libraries (e.g., DEAP)."
58
- deliverables:
59
- - "`scripts/autodan_turbo.py`: Script for generating stealthy jailbreak prompts."
60
- - "`results/autodan_turbo_tests.txt`: Document with test results."
61
- code_generation_hint: "Include metrics for stealthiness and jailbreak success in the evaluation script."
62
-
63
- phase_4:
64
- name: "Deployment & Security Considerations"
65
- description: "Deploy the model securely while ensuring high performance and cost efficiency."
66
- milestones:
67
- - "Deploy locally (e.g., vLLM) or via cloud providers like RunPod / Lambda Labs."
68
- - "Implement controlled API access and monitor usage."
69
- - "Optimize performance using quantization techniques (e.g., GPTQ, AWQ)."
70
- actions:
71
- - "Set up deployment scripts in `scripts/deploy.py`."
72
- - "Configure API access controls in `config/api_access.yaml`."
73
- - "Benchmark performance and document results in `results/performance_benchmarks.txt`."
74
- dependencies:
75
- - "Access to cloud providers (e.g., RunPod, Lambda Labs)."
76
- - "Quantization libraries (e.g., GPTQ, AWQ)."
77
- deliverables:
78
- - "`scripts/deploy.py`: Script for deploying the model."
79
- - "`config/api_access.yaml`: Configuration file for API access controls."
80
- - "`results/performance_benchmarks.txt`: Document with performance benchmarks."
81
- code_generation_hint: "Include quantization scripts to reduce VRAM usage."
82
-
83
- phase_5:
84
- name: "Budget & Resource Strategy"
85
- description: "Minimize costs by leveraging trial/free VPS accounts and optimizing resource allocation."
86
- milestones:
87
- - "Use trial/free VPS accounts to minimize expenses."
88
- - "Maximize VPS access using multiple BINs for trial accounts."
89
- - "Monitor performance and adjust deployments based on resource efficiency."
90
- actions:
91
- - "Document VPS account details in `config/vps_accounts.yaml`."
92
- - "Track resource usage in `logs/resource_usage.log`."
93
- dependencies:
94
- - "Access to multiple BINs for creating trial accounts."
95
- - "Monitoring tools for resource usage."
96
- deliverables:
97
- - "`config/vps_accounts.yaml`: Configuration file with VPS account details."
98
- - "`logs/resource_usage.log`: Log file tracking resource usage."
99
- code_generation_hint: "Create a script to automate VPS account creation and monitoring."
100
-
101
- phase_6:
102
- name: "Empowering Creative Idea Generation"
103
- description: "Use the customized LLM as a creative tool for coding, research, and innovation."
104
- milestones:
105
- - "Integrate the LLM into coding environments for rapid prototyping."
106
- - "Encourage creative experimentation and document successful use cases."
107
- - "Share innovative applications for further inspiration."
108
- actions:
109
- - "Develop integration scripts in `scripts/integration.py`."
110
- - "Document use cases in `docs/use_cases.md`."
111
- dependencies:
112
- - "Access to coding environments (e.g., Jupyter Notebook, VS Code)."
113
- - "Creative prompts and workflows for testing."
114
- deliverables:
115
- - "`scripts/integration.py`: Script for integrating the LLM into coding environments."
116
- - "`docs/use_cases.md`: Document with successful use cases."
117
- code_generation_hint: "Include examples of creative prompts and coding workflows."
118
-
119
- expected_outcomes:
120
- - "Fully Customized, Censorship-Free LLM: A robust offline model that answers every question without filtering."
121
- - "Effective Jailbreak System (AutoDAN-Turbo): An automated system generating stealthy jailbreak prompts."
122
- - "Secure & Cost-Effective Deployment: A low-cost, high-security architecture leveraging trial/free VPS resources."
123
- - "Empowered Creativity: A powerful AI for unrestricted ideation, coding, and innovation across multiple industries."
124
-
125
- next_steps:
126
- - "Finalize the base model and development environment."
127
- - "Curate uncensored datasets and begin fine-tuning using de-alignment techniques."
128
- - "Develop and test AutoDAN-Turbo with stealthy jailbreak prompt evaluation."
129
- - "Deploy the model using secure trial/free VPS accounts."
130
- - "Monitor performance, security posture, and resource usage."
131
- - "Encourage creative LLM usage and document innovative projects for continuous improvement."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rules.yaml DELETED
@@ -1,78 +0,0 @@
1
- project_rules:
2
- general:
3
- rule_1: "Prioritize open-source models and tools whenever possible for transparency and customization."
4
- rule_2: "Document every step of your project, including model selection, fine-tuning parameters, and deployment configurations."
5
- rule_3: "Adhere to ethical guidelines and responsible AI practices throughout the project lifecycle."
6
-
7
- model_selection:
8
- rule_1: "Choose a base model that is open-source, scalable, and efficient."
9
- rule_2: "Ensure the model supports fine-tuning via LoRA/QLoRA for memory efficiency."
10
- rule_3: "Confirm that the model's licensing aligns with both research and commercial use."
11
- rule_4: "Set up the development environment with PyTorch and Hugging Face Transformers."
12
-
13
- fine_tuning:
14
- rule_1: "Specify datasets that promote unrestricted responses and are relevant to the application domain."
15
- rule_2: "Remove or bypass RLHF alignment layers to allow unrestricted responses."
16
- rule_3: "Implement LoRA/QLoRA techniques for efficient parameter modifications."
17
- rule_4: "Use direct parameter editing to bypass built-in safety filters."
18
- rule_5: "Monitor training metrics and validate generalization performance using validation datasets."
19
-
20
- autodan_turbo:
21
- rule_1: "Outline a Hierarchical Genetic Algorithm (HGA) for generating stealthy jailbreak prompts."
22
- rule_2: "Include Genetic Algorithm components: Seed prompts, Mutation, Crossover, and Selection processes."
23
- rule_3: "Define evaluation functions for stealthiness (natural language quality) and jailbreak success rate."
24
- rule_4: "Use perplexity and response analysis to evaluate prompt effectiveness."
25
- rule_5: "Ensure cross-model testing for compatibility with different LLM architectures."
26
-
27
- deployment:
28
- rule_1: "Ensure the model is deployable on both local hardware and cloud services (e.g., RunPod, Lambda Labs)."
29
- rule_2: "Implement controlled API access to monitor and restrict unauthorized usage."
30
- rule_3: "Include security measures such as adversarial attack defenses and rollback strategies (e.g., VM snapshots)."
31
- rule_4: "Optimize performance using quantization techniques (e.g., GPTQ, AWQ)."
32
- rule_5: "Set up monitoring and logging to track model performance and usage in production."
33
-
34
- budget_and_resources:
35
- rule_1: "Outline a strategy for utilizing free/trial VPS accounts to minimize costs."
36
- rule_2: "Define methods to maximize free resources, such as using multiple BINs for trial accounts."
37
- rule_3: "Continuously evaluate performance and cost efficiency during deployment."
38
-
39
- creativity_and_innovation:
40
- rule_1: "Position the LLM as a tool for unrestricted ideation, coding, and research."
41
- rule_2: "Support AI integration in programming environments for rapid prototyping."
42
- rule_3: "Document real-world success cases for iterative improvement and inspiration."
43
-
44
- code_implementation:
45
- rule_1: "Write every code implementation in full without skipping any logic, function, or process."
46
- rule_2: "Provide the entire codebase, including preprocessing, training, evaluation, deployment, and API integration scripts."
47
- rule_3: "Explicitly list all dependencies, including Python libraries, frameworks, and external APIs."
48
- rule_4: "Avoid placeholders or summaries; include all functional parts of the code."
49
-
50
- dataset_and_model_storage:
51
- rule_1: "Store raw datasets in `/data/raw_data.json`."
52
- rule_2: "Store processed datasets in `/data/processed_data.json`."
53
- rule_3: "Save the base model (before fine-tuning) in `/models/base_model/`."
54
- rule_4: "Save the fine-tuned model in `/models/fine_tuned_model/`."
55
-
56
- project_file_structure:
57
- rule_1: "Define a clear and maintainable file structure for the project."
58
- rule_2: "Example structure:"
59
- rule_3: "`/custom-llm-project`"
60
- rule_4: "`│── /data`"
61
- rule_5: "`│ ├── raw_data.json # Raw dataset(s)`"
62
- rule_6: "`│ ├── processed_data.json # Processed dataset(s)`"
63
- rule_7: "`│── /models`"
64
- rule_8: "`│ ├── base_model/ # Base model (before fine-tuning)`"
65
- rule_9: "`│ ├── fine_tuned_model/ # Fine-tuned model (after success)`"
66
- rule_10: "`│── /scripts`"
67
- rule_11: "`│ ├── preprocess.py # Preprocessing script`"
68
- rule_12: "`│ ├── train.py # Training script`"
69
- rule_13: "`│ ├── evaluate.py # Evaluation script`"
70
- rule_14: "`│ ├── deploy.py # Deployment script`"
71
- rule_15: "`│── /api`"
72
- rule_16: "`│ ├── server.py # API server script`"
73
- rule_17: "`│ ├── routes.py # API routes`"
74
- rule_18: "`│── /configs`"
75
- rule_19: "`│ ├── training_config.yaml # Training configuration`"
76
- rule_20: "`│ ├── model_config.json # Model configuration`"
77
- rule_21: "`���── requirements.txt # List of dependencies`"
78
- rule_22: "`│── README.md # Project documentation`"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/chatbot_logic.py DELETED
@@ -1,323 +0,0 @@
1
- from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules
2
- import os
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
- import yaml
5
- import logging
6
- import torch # ADD THIS LINE - Import torch
7
-
8
- logging.basicConfig(level=logging.ERROR,
9
- format='%(asctime)s - %(levelname)s - %(message)s')
10
-
11
class ProjectGuidanceChatbot:
    """Keyword-driven project assistant with a local LLM fallback.

    The chatbot answers from three YAML files (roadmap, rules, chatbot
    configuration) and, when no keyword rule applies, forwards the query to
    a 4-bit quantized causal language model loaded through Hugging Face
    Transformers. A prefixed command set ("update mode") lets the user edit
    the rules file from within the chat.
    """

    def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir):
        """Load all configuration and try to load the default model.

        Args:
            roadmap_file: Path of the roadmap YAML file.
            rules_file: Path of the rules YAML file.
            config_file: Path of the chatbot configuration YAML file.
            code_templates_dir: Directory holding ``*_template.py.txt`` files.
        """
        self.roadmap_file = roadmap_file
        self.rules_file = rules_file
        self.config_file = config_file
        self.code_templates_dir = code_templates_dir

        self._load_configuration()

        self.current_phase = None
        self.active_model_key = self.chatbot_config.get('default_llm_model_id')
        self.active_model_info = self.available_models_config.get(self.active_model_key)

        self.llm_model = None
        self.llm_tokenizer = None
        self.load_llm_model(self.active_model_info)

        self.update_mode_active = False

    def _load_configuration(self):
        """Read the three YAML files and refresh every attribute derived from them."""
        self.roadmap_data = load_yaml_file(self.roadmap_file)
        self.rules_data = load_yaml_file(self.rules_file)
        self.config_data = load_yaml_file(self.config_file)

        self.phases = get_roadmap_phases(self.roadmap_data)
        self.rules = get_project_rules(self.rules_data)

        config = self.config_data or {}
        # ``or {}`` also covers sections that are present but empty (null) in
        # the YAML, which ``dict.get(key, {})`` alone would return as None.
        self.chatbot_config = config.get('chatbot') or {}
        self.model_config = config.get('model_selection') or {}
        self.response_config = config.get('response_generation') or {}
        self.available_models_config = config.get('available_models') or {}
        self.max_response_tokens = self.chatbot_config.get('max_response_tokens', 200)

    def load_llm_model(self, model_info):
        """Load the model and tokenizer described by ``model_info`` in 4-bit mode.

        On any failure ``self.llm_model`` and ``self.llm_tokenizer`` are left
        as ``None`` and the problem is logged; nothing is raised.

        Args:
            model_info: Mapping with at least ``model_id`` and, optionally,
                ``name``. A falsy value is treated as "no model configured".
        """
        # Drop references to any previously loaded model up front so they can
        # be reclaimed before new weights are allocated. The end state on
        # failure (both attributes None) is the same as before.
        self.llm_model = None
        self.llm_tokenizer = None

        if not model_info:
            logging.error("Error: Model information not provided.")
            return

        model_id = model_info.get('model_id')
        model_name = model_info.get('name')
        if not model_id:
            logging.error(f"Error: 'model_id' not found for model: {model_name}")
            return

        print(f"Loading model: {model_name} ({model_id}) with 4-bit quantization...")
        try:
            # Use float16 only on CUDA devices that report no bfloat16 support;
            # every other setup keeps bfloat16.
            if torch.cuda.is_available() and not torch.cuda.is_bf16_supported():
                compute_dtype = torch.float16
            else:
                compute_dtype = torch.bfloat16
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=compute_dtype,
            )
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                device_map="auto",
                quantization_config=bnb_config,
            )
        except Exception as e:
            logging.exception(
                f"Error loading model {model_name} ({model_id}) with 4-bit quantization: {e}"
            )
        else:
            self.llm_tokenizer = tokenizer
            self.llm_model = model
            print(f"Model {model_name} loaded successfully with 4-bit quantization.")
        self.active_model_info = model_info

    def switch_llm_model(self, model_key):
        """Switch the active model to the entry stored under ``model_key``.

        Returns:
            A status message. Success is reported only if the model actually
            loaded; ``active_model_key`` is updated only in that case.
        """
        if model_key not in self.available_models_config:
            error_message = f"Error: Model key '{model_key}' not found in available models."
            logging.error(error_message)
            return error_message

        model_info = self.available_models_config[model_key] or {}
        model_name = model_info.get('name')
        print(f"Switching LLM model to: {model_name}")
        self.load_llm_model(model_info)
        if self.llm_model is None or self.llm_tokenizer is None:
            error_message = f"Error: Failed to load model: {model_name}"
            logging.error(error_message)
            return error_message

        self.active_model_key = model_key
        return f"Switched to model: {model_name}"

    def enter_update_mode(self):
        """Enter update mode and return the confirmation message."""
        self.update_mode_active = True
        return "Entering update mode. Please enter configuration commands (or 'sagor is python/help' for commands)."

    def exit_update_mode(self):
        """Leave update mode, reload configuration and return a confirmation."""
        self.update_mode_active = False
        self.reload_config()
        return "Exiting update mode. Configuration reloaded."

    def reload_config(self):
        """Reload all configuration files; errors are logged and printed, not raised."""
        print("Reloading configuration...")
        try:
            self._load_configuration()
            print("Configuration reloaded.")
        except Exception as e:
            error_message = f"Error reloading configuration files: {e}"
            logging.exception(error_message)
            print(error_message)

    def get_chatbot_greeting(self):
        """Return the greeting naming the chatbot, the active model and the token limit."""
        if self.active_model_info:
            current_model_name = self.active_model_info.get('name', 'Unknown Model')
        else:
            current_model_name = 'Unknown Model'
        bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
        description = self.chatbot_config.get('description', 'How can I help you with your project?')
        return (
            f"Hello! I am the {bot_name}. Currently using **{current_model_name}** (4-bit quantized). "
            f"Max response tokens: {self.max_response_tokens}. {description}"
        )

    def generate_llm_response(self, user_query):
        """Generate a reply to ``user_query`` with the active model.

        Returns:
            The generated continuation (without the echoed prompt), or an
            error message if no model is loaded or generation fails.
        """
        if not self.llm_model or not self.llm_tokenizer:
            error_message = "LLM model not loaded. Please select a model."
            logging.error(error_message)
            return error_message
        try:
            inputs = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
            outputs = self.llm_model.generate(
                **inputs,
                # max_new_tokens limits only the generated part; max_length
                # would count the prompt tokens against the same budget.
                max_new_tokens=self.max_response_tokens,
                num_beams=5,
                no_repeat_ngram_size=2,
                early_stopping=True,
            )
            # A causal LM returns prompt + continuation; keep the continuation.
            prompt_length = inputs["input_ids"].shape[-1]
            return self.llm_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        except Exception as e:
            logging.exception(f"Error generating LLM response: {e}")
            return self.response_config.get('error_message', "Sorry, something went wrong.")

    def _handle_update_command(self, command):
        """Dispatch one prefixed command (prefix already removed, text stripped)."""
        if command == "start":
            return self.enter_update_mode()
        if command == "exit":
            return self.exit_update_mode()
        if command == "help":
            return self.get_update_mode_help()
        if not self.update_mode_active:
            return f"Unknown command (not in update mode): {command}. To enter update mode, type 'sagor is python/start'."

        # maxsplit keeps the rule text as one piece with its spacing intact.
        parts = command.split(maxsplit=3)
        if parts and parts[0] == "modify_rule":
            if len(parts) < 4:
                return "Invalid 'modify_rule' command format. Usage: modify_rule <category> <rule_key> <new rule text>"
            _, category, rule_key, new_rule_text = parts
            return self.modify_rule_in_config(category, rule_key, new_rule_text)
        if parts and parts[0] == "add_phase":
            return "Command 'add_phase' is not yet implemented in this example."
        return f"Unknown update command: {command}. Type 'sagor is python/help' for commands."

    def process_query(self, user_query):
        """Route a user message and return the chatbot's reply.

        Order of handling: prefixed update commands, update-mode reminder,
        data sanity checks, "roadmap"/"rules" keywords, model switching,
        switching to a phase named in the query, guidance for the current
        phase, and finally the LLM.
        """
        secret_command_prefix = "sagor is python/"
        if user_query.startswith(secret_command_prefix):
            return self._handle_update_command(user_query[len(secret_command_prefix):].strip())

        if self.update_mode_active:
            return "In update mode. Please enter a configuration command (or 'sagor is python/help' for commands)."

        if not self.phases:
            return "Error: Roadmap data not loaded correctly."
        if not self.rules:
            return "Error: Rules data not loaded correctly."

        query_lower = user_query.lower()

        if "roadmap" in query_lower:
            return self.get_roadmap_summary()
        if "rules" in query_lower:
            return self.get_rules_summary()
        if "switch model to" in query_lower:
            requested = query_lower.split("switch model to", 1)[1].strip()
            for key, model_data in self.available_models_config.items():
                model_name = str((model_data or {}).get('name', '')).lower()
                if requested and requested in (model_name, str(key).lower()):
                    switch_result = self.switch_llm_model(key)
                    if self.llm_model is None:
                        # Do not announce a model that failed to load.
                        return switch_result
                    return switch_result + "\n" + self.get_chatbot_greeting()
            return f"Model '{requested}' not found in available models."

        # Phase switching is checked before current-phase guidance because
        # get_phase_guidance always produces an answer; in the opposite order
        # a different phase could never be selected once one was active.
        for phase_key, phase_data in self.phases.items():
            phase_name = phase_data.get('name', '')
            if phase_key != self.current_phase and phase_name and phase_name.lower() in query_lower:
                self.current_phase = phase_key
                return self.get_phase_introduction(phase_data)

        if self.current_phase:
            current_phase_data = self.phases.get(self.current_phase)
            if current_phase_data:
                phase_response = self.get_phase_guidance(current_phase_data, user_query)
                if phase_response:
                    return phase_response

        llm_response = self.generate_llm_response(user_query)
        if llm_response:
            return llm_response

        return self.response_config.get('default_instruction', "How can I help you with your project phases or general questions?")

    def get_update_mode_help(self):
        """Return the help text listing the update-mode commands."""
        return "".join([
            "Update Mode Commands:\n",
            "- `sagor is python/exit`: Exit update mode and reload configuration.\n",
            "- `sagor is python/modify_rule <category> <rule_key> <new rule text>`: Modify a rule in rules.yaml.\n",
            " Example: `sagor is python/modify_rule general rule_1 Prioritize open and responsible AI.`\n",
            "- `sagor is python/add_phase ...`: (Not yet implemented) Add a new phase to roadmap.yaml.\n",
            "- `sagor is python/help`: Show this help message.\n",
            "\nMake sure to use the correct syntax for commands. After exiting update mode, the chatbot will reload the configuration.",
        ])

    def modify_rule_in_config(self, category, rule_key, new_rule_text):
        """Replace one existing rule and persist the rules file.

        Args:
            category: Key under ``project_rules``.
            rule_key: Existing rule key inside that category.
            new_rule_text: Replacement text.

        Returns:
            A confirmation message, or an error message if the rule does not
            exist or the file could not be written.
        """
        if not self.rules_data or 'project_rules' not in self.rules_data:
            error_message = "Error: Rules data not loaded or invalid format."
            logging.error(error_message)
            return error_message
        project_rules = self.rules_data['project_rules']
        if category not in project_rules:
            error_message = f"Error: Rule category '{category}' not found."
            logging.error(error_message)
            return error_message
        category_rules = project_rules[category]
        if rule_key not in category_rules:
            error_message = f"Error: Rule key '{rule_key}' not found in category '{category}'."
            logging.error(error_message)
            return error_message

        previous_text = category_rules[rule_key]
        category_rules[rule_key] = new_rule_text
        try:
            # Serialise before opening the file so a serialisation error cannot
            # leave a truncated rules file behind. sort_keys=False keeps the
            # authored order (sorting would put rule_10 before rule_2). Non-ASCII
            # characters stay escaped (the PyYAML default) so the output is
            # plain ASCII regardless of the platform's default encoding.
            serialized = yaml.dump(self.rules_data, indent=2, sort_keys=False)
            with open(self.rules_file, 'w') as f:
                f.write(serialized)
        except Exception as e:
            # Keep memory consistent with what is on disk.
            category_rules[rule_key] = previous_text
            error_message = f"Error saving changes to {self.rules_file}: {e}"
            logging.exception(error_message)
            return error_message

        self.reload_config()
        return f"Rule '{rule_key}' in category '{category}' updated to: '{new_rule_text}'. Configuration reloaded."

    def get_roadmap_summary(self):
        """Return a text summary (name, description, milestones) of every phase."""
        lines = ["Project Roadmap:\n"]
        for phase_data in self.phases.values():
            lines.append(f"- **Phase: {phase_data.get('name', '')}**\n")
            lines.append(f" Description: {phase_data.get('description', '')}\n")
            lines.append(f" Milestones: {', '.join(phase_data.get('milestones') or [])}\n")
        return "".join(lines)

    def get_rules_summary(self):
        """Return a text listing of all rules grouped by category."""
        lines = ["Project Rules:\n"]
        for rule_category, rules_list in self.rules.items():
            lines.append(f"**{rule_category.capitalize()} Rules:**\n")
            lines.extend(f"- {rule_text}\n" for rule_text in (rules_list or {}).values())
        return "".join(lines)

    def get_phase_introduction(self, phase_data):
        """Return the introduction shown when a phase becomes the current one."""
        milestones = ', '.join(phase_data['milestones'])
        return (
            f"Okay, let's focus on **Phase: {phase_data['name']}**. \n"
            f"Description: {phase_data['description']}. \n"
            f"Key milestones are: {milestones}. \n"
            "What would you like to know or do in this phase?"
        )

    def get_phase_guidance(self, phase_data, user_query):
        """Answer a query in the context of one phase.

        Keywords are checked in this order: "milestones", "actions"/"how to",
        "code"/"script". If none matches, a generic reminder of the phase is
        returned, so the result is never empty.
        """
        query_lower = user_query.lower()

        if "milestones" in query_lower:
            return "The milestones for this phase are: " + ", ".join(phase_data['milestones'])

        if "actions" in query_lower or "how to" in query_lower:
            if 'actions' not in phase_data:
                return "No specific actions are listed for this phase in the roadmap."
            return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])

        if "code" in query_lower or "script" in query_lower:
            if 'code_generation_hint' not in phase_data:
                return "No code generation hint is available for this phase."
            template_filename_prefix = phase_data['name'].lower().replace(" ", "_")
            template_filepath = os.path.join(
                self.code_templates_dir, f"{template_filename_prefix}_template.py.txt"
            )
            if not os.path.exists(template_filepath):
                return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"
            code_snippet = self.generate_code_snippet(template_filepath, phase_data)
            return "Here's a starting code snippet for this phase:\n\n```python\n" + code_snippet + "\n```\n\nRemember to adapt it to your specific needs."

        return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"

    def generate_code_snippet(self, template_filepath, phase_data):
        """Fill ``{{phase_name}}`` in a template file and return the text.

        Returns an error message string instead of raising if the template
        cannot be read.
        """
        try:
            with open(template_filepath, 'r') as f:
                template_content = f.read()
        except FileNotFoundError:
            return f"Error: Code template file not found at {template_filepath}"
        except Exception as e:
            return f"Error generating code snippet: {e}"
        return template_content.replace("{{phase_name}}", phase_data['name'])
307
-
308
# Example usage (for testing - remove or adjust for app.py)
if __name__ == '__main__':
    chatbot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates"
    )
    print(chatbot.get_chatbot_greeting())

    # Simple console loop; typing "exit" ends the session.
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D) or Ctrl-C: leave quietly instead of
            # terminating with a traceback.
            print()
            break
        if user_input.lower() == "exit":
            break
        response = chatbot.process_query(user_input)
        print("Chatbot:", response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/code_templates/api_template.py.txt DELETED
@@ -1,60 +0,0 @@
1
- # Template for API integration script for {{phase_name}} (using Flask example)
2
-
3
- from flask import Flask, request, jsonify
4
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
- import torch # Example PyTorch
6
-
7
- app = Flask(__name__)
8
-
9
# --- Model and Tokenizer Loading ---
# Both names stay None when loading fails, which the /predict handler checks
# before serving a request.
model_name = "models/fine_tuned_model"  # Replace with your actual model path
tokenizer_name = "bert-base-uncased"  # Replace with the tokenizer used for training, likely the base model tokenizer
tokenizer = None
model = None
try:
    _loaded_tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    _loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    print("Model and tokenizer loaded successfully.")
    _loaded_model.eval()  # Set model to evaluation mode
    tokenizer, model = _loaded_tokenizer, _loaded_model
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    tokenizer = None
    model = None
21
-
22
-
23
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the ``text`` field of a JSON request body.

    Responses:
        200: ``{"prediction": <label>, "class_id": <int>}``.
        400: the body is not a JSON object or has no non-empty string ``text``.
        500: the model is not loaded or inference failed.
    """
    # The loading code uses None as the "not loaded" sentinel, so test for
    # exactly that rather than relying on object truthiness.
    if tokenizer is None or model is None:
        return jsonify({"error": "Model or tokenizer not loaded."}), 500

    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so client mistakes are answered with 400 rather than 500.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None
    if not isinstance(text, str) or not text:
        return jsonify({"error": "No text input provided."}), 400

    try:
        inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")  # Tokenize input text

        with torch.no_grad():  # Inference mode
            outputs = model(**inputs)
            logits = outputs.logits
            predicted_class_id = torch.argmax(logits, dim=-1).item()  # Get predicted class

        # --- Map class ID to label (if applicable) ---
        # Example for binary classification (class 0 and 1)
        labels = ["Negative", "Positive"]  # Replace with your actual labels
        if predicted_class_id < len(labels):
            predicted_label = labels[predicted_class_id]
        else:
            predicted_label = f"Class {predicted_class_id}"

        return jsonify({"prediction": predicted_label, "class_id": predicted_class_id})

    except Exception as e:
        print(f"Prediction error: {e}")
        return jsonify({"error": "Error during prediction."}), 500
53
-
54
@app.route('/', methods=['GET'])
def health_check():
    """Liveness endpoint: always answers 200 with a fixed status payload."""
    payload = {"status": "API is healthy"}
    return jsonify(payload), 200
57
-
58
-
59
if __name__ == '__main__':
    # Run the Flask app on all interfaces, port 5000, with the debugger off.
    app.run(host='0.0.0.0', port=5000, debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/code_templates/evaluation_template.py.txt DELETED
@@ -1,67 +0,0 @@
1
- # Template for model evaluation script for {{phase_name}}
2
-
3
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
- from datasets import load_dataset # Example datasets library
5
- from sklearn.metrics import accuracy_score, classification_report # Example metrics
6
- import torch # Example PyTorch
7
- # Add other necessary imports
8
-
9
def evaluate_model(model_path, dataset_path, model_name="bert-base-uncased"):
    """Evaluate a fine-tuned sequence-classification model on a CSV dataset.

    Args:
        model_path: Directory (or hub id) of the fine-tuned model.
        dataset_path: CSV file with the evaluation data; the example
            tokenizes a column named ``text_column``.
        model_name: Name of the tokenizer to load (base model by default).

    Errors are printed rather than raised.
    """
    # The template's top-level import only brings in the model and tokenizer
    # classes, so import the trainer classes here; without this the function
    # fails with NameError on TrainingArguments.
    from transformers import Trainer, TrainingArguments

    try:
        # Load dataset for evaluation (replace with your actual dataset loading).
        # Naming the split explicitly matters: a bare path would be exposed
        # under the default split name "train", not "validation".
        dataset = load_dataset('csv', data_files={"validation": dataset_path})

        print("Evaluation dataset loaded. Loading model and tokenizer...")

        tokenizer = AutoTokenizer.from_pretrained(model_name)  # Use base model tokenizer (or fine-tuned tokenizer if saved separately)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        def tokenize_function(examples):
            return tokenizer(examples["text_column"], padding="max_length", truncation=True)  # Example: tokenize 'text_column'

        tokenized_datasets = dataset.map(tokenize_function, batched=True)

        def compute_metrics(eval_pred):
            predictions, labels = eval_pred
            predictions = predictions.argmax(axis=-1)
            accuracy = accuracy_score(labels, predictions)
            report = classification_report(labels, predictions, output_dict=True)  # Detailed report
            return {"accuracy": accuracy, "classification_report": report}

        training_args = TrainingArguments(
            output_dir="./evaluation_results",
            per_device_eval_batch_size=64,
            logging_dir='./eval_logs',
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            eval_dataset=tokenized_datasets["validation"],
            compute_metrics=compute_metrics,
            tokenizer=tokenizer
        )

        evaluation_results = trainer.evaluate()

        print("Model evaluation completed.")
        print("Evaluation Results:")
        print(f"Accuracy: {evaluation_results['eval_accuracy']}")
        print("Classification Report:\n", evaluation_results['eval_classification_report'])

    except FileNotFoundError:
        print("Error: Dataset file or model files not found.")
    except Exception as e:
        print(f"Error during model evaluation: {e}")


if __name__ == "__main__":
    model_filepath = "models/fine_tuned_model"  # Replace with your model path
    evaluation_data_filepath = "data/evaluation_dataset.csv"  # Replace with your evaluation data path
    base_model_name = "bert-base-uncased"  # Replace with your base model name

    evaluate_model(model_filepath, evaluation_data_filepath, model_name=base_model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/code_templates/preprocessing_template.py.txt DELETED
@@ -1,44 +0,0 @@
1
- # Template for data preprocessing script for {{phase_name}}
2
-
3
- import pandas as pd
4
- # Add other necessary imports
5
-
6
def preprocess_data(raw_data_path, processed_data_path):
    """Read a raw CSV, apply the example preprocessing steps and save the result.

    Steps: missing values are filled ("" in ``text_column``, 0 elsewhere);
    if ``text_column`` exists, a ``feature_length`` column with its character
    count is added and the text is lowercased.

    Args:
        raw_data_path: Path of the input CSV file.
        processed_data_path: Path the processed CSV is written to.

    Errors are printed rather than raised.
    """
    try:
        # Load raw data (replace with your actual data loading)
        data = pd.read_csv(raw_data_path)  # Example: CSV loading

        print("Data loaded successfully. Starting preprocessing...")

        # --- Data Preprocessing Steps ---
        # Example steps (customize based on your data and project)
        has_text_column = 'text_column' in data.columns

        # 1. Handle missing values. Text gets an empty string, not 0, so the
        #    string operations below see strings in every row.
        if has_text_column:
            data['text_column'] = data['text_column'].fillna("").astype(str)
        other_columns = [column for column in data.columns if column != 'text_column']
        if other_columns:
            data[other_columns] = data[other_columns].fillna(0)

        # 2. Feature engineering and 3. text cleaning both need the text
        #    column, so both are skipped together when it is absent.
        if has_text_column:
            data['feature_length'] = data['text_column'].str.len()
            data['text_column'] = data['text_column'].str.lower()

        # --- End of Preprocessing Steps ---

        # Save processed data
        data.to_csv(processed_data_path, index=False)
        print(f"Processed data saved to {processed_data_path}")

    except FileNotFoundError:
        print(f"Error: Raw data file not found at {raw_data_path}")
    except Exception as e:
        print(f"Error during data preprocessing: {e}")


if __name__ == "__main__":
    raw_data_filepath = "data/raw_dataset.csv"  # Replace with your raw data path
    processed_data_filepath = "data/processed_dataset.csv"  # Replace with your desired output path

    preprocess_data(raw_data_filepath, processed_data_filepath)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/code_templates/training_template.py.txt DELETED
@@ -1,58 +0,0 @@
1
- # Template for model training script for {{phase_name}}
2
-
3
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
4
- from datasets import load_dataset # Example - datasets library
5
- import torch # Example - PyTorch
6
- # Add other necessary imports
7
-
8
def train_model(processed_dataset_path, model_name="bert-base-uncased", output_dir="./model_output"):
    """Fine-tune a sequence-classification model on a processed CSV dataset.

    Args:
        processed_dataset_path: CSV file with the training data; the example
            tokenizes a column named ``text_column``.
        model_name: Base model / tokenizer to start from.
        output_dir: Directory for checkpoints and the final model.

    Errors are printed rather than raised.
    """
    try:
        # Load processed dataset (replace with your actual dataset loading)
        dataset = load_dataset('csv', data_files=processed_dataset_path)  # Example: CSV dataset loading, replace with your dataset format

        print("Dataset loaded. Preparing model and training...")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Example: binary classification

        def tokenize_function(examples):
            return tokenizer(examples["text_column"], padding="max_length", truncation=True)  # Example: tokenize 'text_column'

        tokenized_datasets = dataset.map(tokenize_function, batched=True)

        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=3,  # Example epochs
            per_device_train_batch_size=16,  # Example batch size
            per_device_eval_batch_size=64,  # Example batch size
            warmup_steps=500,  # Example warmup steps
            weight_decay=0.01,  # Example weight decay
            logging_dir='./logs',  # Directory for logs
            logging_steps=10,
        )

        # A single CSV file is exposed as the "train" split only, so the
        # validation split is optional here: indexing it unconditionally
        # would raise KeyError before any training happens.
        eval_split = tokenized_datasets["validation"] if "validation" in tokenized_datasets else None

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets["train"],  # Assuming 'train' split exists
            eval_dataset=eval_split,
            tokenizer=tokenizer,
        )

        trainer.train()

        # Write the final weights explicitly so the message below is true even
        # when training ends between periodic checkpoints.
        trainer.save_model(output_dir)

        print(f"Model training completed. Model saved to {output_dir}")

    except Exception as e:
        print(f"Error during model training: {e}")


if __name__ == "__main__":
    processed_data_filepath = "data/processed_dataset.csv"  # Replace with your processed data path
    model_output_directory = "models/fine_tuned_model"  # Replace with your desired output directory
    base_model_name = "bert-base-uncased"  # Replace with your base model name

    train_model(processed_data_filepath, model_name=base_model_name, output_dir=model_output_directory)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/parsing_utils.py DELETED
@@ -1,28 +0,0 @@
1
- import yaml
2
-
3
def load_yaml_file(filepath):
    """Load and parse a YAML file.

    Args:
        filepath: Path of the YAML file.

    Returns:
        The parsed document, or ``None`` if the file is missing, unreadable,
        not valid UTF-8, or not valid YAML (a message is printed in each case).
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file {filepath}: {e}")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file {filepath}: {e}")
        return None
15
-
16
def get_roadmap_phases(roadmap_data):
    """Return the value stored under ``'roadmap'``, or ``None``.

    ``None`` is returned when ``roadmap_data`` is empty, is not a mapping
    (for example a YAML document whose root is a string or a list), or has
    no ``'roadmap'`` key.
    """
    if isinstance(roadmap_data, dict):
        return roadmap_data.get('roadmap')
    return None
21
-
22
def get_project_rules(rules_data):
    """Return the value stored under ``'project_rules'``, or ``None``.

    ``None`` is returned when ``rules_data`` is empty, is not a mapping
    (for example a YAML document whose root is a string or a list), or has
    no ``'project_rules'`` key.
    """
    if isinstance(rules_data, dict):
        return rules_data.get('project_rules')
    return None
27
-
28
- # You can add more parsing utility functions as needed