diff --git a/.history/README_20250202035449.md b/.history/README_20250202035449.md new file mode 100644 index 0000000000000000000000000000000000000000..7c548d5fa39d9751b660eb8b070d1ca2366b55c0 --- /dev/null +++ b/.history/README_20250202035449.md @@ -0,0 +1,13 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/.history/README_20250202065215.md b/.history/README_20250202065215.md new file mode 100644 index 0000000000000000000000000000000000000000..a896e5120926cc77974798460d3d425f5cfa0c81 --- /dev/null +++ b/.history/README_20250202065215.md @@ -0,0 +1,56 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- +# Custom AI Chatbot for Project Guidance + +This project provides a template for building a custom AI chatbot that guides users through projects based on predefined roadmaps and rules. + +## Getting Started + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.txt` and `rules.txt`:** Define your project's roadmap and rules in YAML format. +4. **Run the chatbot:** `python app.py` +5. **Access the chatbot:** Open the Gradio link provided in the console. + +## Deployment on Hugging Face Spaces + +1. **Create a new Space on Hugging Face Spaces.** Choose Gradio or Streamlit as the SDK. +2. **Upload all project files** to your Space repository. +3. **Hugging Face Spaces will automatically install dependencies from `requirements.txt` and run `app.py` (or your chosen app script).** + +## Customization + +* **Modify `roadmap.txt` and `rules.txt`** to fit your specific project. +* **Implement actual LLM integration** in `guidance_system.py` and `app.py`. 
+* **Develop code generation logic** in `code_generator.py`. +* **Enhance the user interface** in `app.py`. +* **Add more sophisticated parsing and guidance logic** as needed. + +## File Structure + +* `/data`: Intended for storing project datasets (e.g., for fine-tuning). Currently a placeholder. +* `/models`: Intended for storing downloaded or trained models. Currently a placeholder. +* `/scripts`: Contains Python scripts for core chatbot logic (parsing, guidance, code generation, error handling). +* `/api`: Intended for API related files if you integrate with external services. Currently a placeholder. +* `/configs`: Contains configuration files like `config.yaml`. +* `app.py`: The main application script using Gradio for the user interface. +* `requirements.txt`: Lists Python dependencies. +* `roadmap.txt`: Defines the project roadmap in YAML format. +* `rules.txt`: Defines project rules and best practices in YAML format. + +## Example Roadmap and Rules + +See `roadmap.txt` and `rules.txt` for example content. + +## License + +[Your License] (e.g., MIT License) \ No newline at end of file diff --git a/.history/README_20250202065217.md b/.history/README_20250202065217.md new file mode 100644 index 0000000000000000000000000000000000000000..8fc3a142e24d21e32e7076e1457a96e6ea35fe5f --- /dev/null +++ b/.history/README_20250202065217.md @@ -0,0 +1,57 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- + +# Custom AI Chatbot for Project Guidance + +This project provides a template for building a custom AI chatbot that guides users through projects based on predefined roadmaps and rules. + +## Getting Started + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.txt` and `rules.txt`:** Define your project's roadmap and rules in YAML format. +4. **Run the chatbot:** `python app.py` +5. 
**Access the chatbot:** Open the Gradio link provided in the console. + +## Deployment on Hugging Face Spaces + +1. **Create a new Space on Hugging Face Spaces.** Choose Gradio or Streamlit as the SDK. +2. **Upload all project files** to your Space repository. +3. **Hugging Face Spaces will automatically install dependencies from `requirements.txt` and run `app.py` (or your chosen app script).** + +## Customization + +* **Modify `roadmap.txt` and `rules.txt`** to fit your specific project. +* **Implement actual LLM integration** in `guidance_system.py` and `app.py`. +* **Develop code generation logic** in `code_generator.py`. +* **Enhance the user interface** in `app.py`. +* **Add more sophisticated parsing and guidance logic** as needed. + +## File Structure + +* `/data`: Intended for storing project datasets (e.g., for fine-tuning). Currently a placeholder. +* `/models`: Intended for storing downloaded or trained models. Currently a placeholder. +* `/scripts`: Contains Python scripts for core chatbot logic (parsing, guidance, code generation, error handling). +* `/api`: Intended for API related files if you integrate with external services. Currently a placeholder. +* `/configs`: Contains configuration files like `config.yaml`. +* `app.py`: The main application script using Gradio for the user interface. +* `requirements.txt`: Lists Python dependencies. +* `roadmap.txt`: Defines the project roadmap in YAML format. +* `rules.txt`: Defines project rules and best practices in YAML format. + +## Example Roadmap and Rules + +See `roadmap.txt` and `rules.txt` for example content. 
+ +## License + +[Your License] (e.g., MIT License) \ No newline at end of file diff --git a/.history/README_20250202065728.md b/.history/README_20250202065728.md new file mode 100644 index 0000000000000000000000000000000000000000..98b1beb9c7f00cf11c7c11c00dc64becee52d8b2 --- /dev/null +++ b/.history/README_20250202065728.md @@ -0,0 +1,38 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- +# Custom AI Chatbot for Project Guidance + +This project implements a custom AI chatbot designed to guide users through complex projects based on predefined roadmaps and rules. + +**Features:** + +* **Roadmap-based Guidance:** Follows a structured roadmap defined in `roadmap.yaml`. +* **Rule Enforcement:** Adheres to project rules defined in `rules.yaml`. +* **Dynamic Response Generation:** Provides context-aware and step-by-step guidance. +* **Code Snippet Generation:** Generates complete code snippets for project phases. +* **LLM Selection:** Integrates with Hugging Face Hub for flexible LLM selection. +* **Deployable on Hugging Face Spaces:** Built using Gradio for easy deployment. + +**Getting Started:** + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.yaml` and `rules.yaml`** to define your project guidance. +4. **Configure `configs/chatbot_config.yaml`** (optional). +5. **Run the Gradio app:** `python app.py` +6. **Deploy to Hugging Face Spaces** (refer to Hugging Face Spaces documentation). 
+ +**Further Development:** + +* [List potential improvements and features here, as discussed earlier] + +**License:** [Your License] \ No newline at end of file diff --git a/.history/README_20250202071804.md b/.history/README_20250202071804.md new file mode 100644 index 0000000000000000000000000000000000000000..33c458a12b5ff3f3ca3e85d0f0e9a9fc2d36f00f --- /dev/null +++ b/.history/README_20250202071804.md @@ -0,0 +1,51 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- +# Custom AI Chatbot for Project Guidance + +This project implements a custom AI chatbot designed to guide users through complex projects based on predefined roadmaps and rules. + +**Features:** + +* **Roadmap-based Guidance:** Follows a structured roadmap defined in `roadmap.yaml`. +* **Rule Enforcement:** Adheres to project rules defined in `rules.yaml`. +* **Dynamic Response Generation:** Provides context-aware and step-by-step guidance. +* **Code Snippet Generation:** Generates complete code snippets for project phases using templates. +* **LLM Selection:** Integrates with Hugging Face Hub for flexible LLM selection (DeepSeek and Gemini models). +* **Model Switching:** Allows users to switch between available LLMs via the UI. +* **Basic LLM Responses:** Generates responses using the selected LLM for general queries. +* **Error Handling:** Includes error handling for model loading and switching, with UI warnings. +* **Deployable on Hugging Face Spaces:** Built using Gradio for easy deployment. + +**Getting Started:** + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.yaml` and `rules.yaml`** to define your project guidance. +4. **Configure `configs/chatbot_config.yaml`** to set up LLMs and chatbot behavior. +5. **Run the Gradio app:** `python app.py` +6. 
**Deploy to Hugging Face Spaces** (refer to Hugging Face Spaces documentation). + +**Available Models:** + +* DeepSeek-R1-Distill-Llama-8B +* Gemini 2.0 Flash (Exp 01-21) + +**Further Development:** + +* Enhance LLM response generation for more context-aware and project-specific guidance. +* Implement more sophisticated state management to track user progress through the roadmap. +* Improve code generation with more dynamic templates and customization options. +* Develop a more advanced GUI or web-based interface. +* Add more LLMs to the selection pool. +* Implement more robust error handling and logging. + +**License:** [Your License] \ No newline at end of file diff --git a/.history/README_20250202072652.md b/.history/README_20250202072652.md new file mode 100644 index 0000000000000000000000000000000000000000..dc3a8d352419a3cf96bc1171d5e031d652b4bf6b --- /dev/null +++ b/.history/README_20250202072652.md @@ -0,0 +1,61 @@ +--- +title: Test +emoji: 📚 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.41.1 +app_file: app.py +pinned: false +short_description: STTETTETE +--- +# Custom AI Chatbot for Project Guidance + +This project implements a custom AI chatbot designed to guide users through complex projects based on predefined roadmaps and rules. + +**Features:** + +* **Roadmap-based Guidance:** Follows a structured roadmap defined in `roadmap.yaml`. +* **Rule Enforcement:** Adheres to project rules defined in `rules.yaml`. +* **Dynamic Response Generation:** Provides context-aware and step-by-step guidance. +* **Code Snippet Generation:** Generates complete code snippets for project phases using templates. +* **LLM Selection:** Integrates with Hugging Face Hub for flexible LLM selection (DeepSeek and Gemini models). +* **Model Switching:** Allows users to switch between available LLMs via the UI. +* **Basic LLM Responses:** Generates responses using the selected LLM for general queries. 
+* **Token Control:** Limits LLM response length using `max_response_tokens` in `configs/chatbot_config.yaml`. +* **Error Handling:** Includes error handling for model loading and switching, with UI warnings. +* **Deployable on Hugging Face Spaces:** Built using Gradio for easy deployment. + +**Important Notes on Safety Settings:** + +* **Direct Safety Configuration Limited:** For the Hugging Face models used directly via `transformers` (DeepSeek and Gemini Flash), there are **no easily configurable, standardized "safety settings"** like "Harassment: None," "Hate: None," etc., available through the `transformers` library itself. +* **Model-Dependent Safety:** Safety behavior is primarily determined by how these models were trained and any inherent safety mechanisms built by their creators. +* **Basic Output Filtering (Possible Extension):** For a very rudimentary level of control, you could implement keyword-based output filtering as a post-processing step, but this is not implemented in this version. +* **Commercial APIs Offer More Control:** If you need fine-grained safety controls, consider using commercial LLM APIs (like Google AI Gemini API, OpenAI API), which often provide parameters to adjust safety filters in their API requests. + +**Getting Started:** + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.yaml` and `rules.yaml`** to define your project guidance. +4. **Configure `configs/chatbot_config.yaml`** to set up LLMs, token limits, and chatbot behavior. +5. **Run the Gradio app:** `python app.py` +6. **Deploy to Hugging Face Spaces** (refer to Hugging Face Spaces documentation). + +**Available Models:** + +* DeepSeek-R1-Distill-Llama-8B +* Gemini 2.0 Flash (Exp 01-21) + +**Further Development:** + +* Enhance LLM response generation for more context-aware and project-specific guidance. +* Implement more sophisticated state management to track user progress through the roadmap. 
+* Improve code generation with more dynamic templates and customization options. +* Develop a more advanced GUI or web-based interface. +* Add more LLMs to the selection pool. +* Implement more robust error handling and logging. +* **Explore and potentially integrate keyword-based output filtering for basic safety control.** +* **Investigate using commercial LLM APIs for more advanced safety settings and control.** + +**License:** [Your License] \ No newline at end of file diff --git a/.history/app_20250202070123.py b/.history/app_20250202070123.py new file mode 100644 index 0000000000000000000000000000000000000000..b350af9f13cfb142e54fc9e94dad8f3d8bafb009 --- /dev/null +++ b/.history/app_20250202070123.py @@ -0,0 +1,28 @@ +import gradio as gr +from scripts.chatbot_logic import ProjectGuidanceChatbot + +# Initialize Chatbot +chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" +) + +def respond(message, chat_history): + bot_message = chatbot.process_query(message) + chat_history.append((message, bot_message)) + return "", chat_history + +with gr.Blocks() as demo: + chatbot_greeting = chatbot.get_chatbot_greeting() + gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}") + gr.Markdown(chatbot_greeting) + + chatbot_ui = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot_ui]) + + msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) + +demo.launch() \ No newline at end of file diff --git a/.history/app_20250202071439.py b/.history/app_20250202071439.py new file mode 100644 index 0000000000000000000000000000000000000000..b20e8faf4a7b7f6d36119d57c9b15660af2bba4b --- /dev/null +++ b/.history/app_20250202071439.py @@ -0,0 +1,42 @@ +import gradio as gr +from scripts.chatbot_logic import ProjectGuidanceChatbot + +# Initialize Chatbot +chatbot = ProjectGuidanceChatbot( + 
roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" +) + +def respond(message, chat_history): + bot_message = chatbot.process_query(message) + chat_history.append((message, bot_message)) + return "", chat_history + +def switch_model(model_key): + model_switch_message = chatbot.switch_llm_model(model_key) + greeting_message = chatbot.get_chatbot_greeting() + return greeting_message # Update greeting to reflect new model + +with gr.Blocks() as demo: + chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting()) # Markdown for greeting - to update dynamically + gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}") + + # Model selection dropdown + model_choices = [(model['name'], key) for key, model in chatbot.available_models_config.items()] # (Display Name, Key) + model_dropdown = gr.Dropdown( + choices=model_choices, + value=chatbot.active_model_info['name'] if chatbot.active_model_info else None, # Set initial value to current model name + label="Select LLM Model" + ) + model_dropdown.change(fn=switch_model, inputs=model_dropdown, outputs=chatbot_greeting_md) # Update greeting on model change + + + chatbot_ui = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot_ui]) + + msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) + +demo.launch() \ No newline at end of file diff --git a/.history/app_20250202072114.py b/.history/app_20250202072114.py new file mode 100644 index 0000000000000000000000000000000000000000..b49c7b96dfa162f93c3db22b0cac17706f10748a --- /dev/null +++ b/.history/app_20250202072114.py @@ -0,0 +1,49 @@ +import gradio as gr +from scripts.chatbot_logic import ProjectGuidanceChatbot + +# Initialize Chatbot +chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" +) + +def 
respond(message, chat_history): + bot_message = chatbot.process_query(message) + chat_history.append((message, bot_message)) + return "", chat_history + +def switch_model(model_key): + model_switch_result = chatbot.switch_llm_model(model_key) # Get result message + greeting_message = chatbot.get_chatbot_greeting() + + if "Error:" in model_switch_result: # Check if result contains "Error:" + return gr.Warning(model_switch_result), greeting_message # Display error as Gradio Warning + else: + return None, greeting_message # No warning, just update greeting + +with gr.Blocks() as demo: + chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting()) + gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}") + + model_choices = [(model['name'], key) for key, model in chatbot.available_models_config.items()] + model_dropdown = gr.Dropdown( + choices=model_choices, + value=chatbot.active_model_info['name'] if chatbot.active_model_info else None, + label="Select LLM Model" + ) + model_error_output = gr.Warning(visible=False) # Initially hidden warning component + model_dropdown.change( + fn=switch_model, + inputs=model_dropdown, + outputs=[model_error_output, chatbot_greeting_md] # Output both warning and greeting + ) + + chatbot_ui = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot_ui]) + + msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) + +demo.launch() \ No newline at end of file diff --git a/.history/app_20250202072637.py b/.history/app_20250202072637.py new file mode 100644 index 0000000000000000000000000000000000000000..b49c7b96dfa162f93c3db22b0cac17706f10748a --- /dev/null +++ b/.history/app_20250202072637.py @@ -0,0 +1,49 @@ +import gradio as gr +from scripts.chatbot_logic import ProjectGuidanceChatbot + +# Initialize Chatbot +chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + 
code_templates_dir="scripts/code_templates" +) + +def respond(message, chat_history): + bot_message = chatbot.process_query(message) + chat_history.append((message, bot_message)) + return "", chat_history + +def switch_model(model_key): + model_switch_result = chatbot.switch_llm_model(model_key) # Get result message + greeting_message = chatbot.get_chatbot_greeting() + + if "Error:" in model_switch_result: # Check if result contains "Error:" + return gr.Warning(model_switch_result), greeting_message # Display error as Gradio Warning + else: + return None, greeting_message # No warning, just update greeting + +with gr.Blocks() as demo: + chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting()) + gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}") + + model_choices = [(model['name'], key) for key, model in chatbot.available_models_config.items()] + model_dropdown = gr.Dropdown( + choices=model_choices, + value=chatbot.active_model_info['name'] if chatbot.active_model_info else None, + label="Select LLM Model" + ) + model_error_output = gr.Warning(visible=False) # Initially hidden warning component + model_dropdown.change( + fn=switch_model, + inputs=model_dropdown, + outputs=[model_error_output, chatbot_greeting_md] # Output both warning and greeting + ) + + chatbot_ui = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot_ui]) + + msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) + +demo.launch() \ No newline at end of file diff --git a/.history/app_20250202072640.py b/.history/app_20250202072640.py new file mode 100644 index 0000000000000000000000000000000000000000..b49c7b96dfa162f93c3db22b0cac17706f10748a --- /dev/null +++ b/.history/app_20250202072640.py @@ -0,0 +1,49 @@ +import gradio as gr +from scripts.chatbot_logic import ProjectGuidanceChatbot + +# Initialize Chatbot +chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + 
config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" +) + +def respond(message, chat_history): + bot_message = chatbot.process_query(message) + chat_history.append((message, bot_message)) + return "", chat_history + +def switch_model(model_key): + model_switch_result = chatbot.switch_llm_model(model_key) # Get result message + greeting_message = chatbot.get_chatbot_greeting() + + if "Error:" in model_switch_result: # Check if result contains "Error:" + return gr.Warning(model_switch_result), greeting_message # Display error as Gradio Warning + else: + return None, greeting_message # No warning, just update greeting + +with gr.Blocks() as demo: + chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting()) + gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}") + + model_choices = [(model['name'], key) for key, model in chatbot.available_models_config.items()] + model_dropdown = gr.Dropdown( + choices=model_choices, + value=chatbot.active_model_info['name'] if chatbot.active_model_info else None, + label="Select LLM Model" + ) + model_error_output = gr.Warning(visible=False) # Initially hidden warning component + model_dropdown.change( + fn=switch_model, + inputs=model_dropdown, + outputs=[model_error_output, chatbot_greeting_md] # Output both warning and greeting + ) + + chatbot_ui = gr.Chatbot() + msg = gr.Textbox() + clear = gr.ClearButton([msg, chatbot_ui]) + + msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) + +demo.launch() \ No newline at end of file diff --git a/configs/config.yaml b/.history/configs/chatbot_config_20250202065636.yaml similarity index 100% rename from configs/config.yaml rename to .history/configs/chatbot_config_20250202065636.yaml diff --git a/.history/configs/chatbot_config_20250202071313.yaml b/.history/configs/chatbot_config_20250202071313.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d779f9806d3115e78dee598dfa4b1dd8ab0aa0d --- 
/dev/null +++ b/.history/configs/chatbot_config_20250202071313.yaml @@ -0,0 +1,23 @@ +chatbot: + name: "Project Guidance Chatbot" + description: "Your helpful AI assistant for project completion with LLM selection." + default_llm_model_id: "deepseek-r1-distill-llama-8b" # Default model ID (using keys from 'available_models') + +available_models: + deepseek-r1-distill-llama-8b: + name: "DeepSeek-R1-Distill-Llama-8B" + model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B" + gemini-flash-01-21: # Using a shorter key for easier referencing in code + name: "Gemini 2.0 Flash (Exp 01-21)" + model_id: "google/gemini-2.0-flash-thinking-exp-01-21" + +model_selection: + suggested_models: # (Keep suggested models - might be useful later) + - "mistralai/Mistral-7B-Instruct-v0.2" + - "google/flan-t5-xl" + - "facebook/bart-large" + criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}" + +response_generation: + error_message: "Sorry, I encountered an issue. Please check your input and project files." + default_instruction: "How can I help you with your project?" \ No newline at end of file diff --git a/.history/configs/chatbot_config_20250202071856.yaml b/.history/configs/chatbot_config_20250202071856.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d779f9806d3115e78dee598dfa4b1dd8ab0aa0d --- /dev/null +++ b/.history/configs/chatbot_config_20250202071856.yaml @@ -0,0 +1,23 @@ +chatbot: + name: "Project Guidance Chatbot" + description: "Your helpful AI assistant for project completion with LLM selection." 
+ default_llm_model_id: "deepseek-r1-distill-llama-8b" # Default model ID (using keys from 'available_models') + +available_models: + deepseek-r1-distill-llama-8b: + name: "DeepSeek-R1-Distill-Llama-8B" + model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B" + gemini-flash-01-21: # Using a shorter key for easier referencing in code + name: "Gemini 2.0 Flash (Exp 01-21)" + model_id: "google/gemini-2.0-flash-thinking-exp-01-21" + +model_selection: + suggested_models: # (Keep suggested models - might be useful later) + - "mistralai/Mistral-7B-Instruct-v0.2" + - "google/flan-t5-xl" + - "facebook/bart-large" + criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}" + +response_generation: + error_message: "Sorry, I encountered an issue. Please check your input and project files." + default_instruction: "How can I help you with your project?" \ No newline at end of file diff --git a/.history/configs/chatbot_config_20250202072554.yaml b/.history/configs/chatbot_config_20250202072554.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a5b06421c9eba1414dae0fd1330ac676a19baab --- /dev/null +++ b/.history/configs/chatbot_config_20250202072554.yaml @@ -0,0 +1,24 @@ +chatbot: + name: "Project Guidance Chatbot" + description: "Your helpful AI assistant for project completion with LLM selection and token control." 
+ default_llm_model_id: "deepseek-r1-distill-llama-8b" + max_response_tokens: 200 # Maximum tokens for LLM generated responses + +available_models: + deepseek-r1-distill-llama-8b: + name: "DeepSeek-R1-Distill-Llama-8B" + model_id: "DeepSeek-AI/DeepSeek-R1-Distill-Llama-8B" + gemini-flash-01-21: + name: "Gemini 2.0 Flash (Exp 01-21)" + model_id: "google/gemini-2.0-flash-thinking-exp-01-21" + +model_selection: + suggested_models: + - "mistralai/Mistral-7B-Instruct-v0.2" + - "google/flan-t5-xl" + - "facebook/bart-large" + criteria_prompt: "Consider these criteria when selecting a model: {rules.model_selection}" + +response_generation: + error_message: "Sorry, I encountered an issue. Please check your input and project files." + default_instruction: "How can I help you with your project?" \ No newline at end of file diff --git a/.history/details_20250202061415.txt b/.history/details_20250202061415.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/details_20250202065715.txt b/.history/details_20250202065715.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb54dbcbfd5f9f74cc52e23befe7f174ccb5dfb8 --- /dev/null +++ b/.history/details_20250202065715.txt @@ -0,0 +1,21 @@ +custom-llm-project/ +├── data/ +│ └── # (Optional: Datasets or example data - currently empty) +├── models/ +│ └── # (Optional: Could store cached models or local models in future) +├── scripts/ +│ ├── chatbot_logic.py # Core chatbot logic (parsing, response generation, code gen) +│ ├── parsing_utils.py # Utility functions for parsing roadmap and rules +│ └── code_templates/ # Directory for code templates +│ ├── preprocessing_template.py.txt +│ ├── training_template.py.txt +│ ├── evaluation_template.py.txt +│ └── api_template.py.txt +├── configs/ +│ └── chatbot_config.yaml # Configuration for chatbot behavior, LLM selection, etc. 
+├── api/ # (Placeholder for future API integration - currently empty) +├── roadmap.yaml # Project roadmap (YAML format) +├── rules.yaml # Project rules (YAML format) +├── requirements.txt # Python dependencies +├── app.py # Gradio application script (main entry point for HF Spaces) +└── README.md # Project README file (documentation) \ No newline at end of file diff --git a/.history/requirements_20250202065234.txt b/.history/requirements_20250202065234.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7a9962ae79e4900da79b02bf94fb51fe7087d88 --- /dev/null +++ b/.history/requirements_20250202065234.txt @@ -0,0 +1,3 @@ +# requirements.txt +gradio +PyYAML \ No newline at end of file diff --git a/.history/requirements_20250202065734.txt b/.history/requirements_20250202065734.txt new file mode 100644 index 0000000000000000000000000000000000000000..a7c54e9cbf818c5771f52e924a469b7b6601c769 --- /dev/null +++ b/.history/requirements_20250202065734.txt @@ -0,0 +1,4 @@ +gradio +PyYAML +transformers # For interacting with Hugging Face models (if needed directly) +# Add other dependencies as needed, e.g., for specific LLMs or utilities \ No newline at end of file diff --git a/.history/requirements_20250202071821.txt b/.history/requirements_20250202071821.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d8862d108b3c864927a138520743f9f6d37fdec --- /dev/null +++ b/.history/requirements_20250202071821.txt @@ -0,0 +1,4 @@ +gradio +PyYAML +transformers # For interacting with Hugging Face models (if needed directly) +torch \ No newline at end of file diff --git a/.history/requirements_20250202071824.txt b/.history/requirements_20250202071824.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d8862d108b3c864927a138520743f9f6d37fdec --- /dev/null +++ b/.history/requirements_20250202071824.txt @@ -0,0 +1,4 @@ +gradio +PyYAML +transformers # For interacting with Hugging Face models (if needed directly) +torch \ No 
newline at end of file diff --git a/.history/scripts/chatbot_logic_20250202065546.py b/.history/scripts/chatbot_logic_20250202065546.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/chatbot_logic_20250202070001.py b/.history/scripts/chatbot_logic_20250202070001.py new file mode 100644 index 0000000000000000000000000000000000000000..94c7630028f10ef50cb97bc517ff374250a02bcd --- /dev/null +++ b/.history/scripts/chatbot_logic_20250202070001.py @@ -0,0 +1,132 @@ +from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules +import os + +class ProjectGuidanceChatbot: + def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir): + self.roadmap_file = roadmap_file + self.rules_file = rules_file + self.config_file = config_file + self.code_templates_dir = code_templates_dir + + self.roadmap_data = load_yaml_file(self.roadmap_file) + self.rules_data = load_yaml_file(self.rules_file) + self.config_data = load_yaml_file(self.config_file) + + self.phases = get_roadmap_phases(self.roadmap_data) + self.rules = get_project_rules(self.rules_data) + self.chatbot_config = self.config_data.get('chatbot', {}) if self.config_data else {} + self.model_config = self.config_data.get('model_selection', {}) if self.config_data else {} + self.response_config = self.config_data.get('response_generation', {}) if self.config_data else {} + + + self.current_phase = None # Track current phase (can be improved with state management) + + def get_chatbot_greeting(self): + return f"Hello! I am the {self.chatbot_config.get('name', 'Project Guidance Chatbot')}. {self.chatbot_config.get('description', 'How can I help you with your project?')}" + + def process_query(self, user_query): + if not self.phases: + return "Error: Roadmap data not loaded correctly." + if not self.rules: + return "Error: Rules data not loaded correctly." 
import os

# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202070001.py (tail): remaining
# ProjectGuidanceChatbot methods, reconstructed from the `.history` diff dump
# as module-level functions.  `self` is the chatbot instance and supplies
# .phases, .rules and .code_templates_dir.
# ---------------------------------------------------------------------------


def get_roadmap_summary(self):
    """Render every roadmap phase (name, description, milestones) as text."""
    chunks = ["Project Roadmap:\n"]
    for _key, phase in self.phases.items():
        chunks.append(f"- **Phase: {phase['name']}**\n")
        chunks.append(f" Description: {phase['description']}\n")
        chunks.append(f" Milestones: {', '.join(phase['milestones'])}\n")
    return "".join(chunks)


def get_rules_summary(self):
    """Render every project rule, grouped by category."""
    chunks = ["Project Rules:\n"]
    for category, category_rules in self.rules.items():
        chunks.append(f"**{category.capitalize()} Rules:**\n")
        for _rule_id, rule_text in category_rules.items():
            chunks.append(f"- {rule_text}\n")
    return "".join(chunks)


def get_phase_introduction(self, phase_data):
    """One-shot introduction shown when a phase becomes the active focus."""
    return f"Okay, let's focus on **Phase: {phase_data['name']}**. \nDescription: {phase_data['description']}. \nKey milestones are: {', '.join(phase_data['milestones'])}. \nWhat would you like to know or do in this phase?"


def get_phase_guidance(self, phase_data, user_query):
    """Answer a phase-scoped question.

    Keyword checks are deliberately order-sensitive: milestones, then
    actions, then code, then a descriptive fallback.
    """
    text = user_query.lower()

    if "milestones" in text:
        return "The milestones for this phase are: " + ", ".join(phase_data['milestones'])

    if "actions" in text or "how to" in text:
        if 'actions' in phase_data:
            return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])
        return "No specific actions are listed for this phase in the roadmap."

    if "code" in text or "script" in text:
        if 'code_generation_hint' not in phase_data:
            return "No code generation hint is available for this phase."
        # Template naming convention: "<phase name>_template.py.txt".
        stem = phase_data['name'].lower().replace(" ", "_")
        template_path = os.path.join(self.code_templates_dir, f"{stem}_template.py.txt")
        if os.path.exists(template_path):
            snippet = self.generate_code_snippet(template_path, phase_data)
            return "Here's a starting code snippet for this phase:\n\n```python\n" + snippet + "\n```\n\nRemember to adapt it to your specific needs."
        return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"

    # Fallback: restate the phase description.
    return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"


def generate_code_snippet(self, template_filepath, phase_data):
    """Fill a code template; only the {{phase_name}} placeholder is replaced."""
    try:
        with open(template_filepath, 'r') as handle:
            template = handle.read()
        # Simple substitution; extend here for richer templating.
        return template.replace("{{phase_name}}", phase_data['name'])
    except FileNotFoundError:
        return f"Error: Code template file not found at {template_filepath}"
    except Exception as e:  # defensive catch-all, mirrors the snapshot
        return f"Error generating code snippet: {e}"


if __name__ == '__main__':
    # Manual smoke test (requires the full class from the snapshot head).
    bot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates",
    )
    print(bot.get_chatbot_greeting())
    while True:
        typed = input("You: ")
        if typed.lower() == "exit":
            break
        print("Chatbot:", bot.process_query(typed))
import os

# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202071411.py: model-selection plumbing layered
# on the v1 chatbot.  Reconstructed as module-level functions; `self` is the
# chatbot instance.
#
# FIX: in this snapshot process_query() opened with the elision comment
# "# ... (rest of the process_query function remains largely the same ...)"
# and therefore never defined `query_lower` (and dropped the data-loaded
# guards) -- every call raised NameError.  The opening lines are restored
# from the neighbouring snapshots.
# ---------------------------------------------------------------------------


def load_llm_model(self, model_info):
    """Record *model_info* as the active model (placeholder, no real loading).

    On missing/invalid input, both self.llm_model and self.llm_tokenizer are
    reset to None.  Actual transformers-based loading was still commented out
    at this snapshot.
    """
    if not model_info:
        print("Error: Model information not provided.")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    model_id = model_info.get('model_id')
    model_name = model_info.get('name')
    if not model_id:
        print(f"Error: 'model_id' not found for model: {model_name}")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    print(f"Loading model: {model_name} ({model_id})... (Placeholder - replace with actual LLM loading)")
    # Real AutoTokenizer/AutoModelForCausalLM loading goes here later.
    self.active_model_info = model_info


def switch_llm_model(self, model_key):
    """Switch the active LLM to *model_key*; returns a user-facing status."""
    if model_key not in self.available_models_config:
        return f"Error: Model key '{model_key}' not found in available models."
    info = self.available_models_config[model_key]
    print(f"Switching LLM model to: {info.get('name')}")
    self.load_llm_model(info)
    self.active_model_key = model_key
    return f"Switched to model: {info.get('name')}"


def get_chatbot_greeting(self):
    """Greeting line that also names the currently active model."""
    active = self.active_model_info
    model_name = active.get('name', 'Unknown Model') if active else 'Unknown Model'
    bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
    blurb = self.chatbot_config.get('description', 'How can I help you with your project?')
    return f"Hello! I am the {bot_name}. Currently using **{model_name}**. {blurb}"


def process_query(self, user_query):
    """Route *user_query*: summaries, model switching, then phase guidance."""
    # Restored guards + query_lower (see FIX note above).
    if not self.phases:
        return "Error: Roadmap data not loaded correctly."
    if not self.rules:
        return "Error: Rules data not loaded correctly."

    query_lower = user_query.lower()

    if "roadmap" in query_lower:
        return self.get_roadmap_summary()
    if "rules" in query_lower:
        return self.get_rules_summary()
    if "switch model to" in query_lower:
        # Text command, e.g. "switch model to Mistral"; matches the
        # configured display name or the config key, case-insensitively
        # (the split of the lowered query already yields lowercase).
        parts = user_query.lower().split("switch model to")
        if len(parts) > 1:
            wanted = parts[1].strip()
            for key, model_data in self.available_models_config.items():
                if model_data['name'].lower() == wanted or key.lower() == wanted:
                    # Re-greet so the user sees the newly active model.
                    return self.switch_llm_model(key) + "\n" + self.get_chatbot_greeting()
            return f"Model '{wanted}' not found in available models."

    if self.current_phase:
        active_phase = self.phases.get(self.current_phase)
        if active_phase:
            guided = self.get_phase_guidance(active_phase, user_query)
            if guided:
                return guided

    # Naive phase detection: first phase whose name appears in the query.
    for key, data in self.phases.items():
        if data['name'].lower() in query_lower:
            self.current_phase = key
            return self.get_phase_introduction(data)

    # An LLM fallback was sketched here but still commented out in this
    # snapshot; fall through to the configured default instruction.
    return self.response_config.get('default_instruction', "I can guide you through project phases. Ask me about a specific phase or project aspect.")
# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202072003.py: first version with real
# transformers-based loading and an LLM fallback in process_query.
# Reconstructed as module-level functions; `self` is the chatbot instance.
# AutoTokenizer / AutoModelForCausalLM come from the snapshot's module-level
# `from transformers import ...`.
# ---------------------------------------------------------------------------


def load_llm_model(self, model_info):
    """Load tokenizer + model via transformers; both become None on failure."""
    if not model_info:
        print("Error: Model information not provided.")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    model_id = model_info.get('model_id')
    model_name = model_info.get('name')
    if not model_id:
        print(f"Error: 'model_id' not found for model: {model_name}")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    print(f"Loading model: {model_name} ({model_id})...")
    try:
        self.llm_tokenizer = AutoTokenizer.from_pretrained(model_id)
        # device_map="auto" lets transformers place weights on GPU/CPU.
        self.llm_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
        print(f"Model {model_name} loaded successfully.")
    except Exception as e:
        print(f"Error loading model {model_name} ({model_id}): {e}")
        self.llm_model = None
        self.llm_tokenizer = None
    # NOTE(review): in the collapsed snapshot the indentation of this
    # assignment is ambiguous (function level vs. inside the except); the
    # earlier placeholder snapshot had it at function level -- kept here.
    self.active_model_info = model_info


def switch_llm_model(self, model_key):
    """Switch the active LLM to *model_key*; returns a user-facing status."""
    if model_key not in self.available_models_config:
        return f"Error: Model key '{model_key}' not found in available models."
    info = self.available_models_config[model_key]
    print(f"Switching LLM model to: {info.get('name')}")
    self.load_llm_model(info)
    self.active_model_key = model_key
    return f"Switched to model: {info.get('name')}"


def get_chatbot_greeting(self):
    """Greeting line that also names the currently active model."""
    active = self.active_model_info
    model_name = active.get('name', 'Unknown Model') if active else 'Unknown Model'
    bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
    blurb = self.chatbot_config.get('description', 'How can I help you with your project?')
    return f"Hello! I am the {bot_name}. Currently using **{model_name}**. {blurb}"


def generate_llm_response(self, user_query):
    """Generate a free-form reply with the active LLM.

    Returns an explanatory string (never None) when the model is missing or
    generation fails.
    """
    if not self.llm_model or not self.llm_tokenizer:
        return "LLM model not loaded. Please select a model."
    try:
        # Tokenize and move the tensors to wherever the model lives.
        encoded = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
        generated = self.llm_model.generate(**encoded, max_length=200, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
        return self.llm_tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating LLM response: {e}")
        return f"Error generating response from LLM: {e}"


def process_query(self, user_query):
    """Route *user_query*: summaries, model switching, phases, then the LLM."""
    if not self.phases:
        return "Error: Roadmap data not loaded correctly."
    if not self.rules:
        return "Error: Rules data not loaded correctly."

    query_lower = user_query.lower()

    if "roadmap" in query_lower:
        return self.get_roadmap_summary()
    if "rules" in query_lower:
        return self.get_rules_summary()
    if "switch model to" in query_lower:
        # Text command, e.g. "switch model to Mistral".
        parts = user_query.lower().split("switch model to")
        if len(parts) > 1:
            wanted = parts[1].strip()
            for key, model_data in self.available_models_config.items():
                if model_data['name'].lower() == wanted or key.lower() == wanted:
                    # Re-greet so the user sees the newly active model.
                    return self.switch_llm_model(key) + "\n" + self.get_chatbot_greeting()
            return f"Model '{wanted}' not found in available models."

    if self.current_phase:
        active_phase = self.phases.get(self.current_phase)
        if active_phase:
            guided = self.get_phase_guidance(active_phase, user_query)
            if guided:
                return guided

    for key, data in self.phases.items():
        if data['name'].lower() in query_lower:
            self.current_phase = key
            return self.get_phase_introduction(data)

    # LLM fallback.  NOTE(review): generate_llm_response always returns a
    # non-empty string (even its error messages), so the default instruction
    # below is effectively unreachable in this snapshot -- kept as-is.
    llm_reply = self.generate_llm_response(user_query)
    if llm_reply:
        return llm_reply

    return self.response_config.get('default_instruction', "I can guide you through project phases. Ask me about a specific phase or project aspect.")


def get_roadmap_summary(self):
    """Render every roadmap phase (name, description, milestones) as text."""
    chunks = ["Project Roadmap:\n"]
    for _key, phase in self.phases.items():
        chunks.append(f"- **Phase: {phase['name']}**\n")
        chunks.append(f" Description: {phase['description']}\n")
        chunks.append(f" Milestones: {', '.join(phase['milestones'])}\n")
    return "".join(chunks)


def get_rules_summary(self):
    """Render every project rule, grouped by category."""
    chunks = ["Project Rules:\n"]
    for category, category_rules in self.rules.items():
        chunks.append(f"**{category.capitalize()} Rules:**\n")
        for _rule_id, rule_text in category_rules.items():
            chunks.append(f"- {rule_text}\n")
    return "".join(chunks)


def get_phase_introduction(self, phase_data):
    """One-shot introduction shown when a phase becomes the active focus."""
    return f"Okay, let's focus on **Phase: {phase_data['name']}**. \nDescription: {phase_data['description']}. \nKey milestones are: {', '.join(phase_data['milestones'])}. \nWhat would you like to know or do in this phase?"
+ if "code" in query_lower or "script" in query_lower: + if 'code_generation_hint' in phase_data: + template_filename_prefix = phase_data['name'].lower().replace(" ", "_") + template_filepath = os.path.join(self.code_templates_dir, f"{template_filename_prefix}_template.py.txt") # Example template file naming convention + if os.path.exists(template_filepath): + code_snippet = self.generate_code_snippet(template_filepath, phase_data) # Pass phase data for potential customization + return "Here's a starting code snippet for this phase:\n\n```python\n" + code_snippet + "\n```\n\nRemember to adapt it to your specific needs." + else: + return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}" + else: + return "No code generation hint is available for this phase." + + # General guidance based on phase description (can be enhanced with LLM) + return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?" + + + def generate_code_snippet(self, template_filepath, phase_data): + """Generates code snippet from a template file. 
(Simple template filling example)""" + try: + with open(template_filepath, 'r') as f: + template_content = f.read() + + # Simple placeholder replacement - enhance with more sophisticated templating + code_snippet = template_content.replace("{{phase_name}}", phase_data['name']) + # Add more dynamic replacements based on phase_data or other context if needed + + return code_snippet + except FileNotFoundError: + return f"Error: Code template file not found at {template_filepath}" + except Exception as e: + return f"Error generating code snippet: {e}" + + +# Example usage (for testing - remove or adjust for app.py) +if __name__ == '__main__': + chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" + ) + print(chatbot.get_chatbot_greeting()) + + while True: + user_input = input("You: ") + if user_input.lower() == "exit": + break + response = chatbot.process_query(user_input) + print("Chatbot:", response) \ No newline at end of file diff --git a/.history/scripts/chatbot_logic_20250202072623.py b/.history/scripts/chatbot_logic_20250202072623.py new file mode 100644 index 0000000000000000000000000000000000000000..245363311e4f4530a5cf648af6667892e20e8065 --- /dev/null +++ b/.history/scripts/chatbot_logic_20250202072623.py @@ -0,0 +1,203 @@ +from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules +import os +from transformers import AutoModelForCausalLM, AutoTokenizer # Import necessary classes + +class ProjectGuidanceChatbot: + def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir): + self.roadmap_file = roadmap_file + self.rules_file = rules_file + self.config_file = config_file + self.code_templates_dir = code_templates_dir + + self.roadmap_data = load_yaml_file(self.roadmap_file) + self.rules_data = load_yaml_file(self.rules_file) + self.config_data = load_yaml_file(self.config_file) + + 
# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202072623.py: adds a configurable
# max_response_tokens cap used by generate_llm_response and surfaced in the
# greeting.  Reconstructed as module-level functions; `self` is the chatbot
# instance.  AutoTokenizer / AutoModelForCausalLM come from the snapshot's
# module-level transformers import.
# ---------------------------------------------------------------------------


def load_llm_model(self, model_info):
    """Load tokenizer + model via transformers; both become None on failure."""
    if not model_info:
        print("Error: Model information not provided.")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    ident = model_info.get('model_id')
    label = model_info.get('name')
    if not ident:
        print(f"Error: 'model_id' not found for model: {label}")
        self.llm_model = None
        self.llm_tokenizer = None
        return

    print(f"Loading model: {label} ({ident})...")
    try:
        self.llm_tokenizer = AutoTokenizer.from_pretrained(ident)
        # device_map="auto" lets transformers place weights on GPU/CPU.
        self.llm_model = AutoModelForCausalLM.from_pretrained(ident, device_map="auto")
        print(f"Model {label} loaded successfully.")
    except Exception as e:
        print(f"Error loading model {label} ({ident}): {e}")
        self.llm_model = None
        self.llm_tokenizer = None
    # NOTE(review): indentation of this assignment is ambiguous in the
    # collapsed snapshot; kept at function level as in earlier versions.
    self.active_model_info = model_info


def switch_llm_model(self, model_key):
    """Switch the active LLM to *model_key*; returns a user-facing status."""
    if model_key not in self.available_models_config:
        return f"Error: Model key '{model_key}' not found in available models."
    chosen = self.available_models_config[model_key]
    print(f"Switching LLM model to: {chosen.get('name')}")
    self.load_llm_model(chosen)
    self.active_model_key = model_key
    return f"Switched to model: {chosen.get('name')}"


def get_chatbot_greeting(self):
    """Greeting that names the active model and the response-token cap."""
    active = self.active_model_info
    model_name = active.get('name', 'Unknown Model') if active else 'Unknown Model'
    bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
    blurb = self.chatbot_config.get('description', 'How can I help you with your project?')
    return f"Hello! I am the {bot_name}. Currently using **{model_name}**. Max response tokens: {self.max_response_tokens}. {blurb}"


def generate_llm_response(self, user_query):
    """Generate a reply with the active LLM, capped at max_response_tokens."""
    if not self.llm_model or not self.llm_tokenizer:
        return "LLM model not loaded. Please select a model."
    try:
        encoded = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
        generated = self.llm_model.generate(**encoded, max_length=self.max_response_tokens, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
        return self.llm_tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating LLM response: {e}")
        return f"Error generating response from LLM: {e}"


def process_query(self, user_query):
    """Route *user_query*: summaries, model switching, phases, then the LLM."""
    if not self.phases:
        return "Error: Roadmap data not loaded correctly."
    if not self.rules:
        return "Error: Rules data not loaded correctly."

    query_lower = user_query.lower()

    if "roadmap" in query_lower:
        return self.get_roadmap_summary()
    if "rules" in query_lower:
        return self.get_rules_summary()
    if "switch model to" in query_lower:
        pieces = user_query.lower().split("switch model to")
        if len(pieces) > 1:
            wanted = pieces[1].strip()
            for key, model_data in self.available_models_config.items():
                if model_data['name'].lower() == wanted or key.lower() == wanted:
                    # Re-greet so the user sees the newly active model.
                    return self.switch_llm_model(key) + "\n" + self.get_chatbot_greeting()
            return f"Model '{wanted}' not found in available models."

    if self.current_phase:
        focus = self.phases.get(self.current_phase)
        if focus:
            guided = self.get_phase_guidance(focus, user_query)
            if guided:
                return guided

    for key, data in self.phases.items():
        if data['name'].lower() in query_lower:
            self.current_phase = key
            return self.get_phase_introduction(data)

    # LLM fallback; its return is always a non-empty string, so the default
    # instruction below is effectively unreachable in this snapshot.
    llm_reply = self.generate_llm_response(user_query)
    if llm_reply:
        return llm_reply

    return self.response_config.get('default_instruction', "I can guide you through project phases. Ask me about a specific phase or project aspect.")
import os

# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202072623.py (tail): summaries, phase
# introduction/guidance and template filling; logic unchanged from earlier
# snapshots.  `self` is the chatbot instance.
# ---------------------------------------------------------------------------


def get_roadmap_summary(self):
    """Render every roadmap phase (name, description, milestones) as text."""
    out = ["Project Roadmap:\n"]
    for _key, entry in self.phases.items():
        out.append(f"- **Phase: {entry['name']}**\n")
        out.append(f" Description: {entry['description']}\n")
        out.append(f" Milestones: {', '.join(entry['milestones'])}\n")
    return "".join(out)


def get_rules_summary(self):
    """Render every project rule, grouped by category."""
    out = ["Project Rules:\n"]
    for category, entries in self.rules.items():
        out.append(f"**{category.capitalize()} Rules:**\n")
        for _rid, text in entries.items():
            out.append(f"- {text}\n")
    return "".join(out)


def get_phase_introduction(self, phase_data):
    """One-shot introduction shown when a phase becomes the active focus."""
    return f"Okay, let's focus on **Phase: {phase_data['name']}**. \nDescription: {phase_data['description']}. \nKey milestones are: {', '.join(phase_data['milestones'])}. \nWhat would you like to know or do in this phase?"


def get_phase_guidance(self, phase_data, user_query):
    """Answer a phase-scoped question (milestones / actions / code / fallback)."""
    asked = user_query.lower()

    if "milestones" in asked:
        return "The milestones for this phase are: " + ", ".join(phase_data['milestones'])

    if "actions" in asked or "how to" in asked:
        if 'actions' in phase_data:
            return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])
        return "No specific actions are listed for this phase in the roadmap."

    if "code" in asked or "script" in asked:
        if 'code_generation_hint' not in phase_data:
            return "No code generation hint is available for this phase."
        slug = phase_data['name'].lower().replace(" ", "_")
        tpl = os.path.join(self.code_templates_dir, f"{slug}_template.py.txt")
        if os.path.exists(tpl):
            filled = self.generate_code_snippet(tpl, phase_data)
            return "Here's a starting code snippet for this phase:\n\n```python\n" + filled + "\n```\n\nRemember to adapt it to your specific needs."
        return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"

    return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"


def generate_code_snippet(self, template_filepath, phase_data):
    """Fill a code template; only the {{phase_name}} placeholder is replaced."""
    try:
        with open(template_filepath, 'r') as handle:
            body = handle.read()
        return body.replace("{{phase_name}}", phase_data['name'])
    except FileNotFoundError:
        return f"Error: Code template file not found at {template_filepath}"
    except Exception as e:
        return f"Error generating code snippet: {e}"


if __name__ == '__main__':
    # Manual smoke test (requires the full class from the snapshot head).
    bot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates",
    )
    print(bot.get_chatbot_greeting())
    while True:
        typed = input("You: ")
        if typed.lower() == "exit":
            break
        print("Chatbot:", bot.process_query(typed))
# ---------------------------------------------------------------------------
# Snapshot chatbot_logic_20250202072626.py: a byte-identical re-save of the
# 072623 snapshot (the diff index hashes match).  Only the methods fully
# visible in this truncated span are reconstructed; the class head and the
# tail (get_phase_guidance onward) are cut off in the dump.
# `self` is the chatbot instance; AutoTokenizer / AutoModelForCausalLM come
# from the snapshot's module-level transformers import.
# ---------------------------------------------------------------------------


def switch_llm_model(self, model_key):
    """Switch the active LLM to *model_key*; returns a user-facing status."""
    if model_key not in self.available_models_config:
        return f"Error: Model key '{model_key}' not found in available models."
    selection = self.available_models_config[model_key]
    print(f"Switching LLM model to: {selection.get('name')}")
    self.load_llm_model(selection)
    self.active_model_key = model_key
    return f"Switched to model: {selection.get('name')}"


def get_chatbot_greeting(self):
    """Greeting that names the active model and the response-token cap."""
    info = self.active_model_info
    model_name = info.get('name', 'Unknown Model') if info else 'Unknown Model'
    bot_name = self.chatbot_config.get('name', 'Project Guidance Chatbot')
    blurb = self.chatbot_config.get('description', 'How can I help you with your project?')
    return f"Hello! I am the {bot_name}. Currently using **{model_name}**. Max response tokens: {self.max_response_tokens}. {blurb}"


def generate_llm_response(self, user_query):
    """Generate a reply with the active LLM, capped at max_response_tokens."""
    if not self.llm_model or not self.llm_tokenizer:
        return "LLM model not loaded. Please select a model."
    try:
        tokens = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
        result = self.llm_model.generate(**tokens, max_length=self.max_response_tokens, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
        return self.llm_tokenizer.decode(result[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating LLM response: {e}")
        return f"Error generating response from LLM: {e}"
+ else: + return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}" + else: + return "No code generation hint is available for this phase." + + return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?" + + + def generate_code_snippet(self, template_filepath, phase_data): + """Generates code snippet from a template file. (Simple template filling example)""" + try: + with open(template_filepath, 'r') as f: + template_content = f.read() + + code_snippet = template_content.replace("{{phase_name}}", phase_data['name']) + return code_snippet + except FileNotFoundError: + return f"Error: Code template file not found at {template_filepath}" + except Exception as e: + return f"Error generating code snippet: {e}" + + +# Example usage (for testing - remove or adjust for app.py) +if __name__ == '__main__': + chatbot = ProjectGuidanceChatbot( + roadmap_file="roadmap.yaml", + rules_file="rules.yaml", + config_file="configs/chatbot_config.yaml", + code_templates_dir="scripts/code_templates" + ) + print(chatbot.get_chatbot_greeting()) + + while True: + user_input = input("You: ") + if user_input.lower() == "exit": + break + response = chatbot.process_query(user_input) + print("Chatbot:", response) \ No newline at end of file diff --git a/.history/scripts/code_templates/api_template.py_20250202065623.txt b/.history/scripts/code_templates/api_template.py_20250202065623.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/code_templates/api_template.py_20250202070107.txt b/.history/scripts/code_templates/api_template.py_20250202070107.txt new file mode 100644 index 0000000000000000000000000000000000000000..28b76b70c8dbd0c04c72b990baca6f6c7c78e54b --- /dev/null +++ 
b/.history/scripts/code_templates/api_template.py_20250202070107.txt @@ -0,0 +1,60 @@ +# Template for API integration script for {{phase_name}} (using Flask example) + +from flask import Flask, request, jsonify +from transformers import AutoModelForSequenceClassification, AutoTokenizer +import torch # Example PyTorch + +app = Flask(__name__) + +# --- Model and Tokenizer Loading --- +model_name = "models/fine_tuned_model" # Replace with your actual model path +tokenizer_name = "bert-base-uncased" # Replace with the tokenizer used for training, likely the base model tokenizer +try: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name) + print("Model and tokenizer loaded successfully.") + model.eval() # Set model to evaluation mode +except Exception as e: + print(f"Error loading model or tokenizer: {e}") + tokenizer = None + model = None + + +@app.route('/predict', methods=['POST']) +def predict(): + if not tokenizer or not model: + return jsonify({"error": "Model or tokenizer not loaded."}), 500 + + try: + data = request.get_json() + text = data.get('text') + + if not text: + return jsonify({"error": "No text input provided."}), 400 + + inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt") # Tokenize input text + + with torch.no_grad(): # Inference mode + outputs = model(**inputs) + logits = outputs.logits + predicted_class_id = torch.argmax(logits, dim=-1).item() # Get predicted class + + # --- Map class ID to label (if applicable) --- + # Example for binary classification (class 0 and 1) + labels = ["Negative", "Positive"] # Replace with your actual labels + predicted_label = labels[predicted_class_id] if predicted_class_id < len(labels) else f"Class {predicted_class_id}" + + + return jsonify({"prediction": predicted_label, "class_id": predicted_class_id}) + + except Exception as e: + print(f"Prediction error: {e}") + return jsonify({"error": "Error during 
prediction."}), 500 + +@app.route('/', methods=['GET']) +def health_check(): + return jsonify({"status": "API is healthy"}), 200 + + +if __name__ == '__main__': + app.run(debug=False, host='0.0.0.0', port=5000) # Run Flask app \ No newline at end of file diff --git a/.history/scripts/code_templates/api_template.py_20250202072103.txt b/.history/scripts/code_templates/api_template.py_20250202072103.txt new file mode 100644 index 0000000000000000000000000000000000000000..28b76b70c8dbd0c04c72b990baca6f6c7c78e54b --- /dev/null +++ b/.history/scripts/code_templates/api_template.py_20250202072103.txt @@ -0,0 +1,60 @@ +# Template for API integration script for {{phase_name}} (using Flask example) + +from flask import Flask, request, jsonify +from transformers import AutoModelForSequenceClassification, AutoTokenizer +import torch # Example PyTorch + +app = Flask(__name__) + +# --- Model and Tokenizer Loading --- +model_name = "models/fine_tuned_model" # Replace with your actual model path +tokenizer_name = "bert-base-uncased" # Replace with the tokenizer used for training, likely the base model tokenizer +try: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name) + print("Model and tokenizer loaded successfully.") + model.eval() # Set model to evaluation mode +except Exception as e: + print(f"Error loading model or tokenizer: {e}") + tokenizer = None + model = None + + +@app.route('/predict', methods=['POST']) +def predict(): + if not tokenizer or not model: + return jsonify({"error": "Model or tokenizer not loaded."}), 500 + + try: + data = request.get_json() + text = data.get('text') + + if not text: + return jsonify({"error": "No text input provided."}), 400 + + inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt") # Tokenize input text + + with torch.no_grad(): # Inference mode + outputs = model(**inputs) + logits = outputs.logits + predicted_class_id = 
torch.argmax(logits, dim=-1).item() # Get predicted class + + # --- Map class ID to label (if applicable) --- + # Example for binary classification (class 0 and 1) + labels = ["Negative", "Positive"] # Replace with your actual labels + predicted_label = labels[predicted_class_id] if predicted_class_id < len(labels) else f"Class {predicted_class_id}" + + + return jsonify({"prediction": predicted_label, "class_id": predicted_class_id}) + + except Exception as e: + print(f"Prediction error: {e}") + return jsonify({"error": "Error during prediction."}), 500 + +@app.route('/', methods=['GET']) +def health_check(): + return jsonify({"status": "API is healthy"}), 200 + + +if __name__ == '__main__': + app.run(debug=False, host='0.0.0.0', port=5000) # Run Flask app \ No newline at end of file diff --git a/.history/scripts/code_templates/evaluation_template.py_20250202065617.txt b/.history/scripts/code_templates/evaluation_template.py_20250202065617.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/code_templates/evaluation_template.py_20250202070058.txt b/.history/scripts/code_templates/evaluation_template.py_20250202070058.txt new file mode 100644 index 0000000000000000000000000000000000000000..274cab5bb2528ed6f6b7cbba767f7615972848fb --- /dev/null +++ b/.history/scripts/code_templates/evaluation_template.py_20250202070058.txt @@ -0,0 +1,67 @@ +# Template for model evaluation script for {{phase_name}} + +from transformers import AutoModelForSequenceClassification, AutoTokenizer +from datasets import load_dataset # Example datasets library +from sklearn.metrics import accuracy_score, classification_report # Example metrics +import torch # Example PyTorch +# Add other necessary imports + +def evaluate_model(model_path, dataset_path, model_name="bert-base-uncased"): + """ + Evaluates a trained model on a dataset. 
+ """ + try: + # Load dataset for evaluation (replace with your actual dataset loading) + dataset = load_dataset('csv', data_files=dataset_path) # Example: CSV dataset loading, replace with your dataset format + + print("Evaluation dataset loaded. Loading model and tokenizer...") + + tokenizer = AutoTokenizer.from_pretrained(model_name) # Use base model tokenizer (or fine-tuned tokenizer if saved separately) + model = AutoModelForSequenceClassification.from_pretrained(model_path) + + def tokenize_function(examples): + return tokenizer(examples["text_column"], padding="max_length", truncation=True) # Example: tokenize 'text_column' + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + + def compute_metrics(eval_pred): + predictions, labels = eval_pred + predictions = predictions.argmax(axis=-1) + accuracy = accuracy_score(labels, predictions) + report = classification_report(labels, predictions, output_dict=True) # Detailed report + return {"accuracy": accuracy, "classification_report": report} + + training_args = TrainingArguments( + output_dir="./evaluation_results", + per_device_eval_batch_size=64, + logging_dir='./eval_logs', + ) + + trainer = Trainer( + model=model, + args=training_args, + eval_dataset=tokenized_datasets["validation"], # Assuming 'validation' split exists + compute_metrics=compute_metrics, + tokenizer=tokenizer + ) + + evaluation_results = trainer.evaluate() + + print("Model evaluation completed.") + print("Evaluation Results:") + print(f"Accuracy: {evaluation_results['eval_accuracy']}") + print("Classification Report:\n", evaluation_results['eval_classification_report']) + + + except FileNotFoundError: + print(f"Error: Dataset file or model files not found.") + except Exception as e: + print(f"Error during model evaluation: {e}") + + +if __name__ == "__main__": + model_filepath = "models/fine_tuned_model" # Replace with your model path + evaluation_data_filepath = "data/evaluation_dataset.csv" # Replace with your evaluation 
data path + base_model_name = "bert-base-uncased" # Replace with your base model name + + evaluate_model(model_filepath, evaluation_data_filepath, model_name=base_model_name) \ No newline at end of file diff --git a/.history/scripts/code_templates/evaluation_template.py_20250202072054.txt b/.history/scripts/code_templates/evaluation_template.py_20250202072054.txt new file mode 100644 index 0000000000000000000000000000000000000000..274cab5bb2528ed6f6b7cbba767f7615972848fb --- /dev/null +++ b/.history/scripts/code_templates/evaluation_template.py_20250202072054.txt @@ -0,0 +1,67 @@ +# Template for model evaluation script for {{phase_name}} + +from transformers import AutoModelForSequenceClassification, AutoTokenizer +from datasets import load_dataset # Example datasets library +from sklearn.metrics import accuracy_score, classification_report # Example metrics +import torch # Example PyTorch +# Add other necessary imports + +def evaluate_model(model_path, dataset_path, model_name="bert-base-uncased"): + """ + Evaluates a trained model on a dataset. + """ + try: + # Load dataset for evaluation (replace with your actual dataset loading) + dataset = load_dataset('csv', data_files=dataset_path) # Example: CSV dataset loading, replace with your dataset format + + print("Evaluation dataset loaded. 
Loading model and tokenizer...") + + tokenizer = AutoTokenizer.from_pretrained(model_name) # Use base model tokenizer (or fine-tuned tokenizer if saved separately) + model = AutoModelForSequenceClassification.from_pretrained(model_path) + + def tokenize_function(examples): + return tokenizer(examples["text_column"], padding="max_length", truncation=True) # Example: tokenize 'text_column' + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + + def compute_metrics(eval_pred): + predictions, labels = eval_pred + predictions = predictions.argmax(axis=-1) + accuracy = accuracy_score(labels, predictions) + report = classification_report(labels, predictions, output_dict=True) # Detailed report + return {"accuracy": accuracy, "classification_report": report} + + training_args = TrainingArguments( + output_dir="./evaluation_results", + per_device_eval_batch_size=64, + logging_dir='./eval_logs', + ) + + trainer = Trainer( + model=model, + args=training_args, + eval_dataset=tokenized_datasets["validation"], # Assuming 'validation' split exists + compute_metrics=compute_metrics, + tokenizer=tokenizer + ) + + evaluation_results = trainer.evaluate() + + print("Model evaluation completed.") + print("Evaluation Results:") + print(f"Accuracy: {evaluation_results['eval_accuracy']}") + print("Classification Report:\n", evaluation_results['eval_classification_report']) + + + except FileNotFoundError: + print(f"Error: Dataset file or model files not found.") + except Exception as e: + print(f"Error during model evaluation: {e}") + + +if __name__ == "__main__": + model_filepath = "models/fine_tuned_model" # Replace with your model path + evaluation_data_filepath = "data/evaluation_dataset.csv" # Replace with your evaluation data path + base_model_name = "bert-base-uncased" # Replace with your base model name + + evaluate_model(model_filepath, evaluation_data_filepath, model_name=base_model_name) \ No newline at end of file diff --git 
a/.history/scripts/code_templates/preprocessing_template.py_20250202065605.txt b/.history/scripts/code_templates/preprocessing_template.py_20250202065605.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/code_templates/preprocessing_template.py_20250202070023.txt b/.history/scripts/code_templates/preprocessing_template.py_20250202070023.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f6fda82d1fb12bff9a3ff41401186aef563d92c --- /dev/null +++ b/.history/scripts/code_templates/preprocessing_template.py_20250202070023.txt @@ -0,0 +1,44 @@ +# Template for data preprocessing script for {{phase_name}} + +import pandas as pd +# Add other necessary imports + +def preprocess_data(raw_data_path, processed_data_path): + """ + Reads raw data, preprocesses it, and saves the processed data. + """ + try: + # Load raw data (replace with your actual data loading) + data = pd.read_csv(raw_data_path) # Example: CSV loading + + print("Data loaded successfully. Starting preprocessing...") + + # --- Data Preprocessing Steps --- + # Example steps (customize based on your data and project) + + # 1. Handle missing values + data = data.fillna(0) # Example: fill NaN with 0 + + # 2. Feature engineering (example: create a new feature) + data['feature_length'] = data['text_column'].str.len() # Example: length of text column + + # 3. 
Text cleaning (if applicable - example: lowercasing) + if 'text_column' in data.columns: + data['text_column'] = data['text_column'].str.lower() + + # --- End of Preprocessing Steps --- + + # Save processed data + data.to_csv(processed_data_path, index=False) + print(f"Processed data saved to {processed_data_path}") + + except FileNotFoundError: + print(f"Error: Raw data file not found at {raw_data_path}") + except Exception as e: + print(f"Error during data preprocessing: {e}") + +if __name__ == "__main__": + raw_data_filepath = "data/raw_dataset.csv" # Replace with your raw data path + processed_data_filepath = "data/processed_dataset.csv" # Replace with your desired output path + + preprocess_data(raw_data_filepath, processed_data_filepath) \ No newline at end of file diff --git a/.history/scripts/code_templates/preprocessing_template.py_20250202072023.txt b/.history/scripts/code_templates/preprocessing_template.py_20250202072023.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f6fda82d1fb12bff9a3ff41401186aef563d92c --- /dev/null +++ b/.history/scripts/code_templates/preprocessing_template.py_20250202072023.txt @@ -0,0 +1,44 @@ +# Template for data preprocessing script for {{phase_name}} + +import pandas as pd +# Add other necessary imports + +def preprocess_data(raw_data_path, processed_data_path): + """ + Reads raw data, preprocesses it, and saves the processed data. + """ + try: + # Load raw data (replace with your actual data loading) + data = pd.read_csv(raw_data_path) # Example: CSV loading + + print("Data loaded successfully. Starting preprocessing...") + + # --- Data Preprocessing Steps --- + # Example steps (customize based on your data and project) + + # 1. Handle missing values + data = data.fillna(0) # Example: fill NaN with 0 + + # 2. Feature engineering (example: create a new feature) + data['feature_length'] = data['text_column'].str.len() # Example: length of text column + + # 3. 
Text cleaning (if applicable - example: lowercasing) + if 'text_column' in data.columns: + data['text_column'] = data['text_column'].str.lower() + + # --- End of Preprocessing Steps --- + + # Save processed data + data.to_csv(processed_data_path, index=False) + print(f"Processed data saved to {processed_data_path}") + + except FileNotFoundError: + print(f"Error: Raw data file not found at {raw_data_path}") + except Exception as e: + print(f"Error during data preprocessing: {e}") + +if __name__ == "__main__": + raw_data_filepath = "data/raw_dataset.csv" # Replace with your raw data path + processed_data_filepath = "data/processed_dataset.csv" # Replace with your desired output path + + preprocess_data(raw_data_filepath, processed_data_filepath) \ No newline at end of file diff --git a/.history/scripts/code_templates/training_template.py_20250202065612.txt b/.history/scripts/code_templates/training_template.py_20250202065612.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/code_templates/training_template.py_20250202070043.txt b/.history/scripts/code_templates/training_template.py_20250202070043.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e96daf6759ecfb4b291e8b0007f8a02463b24de --- /dev/null +++ b/.history/scripts/code_templates/training_template.py_20250202070043.txt @@ -0,0 +1,58 @@ +# Template for model training script for {{phase_name}} + +from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer +from datasets import load_dataset # Example - datasets library +import torch # Example - PyTorch +# Add other necessary imports + +def train_model(processed_dataset_path, model_name="bert-base-uncased", output_dir="./model_output"): + """ + Trains a model on the processed dataset. 
+ """ + try: + # Load processed dataset (replace with your actual dataset loading) + dataset = load_dataset('csv', data_files=processed_dataset_path) # Example: CSV dataset loading, replace with your dataset format + + print("Dataset loaded. Preparing model and training...") + + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2) # Example: binary classification + + def tokenize_function(examples): + return tokenizer(examples["text_column"], padding="max_length", truncation=True) # Example: tokenize 'text_column' + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + + training_args = TrainingArguments( + output_dir=output_dir, + num_train_epochs=3, # Example epochs + per_device_train_batch_size=16, # Example batch size + per_device_eval_batch_size=64, # Example batch size + warmup_steps=500, # Example warmup steps + weight_decay=0.01, # Example weight decay + logging_dir='./logs', # Directory for logs + logging_steps=10, + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=tokenized_datasets["train"], # Assuming 'train' split exists + eval_dataset=tokenized_datasets["validation"], # Assuming 'validation' split exists - optional + tokenizer=tokenizer, + ) + + trainer.train() + + print(f"Model training completed. 
Model saved to {output_dir}") + + except Exception as e: + print(f"Error during model training: {e}") + + +if __name__ == "__main__": + processed_data_filepath = "data/processed_dataset.csv" # Replace with your processed data path + model_output_directory = "models/fine_tuned_model" # Replace with your desired output directory + base_model_name = "bert-base-uncased" # Replace with your base model name + + train_model(processed_data_filepath, model_name=base_model_name, output_dir=model_output_directory) \ No newline at end of file diff --git a/.history/scripts/code_templates/training_template.py_20250202072034.txt b/.history/scripts/code_templates/training_template.py_20250202072034.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e96daf6759ecfb4b291e8b0007f8a02463b24de --- /dev/null +++ b/.history/scripts/code_templates/training_template.py_20250202072034.txt @@ -0,0 +1,58 @@ +# Template for model training script for {{phase_name}} + +from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer +from datasets import load_dataset # Example - datasets library +import torch # Example - PyTorch +# Add other necessary imports + +def train_model(processed_dataset_path, model_name="bert-base-uncased", output_dir="./model_output"): + """ + Trains a model on the processed dataset. + """ + try: + # Load processed dataset (replace with your actual dataset loading) + dataset = load_dataset('csv', data_files=processed_dataset_path) # Example: CSV dataset loading, replace with your dataset format + + print("Dataset loaded. 
Preparing model and training...") + + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2) # Example: binary classification + + def tokenize_function(examples): + return tokenizer(examples["text_column"], padding="max_length", truncation=True) # Example: tokenize 'text_column' + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + + training_args = TrainingArguments( + output_dir=output_dir, + num_train_epochs=3, # Example epochs + per_device_train_batch_size=16, # Example batch size + per_device_eval_batch_size=64, # Example batch size + warmup_steps=500, # Example warmup steps + weight_decay=0.01, # Example weight decay + logging_dir='./logs', # Directory for logs + logging_steps=10, + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=tokenized_datasets["train"], # Assuming 'train' split exists + eval_dataset=tokenized_datasets["validation"], # Assuming 'validation' split exists - optional + tokenizer=tokenizer, + ) + + trainer.train() + + print(f"Model training completed. 
Model saved to {output_dir}") + + except Exception as e: + print(f"Error during model training: {e}") + + +if __name__ == "__main__": + processed_data_filepath = "data/processed_dataset.csv" # Replace with your processed data path + model_output_directory = "models/fine_tuned_model" # Replace with your desired output directory + base_model_name = "bert-base-uncased" # Replace with your base model name + + train_model(processed_data_filepath, model_name=base_model_name, output_dir=model_output_directory) \ No newline at end of file diff --git a/.history/scripts/parse_20250202061443.py b/.history/scripts/parse_20250202061443.py new file mode 100644 index 0000000000000000000000000000000000000000..81baba2b70387c786d9c5a94b79ca393050a26ff --- /dev/null +++ b/.history/scripts/parse_20250202061443.py @@ -0,0 +1 @@ +# File reading and parsing functions for roadmap.txt and rules.txt \ No newline at end of file diff --git a/.history/scripts/parsing_utils_20250202065553.py b/.history/scripts/parsing_utils_20250202065553.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/scripts/parsing_utils_20250202065942.py b/.history/scripts/parsing_utils_20250202065942.py new file mode 100644 index 0000000000000000000000000000000000000000..16258b5dc1b1978cd4877f24f5fc434f550958b8 --- /dev/null +++ b/.history/scripts/parsing_utils_20250202065942.py @@ -0,0 +1,28 @@ +import yaml + +def load_yaml_file(filepath): + """Loads and parses a YAML file.""" + try: + with open(filepath, 'r') as f: + data = yaml.safe_load(f) + return data + except FileNotFoundError: + print(f"Error: File not found at {filepath}") + return None + except yaml.YAMLError as e: + print(f"Error parsing YAML file {filepath}: {e}") + return None + +def get_roadmap_phases(roadmap_data): + """Extracts phases from roadmap data.""" + if roadmap_data and 'roadmap' in roadmap_data: + return roadmap_data['roadmap'] + return None + +def 
get_project_rules(rules_data): + """Extracts project rules data.""" + if rules_data and 'project_rules' in rules_data: + return rules_data['project_rules'] + return None + +# You can add more parsing utility functions as needed \ No newline at end of file diff --git a/.history/scripts/parsing_utils_20250202071904.py b/.history/scripts/parsing_utils_20250202071904.py new file mode 100644 index 0000000000000000000000000000000000000000..16258b5dc1b1978cd4877f24f5fc434f550958b8 --- /dev/null +++ b/.history/scripts/parsing_utils_20250202071904.py @@ -0,0 +1,28 @@ +import yaml + +def load_yaml_file(filepath): + """Loads and parses a YAML file.""" + try: + with open(filepath, 'r') as f: + data = yaml.safe_load(f) + return data + except FileNotFoundError: + print(f"Error: File not found at {filepath}") + return None + except yaml.YAMLError as e: + print(f"Error parsing YAML file {filepath}: {e}") + return None + +def get_roadmap_phases(roadmap_data): + """Extracts phases from roadmap data.""" + if roadmap_data and 'roadmap' in roadmap_data: + return roadmap_data['roadmap'] + return None + +def get_project_rules(rules_data): + """Extracts project rules data.""" + if rules_data and 'project_rules' in rules_data: + return rules_data['project_rules'] + return None + +# You can add more parsing utility functions as needed \ No newline at end of file diff --git a/.history/scripts/parsing_utils_20250202071913.py b/.history/scripts/parsing_utils_20250202071913.py new file mode 100644 index 0000000000000000000000000000000000000000..16258b5dc1b1978cd4877f24f5fc434f550958b8 --- /dev/null +++ b/.history/scripts/parsing_utils_20250202071913.py @@ -0,0 +1,28 @@ +import yaml + +def load_yaml_file(filepath): + """Loads and parses a YAML file.""" + try: + with open(filepath, 'r') as f: + data = yaml.safe_load(f) + return data + except FileNotFoundError: + print(f"Error: File not found at {filepath}") + return None + except yaml.YAMLError as e: + print(f"Error parsing YAML file {filepath}: 
{e}") + return None + +def get_roadmap_phases(roadmap_data): + """Extracts phases from roadmap data.""" + if roadmap_data and 'roadmap' in roadmap_data: + return roadmap_data['roadmap'] + return None + +def get_project_rules(rules_data): + """Extracts project rules data.""" + if rules_data and 'project_rules' in rules_data: + return rules_data['project_rules'] + return None + +# You can add more parsing utility functions as needed \ No newline at end of file diff --git a/.history/scripts/parsing_utils_20250202071948.py b/.history/scripts/parsing_utils_20250202071948.py new file mode 100644 index 0000000000000000000000000000000000000000..16258b5dc1b1978cd4877f24f5fc434f550958b8 --- /dev/null +++ b/.history/scripts/parsing_utils_20250202071948.py @@ -0,0 +1,28 @@ +import yaml + +def load_yaml_file(filepath): + """Loads and parses a YAML file.""" + try: + with open(filepath, 'r') as f: + data = yaml.safe_load(f) + return data + except FileNotFoundError: + print(f"Error: File not found at {filepath}") + return None + except yaml.YAMLError as e: + print(f"Error parsing YAML file {filepath}: {e}") + return None + +def get_roadmap_phases(roadmap_data): + """Extracts phases from roadmap data.""" + if roadmap_data and 'roadmap' in roadmap_data: + return roadmap_data['roadmap'] + return None + +def get_project_rules(rules_data): + """Extracts project rules data.""" + if rules_data and 'project_rules' in rules_data: + return rules_data['project_rules'] + return None + +# You can add more parsing utility functions as needed \ No newline at end of file diff --git a/README.md b/README.md index 7c548d5fa39d9751b660eb8b070d1ca2366b55c0..dc3a8d352419a3cf96bc1171d5e031d652b4bf6b 100644 --- a/README.md +++ b/README.md @@ -9,5 +9,53 @@ app_file: app.py pinned: false short_description: STTETTETE --- +# Custom AI Chatbot for Project Guidance -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +This project implements a custom AI chatbot 
designed to guide users through complex projects based on predefined roadmaps and rules. + +**Features:** + +* **Roadmap-based Guidance:** Follows a structured roadmap defined in `roadmap.yaml`. +* **Rule Enforcement:** Adheres to project rules defined in `rules.yaml`. +* **Dynamic Response Generation:** Provides context-aware and step-by-step guidance. +* **Code Snippet Generation:** Generates complete code snippets for project phases using templates. +* **LLM Selection:** Integrates with Hugging Face Hub for flexible LLM selection (DeepSeek and Gemini models). +* **Model Switching:** Allows users to switch between available LLMs via the UI. +* **Basic LLM Responses:** Generates responses using the selected LLM for general queries. +* **Token Control:** Limits LLM response length using `max_response_tokens` in `configs/chatbot_config.yaml`. +* **Error Handling:** Includes error handling for model loading and switching, with UI warnings. +* **Deployable on Hugging Face Spaces:** Built using Gradio for easy deployment. + +**Important Notes on Safety Settings:** + +* **Direct Safety Configuration Limited:** For the Hugging Face models used directly via `transformers` (DeepSeek and Gemini Flash), there are **no easily configurable, standardized "safety settings"** like "Harassment: None," "Hate: None," etc., available through the `transformers` library itself. +* **Model-Dependent Safety:** Safety behavior is primarily determined by how these models were trained and any inherent safety mechanisms built by their creators. +* **Basic Output Filtering (Possible Extension):** For a very rudimentary level of control, you could implement keyword-based output filtering as a post-processing step, but this is not implemented in this version. +* **Commercial APIs Offer More Control:** If you need fine-grained safety controls, consider using commercial LLM APIs (like Google AI Gemini API, OpenAI API), which often provide parameters to adjust safety filters in their API requests. 
+ +**Getting Started:** + +1. **Clone this repository.** +2. **Install dependencies:** `pip install -r requirements.txt` +3. **Customize `roadmap.yaml` and `rules.yaml`** to define your project guidance. +4. **Configure `configs/chatbot_config.yaml`** to set up LLMs, token limits, and chatbot behavior. +5. **Run the Gradio app:** `python app.py` +6. **Deploy to Hugging Face Spaces** (refer to Hugging Face Spaces documentation). + +**Available Models:** + +* DeepSeek-R1-Distill-Llama-8B +* Gemini 2.0 Flash (Exp 01-21) + +**Further Development:** + +* Enhance LLM response generation for more context-aware and project-specific guidance. +* Implement more sophisticated state management to track user progress through the roadmap. +* Improve code generation with more dynamic templates and customization options. +* Develop a more advanced GUI or web-based interface. +* Add more LLMs to the selection pool. +* Implement more robust error handling and logging. +* **Explore and potentially integrate keyword-based output filtering for basic safety control.** +* **Investigate using commercial LLM APIs for more advanced safety settings and control.** + +**License:** [Your License] \ No newline at end of file diff --git a/app.py b/app.py index 538c45c20b5adcf8645c1eafdc985afb36367981..b49c7b96dfa162f93c3db22b0cac17706f10748a 100644 --- a/app.py +++ b/app.py @@ -1,36 +1,49 @@ -# /app.py - -from fastapi import FastAPI, HTTPException -from transformers import AutoModelForCausalLM, AutoTokenizer -from pydantic import BaseModel -import os -import logging - -# Setup logging -logging.basicConfig(level=logging.INFO) - -# FastAPI instance -app = FastAPI() - -# Load the model and tokenizer -MODEL_NAME = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B' -tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) -model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) - -class UserInput(BaseModel): - input_text: str - -@app.post("/predict") -def predict(input: UserInput): - try: - inputs = 
import gradio as gr
from scripts.chatbot_logic import ProjectGuidanceChatbot

# Single chatbot instance shared by all Gradio callbacks.
chatbot = ProjectGuidanceChatbot(
    roadmap_file="roadmap.yaml",
    rules_file="rules.yaml",
    config_file="configs/chatbot_config.yaml",
    code_templates_dir="scripts/code_templates",
)


def respond(message, chat_history):
    """Append the user message and the chatbot's reply to the chat history."""
    bot_message = chatbot.process_query(message)
    chat_history.append((message, bot_message))
    # Clear the textbox and return the updated history.
    return "", chat_history


def switch_model(model_key):
    """Switch the active LLM and refresh the greeting.

    Failures are surfaced with gr.Warning(), which shows a toast
    notification. (Bug fix: gr.Warning is a notification *call*, not a
    component — the previous code instantiated it as an output component
    with visible=False, which does not work.)
    """
    result = chatbot.switch_llm_model(model_key)
    if "Error:" in result:
        gr.Warning(result)
    return chatbot.get_chatbot_greeting()


with gr.Blocks() as demo:
    chatbot_greeting_md = gr.Markdown(chatbot.get_chatbot_greeting())
    gr.Markdown(f"# {chatbot.chatbot_config.get('name', 'Project Guidance Chatbot')}")

    # Dropdown choices are (display_name, key) pairs, so the selected *value*
    # delivered to the callback is the model key.
    model_choices = [(model['name'], key) for key, model in chatbot.available_models_config.items()]
    model_dropdown = gr.Dropdown(
        choices=model_choices,
        # Bug fix: the dropdown's values are model keys, so the initial value
        # must be the active key, not the display name.
        value=chatbot.active_model_key if chatbot.active_model_info else None,
        label="Select LLM Model",
    )
    model_dropdown.change(
        fn=switch_model,
        inputs=model_dropdown,
        outputs=chatbot_greeting_md,
    )

    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot_ui])

    msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui])

demo.launch()
\ No newline at end of file diff --git a/configs/requirments.txt b/configs/requirments.txt deleted file mode 100644 index b236672a1ca7095ea6e1f469dceb9206a2e8cced..0000000000000000000000000000000000000000 --- a/configs/requirments.txt +++ /dev/null @@ -1,8 +0,0 @@ -# /configs/requirements.txt - -fastapi -uvicorn -transformers -torch -datasets -pydantic diff --git a/details.txt b/details.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb54dbcbfd5f9f74cc52e23befe7f174ccb5dfb8 --- /dev/null +++ b/details.txt @@ -0,0 +1,21 @@ +custom-llm-project/ +├── data/ +│ └── # (Optional: Datasets or example data - currently empty) +├── models/ +│ └── # (Optional: Could store cached models or local models in future) +├── scripts/ +│ ├── chatbot_logic.py # Core chatbot logic (parsing, response generation, code gen) +│ ├── parsing_utils.py # Utility functions for parsing roadmap and rules +│ └── code_templates/ # Directory for code templates +│ ├── preprocessing_template.py.txt +│ ├── training_template.py.txt +│ ├── evaluation_template.py.txt +│ └── api_template.py.txt +├── configs/ +│ └── chatbot_config.yaml # Configuration for chatbot behavior, LLM selection, etc. 
from scripts.parsing_utils import load_yaml_file, get_roadmap_phases, get_project_rules
import os
from transformers import AutoModelForCausalLM, AutoTokenizer


class ProjectGuidanceChatbot:
    """Roadmap- and rules-driven project guidance chatbot with switchable LLMs.

    Loads the project roadmap, rules, and chatbot configuration from YAML
    files, answers phase-related questions, renders code snippets from
    templates, and falls back to the active Hugging Face LLM for free-form
    queries.
    """

    def __init__(self, roadmap_file, rules_file, config_file, code_templates_dir):
        self.roadmap_file = roadmap_file
        self.rules_file = rules_file
        self.config_file = config_file
        self.code_templates_dir = code_templates_dir

        self.roadmap_data = load_yaml_file(self.roadmap_file)
        self.rules_data = load_yaml_file(self.rules_file)
        self.config_data = load_yaml_file(self.config_file)

        self.phases = get_roadmap_phases(self.roadmap_data)
        self.rules = get_project_rules(self.rules_data)
        self.chatbot_config = self.config_data.get('chatbot', {}) if self.config_data else {}
        self.model_config = self.config_data.get('model_selection', {}) if self.config_data else {}
        self.response_config = self.config_data.get('response_generation', {}) if self.config_data else {}
        self.available_models_config = self.config_data.get('available_models', {}) if self.config_data else {}

        self.current_phase = None  # roadmap phase key the conversation focuses on
        self.active_model_key = self.chatbot_config.get('default_llm_model_id')
        self.active_model_info = self.available_models_config.get(self.active_model_key)
        self.max_response_tokens = self.chatbot_config.get('max_response_tokens', 200)

        self.llm_model = None
        self.llm_tokenizer = None
        self.load_llm_model(self.active_model_info)

    def load_llm_model(self, model_info):
        """Load the tokenizer and model described by ``model_info``.

        Returns True on success, False on failure.  On failure the model and
        tokenizer are cleared and ``active_model_info`` is left untouched.
        (Bug fix: previously ``active_model_info`` was assigned even after a
        failed load, so the chatbot reported a model that was never loaded.)
        """
        if not model_info:
            print("Error: Model information not provided.")
            self.llm_model = None
            self.llm_tokenizer = None
            return False

        model_id = model_info.get('model_id')
        model_name = model_info.get('name')
        if not model_id:
            print(f"Error: 'model_id' not found for model: {model_name}")
            self.llm_model = None
            self.llm_tokenizer = None
            return False

        print(f"Loading model: {model_name} ({model_id})...")
        try:
            self.llm_tokenizer = AutoTokenizer.from_pretrained(model_id)
            # device_map="auto" lets the loader place weights on GPU/CPU.
            self.llm_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
        except Exception as e:
            print(f"Error loading model {model_name} ({model_id}): {e}")
            self.llm_model = None
            self.llm_tokenizer = None
            return False

        print(f"Model {model_name} loaded successfully.")
        self.active_model_info = model_info  # mark active only after a successful load
        return True

    def switch_llm_model(self, model_key):
        """Switch the active LLM to ``model_key``; return a status message.

        Error messages always contain the substring "Error:" so the UI can
        detect failures.  (Bug fix: previously a failed load still updated
        ``active_model_key`` and reported success.)
        """
        if model_key not in self.available_models_config:
            return f"Error: Model key '{model_key}' not found in available models."

        model_info = self.available_models_config[model_key]
        print(f"Switching LLM model to: {model_info.get('name')}")
        if not self.load_llm_model(model_info):
            return f"Error: failed to load model '{model_info.get('name')}'."
        self.active_model_key = model_key
        return f"Switched to model: {model_info.get('name')}"

    def get_chatbot_greeting(self):
        """Return the greeting line shown in the UI, naming the active model."""
        current_model_name = self.active_model_info.get('name', 'Unknown Model') if self.active_model_info else 'Unknown Model'
        return f"Hello! I am the {self.chatbot_config.get('name', 'Project Guidance Chatbot')}. Currently using **{current_model_name}**. Max response tokens: {self.max_response_tokens}. {self.chatbot_config.get('description', 'How can I help you with your project?')}"

    def generate_llm_response(self, user_query):
        """Generate a free-form response with the currently active LLM."""
        if not self.llm_model or not self.llm_tokenizer:
            return "LLM model not loaded. Please select a model."
        try:
            inputs = self.llm_tokenizer(user_query, return_tensors="pt").to(self.llm_model.device)
            # Bug fix: use max_new_tokens so the configured limit caps the
            # *generated* tokens; max_length also counts the prompt tokens,
            # which contradicts the config's "max_response_tokens" meaning.
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=self.max_response_tokens,
                num_beams=5,
                no_repeat_ngram_size=2,
                early_stopping=True,
            )
            return self.llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            print(f"Error generating LLM response: {e}")
            return f"Error generating response from LLM: {e}"

    def process_query(self, user_query):
        """Route ``user_query`` to roadmap/rules/phase handlers or the LLM."""
        if not self.phases:
            return "Error: Roadmap data not loaded correctly."
        if not self.rules:
            return "Error: Rules data not loaded correctly."

        query_lower = user_query.lower()

        if "roadmap" in query_lower:
            return self.get_roadmap_summary()
        if "rules" in query_lower:
            return self.get_rules_summary()
        if "switch model to" in query_lower:
            parts = query_lower.split("switch model to")
            if len(parts) > 1:
                model_name_or_key = parts[1].strip()
                # Accept either the display name or the config key.
                for key, model_data in self.available_models_config.items():
                    if model_data['name'].lower() == model_name_or_key or key.lower() == model_name_or_key:
                        switch_result = self.switch_llm_model(key)
                        return switch_result + "\n" + self.get_chatbot_greeting()
                return f"Model '{model_name_or_key}' not found in available models."

        # If a phase is already in focus, try phase-specific guidance first.
        if self.current_phase:
            current_phase_data = self.phases.get(self.current_phase)
            if current_phase_data:
                phase_response = self.get_phase_guidance(current_phase_data, user_query)
                if phase_response:
                    return phase_response

        # Otherwise see whether the query names a phase and switch focus to it.
        for phase_key, phase_data in self.phases.items():
            if phase_data['name'].lower() in query_lower:
                self.current_phase = phase_key
                return self.get_phase_introduction(phase_data)

        llm_response = self.generate_llm_response(user_query)
        if llm_response:
            return llm_response

        return self.response_config.get('default_instruction', "I can guide you through project phases. Ask me about a specific phase or project aspect.")

    def get_roadmap_summary(self):
        """Return a Markdown summary of every roadmap phase."""
        summary = "Project Roadmap:\n"
        for phase_key, phase_data in self.phases.items():
            summary += f"- **Phase: {phase_data['name']}**\n"
            summary += f"  Description: {phase_data['description']}\n"
            summary += f"  Milestones: {', '.join(phase_data['milestones'])}\n"
        return summary

    def get_rules_summary(self):
        """Return a Markdown summary of all project rules."""
        summary = "Project Rules:\n"
        for rule_category, rules_list in self.rules.items():
            summary += f"**{rule_category.capitalize()} Rules:**\n"
            # Robustness fix: a rule category may be a mapping (key -> text)
            # or a plain list of rule texts; the original assumed a mapping.
            rule_texts = rules_list.values() if isinstance(rules_list, dict) else rules_list
            for rule_text in rule_texts:
                summary += f"- {rule_text}\n"
        return summary

    def get_phase_introduction(self, phase_data):
        """Return the message shown when the conversation enters a phase."""
        return f"Okay, let's focus on **Phase: {phase_data['name']}**. \nDescription: {phase_data['description']}. \nKey milestones are: {', '.join(phase_data['milestones'])}. \nWhat would you like to know or do in this phase?"

    def get_phase_guidance(self, phase_data, user_query):
        """Answer milestone/action/code questions about the focused phase."""
        query_lower = user_query.lower()

        if "milestones" in query_lower:
            return "The milestones for this phase are: " + ", ".join(phase_data['milestones'])
        if "actions" in query_lower or "how to" in query_lower:
            if 'actions' in phase_data:
                return "Recommended actions for this phase: " + ", ".join(phase_data['actions'])
            return "No specific actions are listed for this phase in the roadmap."
        if "code" in query_lower or "script" in query_lower:
            if 'code_generation_hint' in phase_data:
                # Template files are named after the phase, e.g. "Data Prep"
                # -> data_prep_template.py.txt in the templates directory.
                template_filename_prefix = phase_data['name'].lower().replace(" ", "_")
                template_filepath = os.path.join(self.code_templates_dir, f"{template_filename_prefix}_template.py.txt")
                if os.path.exists(template_filepath):
                    code_snippet = self.generate_code_snippet(template_filepath, phase_data)
                    return "Here's a starting code snippet for this phase:\n\n```python\n" + code_snippet + "\n```\n\nRemember to adapt it to your specific needs."
                return f"A code template for this phase ({phase_data['name']}) is not yet available. However, the hint is: {phase_data['code_generation_hint']}"
            return "No code generation hint is available for this phase."

        return f"For phase '{phase_data['name']}', remember the description: {phase_data['description']}. Consider the milestones and actions. What specific aspect are you interested in?"

    def generate_code_snippet(self, template_filepath, phase_data):
        """Fill the {{phase_name}} placeholder in a template file and return it."""
        try:
            with open(template_filepath, 'r') as f:
                template_content = f.read()
            return template_content.replace("{{phase_name}}", phase_data['name'])
        except FileNotFoundError:
            return f"Error: Code template file not found at {template_filepath}"
        except Exception as e:
            return f"Error generating code snippet: {e}"


# Example usage (for testing - remove or adjust for app.py)
if __name__ == '__main__':
    chatbot = ProjectGuidanceChatbot(
        roadmap_file="roadmap.yaml",
        rules_file="rules.yaml",
        config_file="configs/chatbot_config.yaml",
        code_templates_dir="scripts/code_templates",
    )
    print(chatbot.get_chatbot_greeting())

    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            break
        response = chatbot.process_query(user_input)
        print("Chatbot:", response)
# Template for API integration script for {{phase_name}} (using Flask example)

from flask import Flask, request, jsonify
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch  # Example PyTorch

app = Flask(__name__)

# --- Model and Tokenizer Loading ---
model_name = "models/fine_tuned_model"  # Replace with your actual model path
tokenizer_name = "bert-base-uncased"  # Tokenizer used for training (usually the base model's)
try:
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    print("Model and tokenizer loaded successfully.")
    model.eval()  # inference mode (disables dropout, etc.)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    tokenizer = None
    model = None


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the JSON 'text' field and return the predicted label."""
    if not tokenizer or not model:
        return jsonify({"error": "Model or tokenizer not loaded."}), 500

    try:
        payload = request.get_json()
        text = payload.get('text')
        if not text:
            return jsonify({"error": "No text input provided."}), 400

        encoded = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
        with torch.no_grad():
            class_id = torch.argmax(model(**encoded).logits, dim=-1).item()

        # Map the class index to a human-readable label where possible.
        labels = ["Negative", "Positive"]  # Replace with your actual labels
        label = labels[class_id] if class_id < len(labels) else f"Class {class_id}"

        return jsonify({"prediction": label, "class_id": class_id})
    except Exception as e:
        print(f"Prediction error: {e}")
        return jsonify({"error": "Error during prediction."}), 500


@app.route('/', methods=['GET'])
def health_check():
    """Liveness probe."""
    return jsonify({"status": "API is healthy"}), 200


if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0', port=5000)  # Run Flask app
# Template for model evaluation script for {{phase_name}}

# Bug fix: TrainingArguments and Trainer were used below but never imported,
# which made this template fail with a NameError at runtime.
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset  # Example datasets library
from sklearn.metrics import accuracy_score, classification_report  # Example metrics
import torch  # Example PyTorch
# Add other necessary imports


def evaluate_model(model_path, dataset_path, model_name="bert-base-uncased"):
    """
    Evaluates a trained model on a dataset.

    Parameters:
        model_path: path to the fine-tuned model directory.
        dataset_path: path to the evaluation dataset (CSV in this example).
        model_name: base model name used to load the tokenizer.
    """
    try:
        # Load dataset for evaluation (replace with your actual dataset loading)
        dataset = load_dataset('csv', data_files=dataset_path)

        print("Evaluation dataset loaded. Loading model and tokenizer...")

        # Use the base model tokenizer (or the fine-tuned tokenizer if saved separately).
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        def tokenize_function(examples):
            # Example: tokenize 'text_column' — adapt to your dataset schema.
            return tokenizer(examples["text_column"], padding="max_length", truncation=True)

        tokenized_datasets = dataset.map(tokenize_function, batched=True)

        def compute_metrics(eval_pred):
            predictions, labels = eval_pred
            predictions = predictions.argmax(axis=-1)
            accuracy = accuracy_score(labels, predictions)
            report = classification_report(labels, predictions, output_dict=True)  # Detailed report
            return {"accuracy": accuracy, "classification_report": report}

        training_args = TrainingArguments(
            output_dir="./evaluation_results",
            per_device_eval_batch_size=64,
            logging_dir='./eval_logs',
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            eval_dataset=tokenized_datasets["validation"],  # assumes a 'validation' split — TODO confirm
            compute_metrics=compute_metrics,
            tokenizer=tokenizer,
        )

        evaluation_results = trainer.evaluate()

        print("Model evaluation completed.")
        print("Evaluation Results:")
        # Trainer prefixes metric names with 'eval_'.
        print(f"Accuracy: {evaluation_results['eval_accuracy']}")
        print("Classification Report:\n", evaluation_results['eval_classification_report'])

    except FileNotFoundError:
        print(f"Error: Dataset file or model files not found.")
    except Exception as e:
        print(f"Error during model evaluation: {e}")


if __name__ == "__main__":
    model_filepath = "models/fine_tuned_model"  # Replace with your model path
    evaluation_data_filepath = "data/evaluation_dataset.csv"  # Replace with your evaluation data path
    base_model_name = "bert-base-uncased"  # Replace with your base model name

    evaluate_model(model_filepath, evaluation_data_filepath, model_name=base_model_name)
# Template for data preprocessing script for {{phase_name}}

import pandas as pd
# Add other necessary imports


def preprocess_data(raw_data_path, processed_data_path):
    """
    Reads raw data, preprocesses it, and saves the processed data.

    Parameters:
        raw_data_path: path to the raw input CSV file.
        processed_data_path: path where the processed CSV is written.
    """
    try:
        # Load raw data (replace with your actual data loading)
        data = pd.read_csv(raw_data_path)  # Example: CSV loading

        print("Data loaded successfully. Starting preprocessing...")

        # --- Data Preprocessing Steps ---
        # Example steps (customize based on your data and project)

        # 1. Handle missing values
        data = data.fillna(0)  # Example: fill NaN with 0

        # 2. + 3. Text-derived steps (feature engineering and cleaning).
        # Bug fix: the feature-length step previously ran unguarded and
        # raised KeyError when 'text_column' was absent, while the
        # lowercasing step right below it was guarded — both are now guarded.
        if 'text_column' in data.columns:
            # Feature engineering (example: length of the text column)
            data['feature_length'] = data['text_column'].str.len()
            # Text cleaning (example: lowercasing)
            data['text_column'] = data['text_column'].str.lower()

        # --- End of Preprocessing Steps ---

        # Save processed data
        data.to_csv(processed_data_path, index=False)
        print(f"Processed data saved to {processed_data_path}")

    except FileNotFoundError:
        print(f"Error: Raw data file not found at {raw_data_path}")
    except Exception as e:
        print(f"Error during data preprocessing: {e}")


if __name__ == "__main__":
    raw_data_filepath = "data/raw_dataset.csv"  # Replace with your raw data path
    processed_data_filepath = "data/processed_dataset.csv"  # Replace with your desired output path

    preprocess_data(raw_data_filepath, processed_data_filepath)
# Template for model training script for {{phase_name}}

from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset  # Example - datasets library
import torch  # Example - PyTorch
# Add other necessary imports


def train_model(processed_dataset_path, model_name="bert-base-uncased", output_dir="./model_output"):
    """
    Trains a model on the processed dataset and saves it to *output_dir*.

    Parameters:
        processed_dataset_path: path to the processed dataset (CSV example).
        model_name: Hugging Face model id of the base model to fine-tune.
        output_dir: directory where checkpoints and the final model are written.
    """
    try:
        # Load processed dataset (replace with your actual dataset loading)
        dataset = load_dataset('csv', data_files=processed_dataset_path)

        print("Dataset loaded. Preparing model and training...")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Example: binary classification

        def tokenize_function(examples):
            # Example: tokenize 'text_column' — adapt to your dataset schema.
            return tokenizer(examples["text_column"], padding="max_length", truncation=True)

        tokenized_datasets = dataset.map(tokenize_function, batched=True)

        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=3,  # Example epochs
            per_device_train_batch_size=16,  # Example batch size
            per_device_eval_batch_size=64,  # Example batch size
            warmup_steps=500,  # Example warmup steps
            weight_decay=0.01,  # Example weight decay
            logging_dir='./logs',  # Directory for logs
            logging_steps=10,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets["train"],  # Assuming 'train' split exists
            # Bug fix: the 'validation' split is optional (as the original
            # comment said), so don't index it unconditionally — .get returns
            # None when it is absent, which Trainer accepts.
            eval_dataset=tokenized_datasets.get("validation"),
            tokenizer=tokenizer,
        )

        trainer.train()

        # Bug fix: trainer.train() alone only writes periodic checkpoints;
        # explicitly save the final model so the message below is true.
        trainer.save_model(output_dir)

        print(f"Model training completed. Model saved to {output_dir}")

    except Exception as e:
        print(f"Error during model training: {e}")


if __name__ == "__main__":
    processed_data_filepath = "data/processed_dataset.csv"  # Replace with your processed data path
    model_output_directory = "models/fine_tuned_model"  # Replace with your desired output directory
    base_model_name = "bert-base-uncased"  # Replace with your base model name

    train_model(processed_data_filepath, model_name=base_model_name, output_dir=model_output_directory)
AutoModelForCausalLM.from_pretrained('deepseek-ai/DeepSeek-R1-Distill-Qwen-7B') - -# Define request body model -class InputText(BaseModel): - input_text: str - -@app.post("/predict") -def predict(input: InputText): - inputs = tokenizer(input.input_text, return_tensors="pt") - outputs = model.generate(inputs["input_ids"], max_length=100) - response = tokenizer.decode(outputs[0], skip_special_tokens=True) - return {"response": response} -""" diff --git a/scripts/guidance.py b/scripts/guidance.py deleted file mode 100644 index 380d1d999040523794452ca1bc6f51ab06e3df26..0000000000000000000000000000000000000000 --- a/scripts/guidance.py +++ /dev/null @@ -1,18 +0,0 @@ -# guidance.py - -class ProjectGuide: - def __init__(self, roadmap, rules): - self.roadmap = roadmap - self.rules = rules - - def get_guidance_for_phase(self, phase_name): - """Returns guidance for a specific phase of the project.""" - for phase in self.roadmap: - if phase_name.lower() in phase['phase'].lower(): - milestones = "\n".join(phase['milestones']) - return f"Phase: {phase['phase']}\nMilestones:\n{milestones}" - return "Phase not found." 
import yaml


def load_yaml_file(filepath):
    """Load *filepath* as YAML and return the parsed data, or None on failure.

    Failures (missing file, malformed YAML) are reported on stdout and
    swallowed so callers can treat the result uniformly.
    """
    try:
        with open(filepath, 'r') as handle:
            return yaml.safe_load(handle)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file {filepath}: {e}")
    return None


def get_roadmap_phases(roadmap_data):
    """Return the 'roadmap' section of parsed roadmap data, or None."""
    has_phases = bool(roadmap_data) and 'roadmap' in roadmap_data
    return roadmap_data['roadmap'] if has_phases else None


def get_project_rules(rules_data):
    """Return the 'project_rules' section of parsed rules data, or None."""
    has_rules = bool(rules_data) and 'project_rules' in rules_data
    return rules_data['project_rules'] if has_rules else None

# You can add more parsing utility functions as needed