| | |
| | import sys |
| | import os |
| | from pathlib import Path |
| |
|
# True when running inside Google Colab (the colab module is pre-imported there).
IS_COLAB = 'google.colab' in sys.modules

# Directory containing this script; anchor point for the upward search below.
current_dir = Path(__file__).parent.absolute()

# Walk upward from current_dir looking for agent.py so it can be imported no
# matter where the script is launched from. The loop tests every directory
# INCLUDING the filesystem root (the original exited before checking the root).
agent_path = None
search_dir = current_dir
while True:
    if (search_dir / 'agent.py').exists():
        agent_path = str(search_dir)
        break
    if search_dir == search_dir.parent:  # reached filesystem root; nothing found
        break
    search_dir = search_dir.parent

if agent_path:
    sys.path.insert(0, agent_path)
    print(f"Added {agent_path} to Python path")
else:
    print("Could not find agent.py")
| |
|
| | |
# Import the agent class. The import must be ABSOLUTE: this file executes as a
# top-level script (no package context), so the relative form
# `from .agent import ...` raises "attempted relative import with no known
# parent package". The directory containing agent.py was inserted into
# sys.path above precisely so this absolute import resolves.
try:
    from agent import AutonomousWebAgent
    print("Successfully imported AutonomousWebAgent")
except ImportError as e:
    print(f"Error importing AutonomousWebAgent: {e}")
    sys.exit(1)
| |
|
| | |
| | from twisted.internet import reactor, defer, task |
| | import random |
| | import logging |
| | import time |
| | import codecs |
| | |
# Logging setup: Colab already captures stdout, so a console-only config is
# enough there; everywhere else, log to a UTF-8 file and to a UTF-8-wrapped
# stdout stream (avoids encoding errors on non-UTF-8 consoles).
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

if IS_COLAB:
    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
else:
    _handlers = [
        logging.FileHandler("agent_training.log", encoding='utf-8'),
        logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer)),
    ]
    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_handlers)

# Module-level logger used throughout this script.
logger = logging.getLogger(__name__)
| |
|
| | |
# Pool of search topics; each training episode samples one uniformly at random
# (see random.choice in train_agent).
QUERIES = [
    "machine learning", "climate change", "renewable energy", "artificial intelligence",
    "quantum computing", "blockchain technology", "gene editing", "virtual reality",
    "space exploration", "cybersecurity", "autonomous vehicles", "Internet of Things",
    "3D printing", "nanotechnology", "bioinformatics", "augmented reality", "robotics",
    "data science", "neural networks", "cloud computing", "edge computing", "5G technology",
    "cryptocurrency", "natural language processing", "computer vision"
]
| |
|
@defer.inlineCallbacks
def train_agent():
    """Run a fixed number of training episodes against the AutonomousWebAgent.

    Twisted inlineCallbacks coroutine: each `yield` suspends until the agent's
    search Deferred fires. Per episode it samples a query from QUERIES, runs
    one search with a 300 s timeout, accumulates the reward, and logs timing
    and progress. After all episodes it saves the worker/manager/agent models
    and stops the reactor if it is still running.

    Side effects: writes model files (*.pth), logs via `logger`, may stop the
    global reactor. Returns nothing useful to callers.
    """
    # Hyperparameters for the agent's state/action/option spaces.
    # NOTE(review): sizes presumably match AutonomousWebAgent's expected
    # feature/action encodings — confirm against agent.py.
    state_size = 7
    action_size = 3
    num_options = 3

    agent = AutonomousWebAgent(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,           # start fully exploratory...
        epsilon_decay=0.995,   # ...and decay toward epsilon_min
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json'
    )
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

    num_episodes = 10
    total_training_reward = 0
    start_time = time.time()

    for episode in range(num_episodes):
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_start_time = time.time()

        try:
            # agent.search returns a Deferred; cap each episode at 5 minutes.
            search_deferred = agent.search(query)
            search_deferred.addTimeout(300, reactor)
            total_reward = yield search_deferred
            total_training_reward += total_reward
            episode_duration = time.time() - episode_start_time
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except defer.TimeoutError:
            # addTimeout fired: penalize the episode but keep training.
            logger.error(f"Episode {episode + 1} timed out")
            total_reward = -1
            total_training_reward += total_reward
        except Exception as e:
            # Any other per-episode failure is logged and penalized; training continues.
            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
            total_reward = -1
            total_training_reward += total_reward

        # Periodically sync target networks (with num_episodes=10 this fires
        # exactly once, on the final episode).
        if (episode + 1) % 10 == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()

        # Simple linear ETA estimate based on average episode duration so far.
        progress = (episode + 1) / num_episodes
        elapsed_time = time.time() - start_time
        estimated_total_time = elapsed_time / progress if progress > 0 else 0
        remaining_time = estimated_total_time - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

    total_training_time = time.time() - start_time
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")
    logger.info("Saving models.")

    # Persist all three model checkpoints before shutting down.
    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")

    # Stop the reactor here on the success path; main() also attaches a stop
    # callback, so the guard avoids stopping twice.
    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()
| |
|
def main(is_colab=False):
    """Entry point: record environment info, schedule training, run the reactor.

    Args:
        is_colab: True when running under Google Colab; stored in the
            module-level IS_COLAB flag (note the logging configuration was
            already applied at import time, so this only affects later readers
            of the flag).
    """
    global IS_COLAB
    IS_COLAB = is_colab
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path}")
    print("Contents of current directory:")
    for item in os.listdir():
        print(f"  {item}")
    logger.info("Starting agent training")
    # Defer train_agent onto the reactor so it starts once the loop is running.
    d = task.deferLater(reactor, 0, train_agent)
    d.addErrback(lambda failure: logger.error(f"An error occurred: {failure}", exc_info=True))
    # train_agent stops the reactor itself on the success path; an
    # unconditional stop here would raise ReactorNotRunning, so guard it.
    d.addBoth(lambda _: reactor.stop() if reactor.running else None)
    reactor.run()
| |
|
# Script entry point; IS_COLAB was detected at import time (L7) and is passed
# through so main() can re-store it under the same flag.
if __name__ == "__main__":
    main(IS_COLAB)
| |
|