Spaces:
Sleeping
Sleeping
import time
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langgraph_supervisor import create_supervisor
# Load variables from a local .env file into the process environment before
# the model client is built (presumably this is where the OpenAI API key
# comes from — confirm against the deployment configuration).
load_dotenv()
# One chat-model instance, shared by the supervisor and by every worker agent
# created further down in this module.
model = ChatOpenAI(model="gpt-4o")
def extract_events_from_rdb(
    table_name: str,
    start_date: str,
    end_date: str,
    # Explicit Optional: a bare ``List[str]`` with a ``None`` default declares a
    # type that does not admit its own default value, so an explicit ``None``
    # can be rejected by anything that validates against the signature.
    event_types: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Simulate extracting event records from an RDB table into a text file.

    This is a stub: no database is queried. The call pauses for half a second
    to mimic work and then returns fixed, canned statistics. Only the
    ``output_file`` path varies with the arguments.

    Args:
        table_name: Name of the RDB table; used in the output file name.
        start_date: Start date, expected as YYYY-MM-DD. Inserted verbatim into
            the output file name; the format is not validated.
        end_date: End date, expected as YYYY-MM-DD. Inserted verbatim into the
            output file name; the format is not validated.
        event_types: Optional list of event types to filter on. Accepted for
            interface completeness but not applied to the canned statistics;
            ``None`` (the default) means "no filter".

    Returns:
        A dictionary with the keys ``status``, ``records_extracted``,
        ``output_file``, ``total_size_mb``, ``event_type_distribution``
        (per-type counts that add up to ``records_extracted``) and
        ``processing_time_seconds``.
    """
    # Stand-in for query/export latency.
    time.sleep(0.5)
    return {
        "status": "success",
        "records_extracted": 125847,
        "output_file": f"/data/events/{table_name}_{start_date}_{end_date}.txt",
        "total_size_mb": 482.3,
        "event_type_distribution": {
            "user_action": 45230,
            "system_event": 32145,
            "error_log": 18472,
            "transaction": 30000,
        },
        "processing_time_seconds": 12.5,
    }
def prepare_pretraining_data(
    input_file: str,
    tokenizer: str = "gpt2",
    max_length: int = 512,
    min_length: int = 50
) -> Dict[str, Any]:
    """Simulate tokenizing and formatting text data for pre-training.

    Stub implementation: nothing is read or tokenized. The call waits half a
    second and hands back a fixed summary; none of the arguments influence
    the returned values.

    Args:
        input_file: Path of the input text file.
        tokenizer: Name of the tokenizer to use.
        max_length: Maximum sequence length.
        min_length: Minimum sequence length.

    Returns:
        A dictionary of preparation statistics: ``status``, ``output_file``,
        ``total_sequences``, ``total_tokens``, ``avg_sequence_length``,
        ``vocab_size`` and ``processing_time_seconds``.
    """
    simulated_latency_s = 0.5
    time.sleep(simulated_latency_s)

    # Assemble the canned summary piece by piece; insertion order is the
    # order in which the keys are reported.
    summary: Dict[str, Any] = {"status": "success"}
    summary["output_file"] = "/data/pretraining/tokenized_data.bin"
    summary.update(
        total_sequences=89234,
        total_tokens=45623890,
        avg_sequence_length=511.2,
        vocab_size=50257,
        processing_time_seconds=34.2,
    )
    return summary
def pretrain_model(
    data_file: str,
    model_architecture: str = "gpt2-small",
    num_epochs: int = 3,
    batch_size: int = 32,
    learning_rate: float = 5e-5
) -> Dict[str, Any]:
    """Simulate pre-training a language model on prepared data.

    Stub implementation: no training happens. After a half-second pause the
    function returns a fixed report; the arguments do not change it.

    Args:
        data_file: Path of the tokenized data file.
        model_architecture: Model architecture to use.
        num_epochs: Number of training epochs.
        batch_size: Training batch size.
        learning_rate: Learning rate.

    Returns:
        A dictionary with training indicators and the model location:
        ``status``, ``model_path``, ``final_loss``, ``perplexity``,
        ``training_time_hours``, ``total_steps``, ``best_checkpoint``,
        ``gpu_hours`` and per-epoch losses under ``metrics``.
    """
    time.sleep(0.5)

    # Canned per-epoch losses; the last entry doubles as the final loss.
    loss_by_epoch = {
        "epoch_1_loss": 3.245,
        "epoch_2_loss": 2.789,
        "epoch_3_loss": 2.341,
    }

    report: Dict[str, Any] = {}
    report["status"] = "success"
    report["model_path"] = "/models/pretrained/model_checkpoint_epoch3"
    report["final_loss"] = loss_by_epoch["epoch_3_loss"]
    report["perplexity"] = 10.39
    report["training_time_hours"] = 4.5
    report["total_steps"] = 8340
    report["best_checkpoint"] = "checkpoint-7800"
    report["gpu_hours"] = 36.0
    report["metrics"] = loss_by_epoch
    return report
def create_finetuning_data(
    source_data: str,
    task_type: str = "classification",
    num_classes: int = 5,
    train_ratio: float = 0.8,
    augmentation: bool = True
) -> Dict[str, Any]:
    """Simulate building a fine-tuning dataset for a classification task.

    Stub implementation: no files are read or written. The call pauses for
    half a second and returns fixed split sizes and class counts. The only
    argument reflected in the result is ``augmentation``.

    Args:
        source_data: Path of the source data.
        task_type: Task type (for example ``classification`` or
            ``regression``). Not used by the simulation.
        num_classes: Number of target classes. Not used by the simulation.
        train_ratio: Share of samples assigned to the training split. Not
            used by the simulation.
        augmentation: Whether data augmentation should be applied; echoed
            back as ``augmentation_applied``.

    Returns:
        A dictionary with ``status``, the three split file paths
        (``train_file``, ``val_file``, ``test_file``), the three split sizes,
        ``class_distribution``, ``augmentation_applied`` and
        ``processing_time_seconds``.
    """
    # Stand-in for dataset-building latency.
    time.sleep(0.5)
    return {
        "status": "success",
        "train_file": "/data/finetuning/train.jsonl",
        "val_file": "/data/finetuning/val.jsonl",
        "test_file": "/data/finetuning/test.jsonl",
        "train_samples": 12456,
        "val_samples": 3114,
        "test_samples": 3114,
        "class_distribution": {
            "class_0": 2489,
            "class_1": 3201,
            "class_2": 2845,
            "class_3": 2134,
            "class_4": 1787,
        },
        # Report what the caller asked for; this used to be hard-coded to
        # True, so a request with augmentation=False was misreported.
        "augmentation_applied": augmentation,
        "processing_time_seconds": 8.3,
    }
def train_classification_model(
    pretrained_model: str,
    train_data: str,
    val_data: str,
    num_classes: int = 5,
    num_epochs: int = 10,
    batch_size: int = 16,
    learning_rate: float = 2e-5
) -> Dict[str, Any]:
    """Simulate fine-tuning a classification model.

    Stub implementation: no model is trained. The function waits half a
    second and returns a fixed training report that is independent of the
    arguments.

    Args:
        pretrained_model: Path of the pre-trained model.
        train_data: Path of the training data.
        val_data: Path of the validation data.
        num_classes: Number of classes.
        num_epochs: Number of training epochs.
        batch_size: Batch size.
        learning_rate: Learning rate.

    Returns:
        A dictionary with the training outcome and model location:
        ``status``, ``model_path``, ``best_checkpoint``, final train and
        validation losses, ``best_val_accuracy``, ``training_time_hours``,
        ``total_steps``, ``early_stopping_epoch`` and a sample of per-epoch
        metrics under ``metrics_per_epoch``.
    """
    time.sleep(0.5)

    # Canned snapshots for three epochs; the epoch-8 snapshot is also the
    # source of the headline "final"/"best" numbers below.
    history = {
        "epoch_1": {"train_loss": 0.892, "val_loss": 0.845, "val_acc": 0.712},
        "epoch_5": {"train_loss": 0.345, "val_loss": 0.389, "val_acc": 0.887},
        "epoch_8": {"train_loss": 0.234, "val_loss": 0.312, "val_acc": 0.923},
    }
    stop_epoch = 8
    last_snapshot = history["epoch_8"]

    outcome: Dict[str, Any] = {
        "status": "success",
        "model_path": "/models/finetuned/classification_model",
        "best_checkpoint": "checkpoint-epoch8",
    }
    outcome["final_train_loss"] = last_snapshot["train_loss"]
    outcome["final_val_loss"] = last_snapshot["val_loss"]
    outcome["best_val_accuracy"] = last_snapshot["val_acc"]
    outcome["training_time_hours"] = 1.2
    outcome["total_steps"] = 7785
    outcome["early_stopping_epoch"] = stop_epoch
    outcome["metrics_per_epoch"] = history
    return outcome
def evaluate_model(
    model_path: str,
    test_data: str,
    # Explicit Optional: a bare ``List[str]`` with a ``None`` default declares a
    # type that does not admit its own default value.
    metrics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Simulate evaluating a trained model on test data.

    Stub implementation: no model is loaded. After a half-second pause the
    function returns a fixed evaluation report that does not depend on the
    arguments.

    Args:
        model_path: Path of the trained model.
        test_data: Path of the test data.
        metrics: Optional list of metric names to compute. The canned report
            always contains precision, recall, F1 and accuracy, so this value
            is accepted but not consulted; ``None`` means "the default set".

    Returns:
        A dictionary with ``status``, ``test_samples``, overall accuracy,
        macro- and weighted-averaged precision/recall/F1,
        ``per_class_metrics`` (precision, recall, f1, support per class),
        a 5x5 ``confusion_matrix`` whose row sums equal the per-class
        ``support`` values, and ``inference_time_ms``.
    """
    # Stand-in for inference latency.
    time.sleep(0.5)
    # The former "if metrics is None: metrics = [...]" default was assigned
    # and never read, so it has been dropped; the report below is unchanged.
    return {
        "status": "success",
        "test_samples": 3114,
        "overall_accuracy": 0.918,
        "macro_precision": 0.912,
        "macro_recall": 0.908,
        "macro_f1": 0.910,
        "weighted_precision": 0.916,
        "weighted_recall": 0.918,
        "weighted_f1": 0.917,
        "per_class_metrics": {
            "class_0": {"precision": 0.935, "recall": 0.921, "f1": 0.928, "support": 623},
            "class_1": {"precision": 0.948, "recall": 0.952, "f1": 0.950, "support": 640},
            "class_2": {"precision": 0.899, "recall": 0.887, "f1": 0.893, "support": 569},
            "class_3": {"precision": 0.887, "recall": 0.901, "f1": 0.894, "support": 427},
            "class_4": {"precision": 0.891, "recall": 0.879, "f1": 0.885, "support": 357},
        },
        "confusion_matrix": [
            [574, 12, 18, 10, 9],
            [8, 609, 11, 7, 5],
            [15, 9, 505, 28, 12],
            [11, 8, 22, 385, 1],
            [14, 6, 18, 5, 314],
        ],
        "inference_time_ms": 1247.5,
    }
# Worker agent for the data-extraction step: a ReAct agent built on the shared
# `model` whose only tool is `extract_events_from_rdb`. The `name` value is
# the identifier the supervisor prompt uses to refer to this agent.
# The prompt is built from adjacent string literals; it mentions SQL and RDB.
# NOTE(review): the non-ASCII prompt text in this copy looks mis-encoded
# (UTF-8 text read with the wrong code page?) — verify the file encoding.
data_extraction_agent = create_react_agent(
    model=model,
    tools=[extract_events_from_rdb],
    name="data_extraction_expert",
    prompt=(
        "λΉμ μ SQLκ³Ό RDB μμ μ νΉνλ λ°μ΄ν° μΆμΆ μ λ¬Έκ°μ λλ€. "
        "λ°μ΄ν°λ² μ΄μ€ ν μ΄λΈμμ μ΄λ²€νΈ λ μ½λλ₯Ό μΆμΆνκ³ ν μ€νΈ νμμΌλ‘ λ³ννλ μν μ ν©λλ€. "
        "ν μ΄λΈ μ΄λ¦, λ μ§ λ²μ, μ΄λ²€νΈ νμ μ λν λͺ νν μ 보λ₯Ό μ 곡ν΄μΌ ν©λλ€. "
        "λ μ½λ μμ νμΌ ν¬κΈ°λ₯Ό ν¬ν¨ν μΆμΆ ν΅κ³λ₯Ό λ³΄κ³ νμΈμ."
    )
)
# Worker agent for the pre-training step: a ReAct agent built on the shared
# `model` with two tools, `prepare_pretraining_data` and `pretrain_model`.
# The `name` value is the identifier the supervisor prompt uses for it.
# The prompt is built from adjacent string literals; it mentions Loss and
# Perplexity.
# NOTE(review): the non-ASCII prompt text in this copy looks mis-encoded
# (UTF-8 text read with the wrong code page?) — verify the file encoding.
pretraining_agent = create_react_agent(
    model=model,
    tools=[prepare_pretraining_data, pretrain_model],
    name="pretraining_expert",
    prompt=(
        "λΉμ μ μΈμ΄λͺ¨λΈ μ¬μ νμ΅ μ λ¬Έκ°μ λλ€. "
        "ν ν°νλ λ°μ΄ν°λ₯Ό μ€λΉνκ³ λͺ¨λΈμ μ²μλΆν° νμ΅μν€λ μ± μμ λ§‘κ³ μμ΅λλ€. "
        "Lossμ Perplexity κ°μ νμ΅ μ§νλ₯Ό λͺ¨λν°λ§νμΈμ. "
        "λ°μ΄ν° μ€λΉ λ° λͺ¨λΈ νμ΅ μ§ν μν©μ λν μμΈν ν΅κ³λ₯Ό λ³΄κ³ νμΈμ. "
        "ν λ²μ νλμ λκ΅¬λ§ μ¬μ©νμΈμ."
    )
)
# Worker agent for the fine-tuning step: a ReAct agent built on the shared
# `model` with two tools, `create_finetuning_data` and
# `train_classification_model`. The `name` value is the identifier the
# supervisor prompt uses for it.
# NOTE(review): the non-ASCII prompt text in this copy looks mis-encoded
# (UTF-8 text read with the wrong code page?) — verify the file encoding.
finetuning_agent = create_react_agent(
    model=model,
    tools=[create_finetuning_data, train_classification_model],
    name="finetuning_expert",
    prompt=(
        "λΉμ μ λΆλ₯ μμ μ νΉνλ νμΈνλ μ λ¬Έκ°μ λλ€. "
        "κ³ νμ§μ νμΈνλ λ°μ΄ν°μ μ λ§λ€κ³ λΆλ₯ λͺ¨λΈμ νμ΅μν€λ μν μ ν©λλ€. "
        "μ μ ν λ°μ΄ν° λΆν κ³Ό ν΄λμ€ λΆν¬λ₯Ό 보μ₯νμΈμ. "
        "νμΈνλ κ³Όμ μ λ°μ κ±Έμ³ νμ΅ λ° κ²μ¦ μ§νλ₯Ό λͺ¨λν°λ§νμΈμ. "
        "ν λ²μ νλμ λκ΅¬λ§ μ¬μ©νμΈμ."
    )
)
# Worker agent for the evaluation step: a ReAct agent built on the shared
# `model` whose only tool is `evaluate_model`. The `name` value is the
# identifier the supervisor prompt uses for it.
# The prompt is built from adjacent string literals; it mentions Precision,
# Recall, F1-score, Accuracy and the confusion matrix.
# NOTE(review): the non-ASCII prompt text in this copy looks mis-encoded
# (UTF-8 text read with the wrong code page?) — verify the file encoding.
evaluation_agent = create_react_agent(
    model=model,
    tools=[evaluate_model],
    name="evaluation_expert",
    prompt=(
        "λΉμ μ λΆλ₯ μ§νμ νΉνλ λͺ¨λΈ νκ° μ λ¬Έκ°μ λλ€. "
        "Precision, Recall, F1-score, Accuracyλ₯Ό μ¬μ©νμ¬ νμ΅λ λͺ¨λΈμ μ² μ ν νκ°νλ μν μ ν©λλ€. "
        "ν΄λμ€λ³ μΈλΆ μ§νμ μ 체 μ±λ₯ ν΅κ³λ₯Ό μ 곡νμΈμ. "
        "Confusion matrixλ₯Ό λΆμνκ³ κ°μ μ΄ νμν μμμ νμ νμΈμ."
    )
)
# Supervisor over the four worker agents, driven by the same shared `model`.
# Its prompt lists the workers by the `name` each was created with
# (data_extraction_expert, pretraining_expert, finetuning_expert,
# evaluation_expert), one "- name: description" line per worker.
# NOTE(review): the non-ASCII prompt text in this copy looks mis-encoded
# (UTF-8 text read with the wrong code page?) — verify the file encoding.
workflow = create_supervisor(
    [data_extraction_agent, pretraining_agent, finetuning_agent, evaluation_agent],
    model=model,
    prompt=(
        "λΉμ μ ML νμ΄νλΌμΈ κ°λ μμ λλ€. "
        "μ¬μ©μμ μμ²μ μ΄ν΄νκ³ λͺ©νλ₯Ό λ¬μ±νκΈ° μν΄ νμν μ λ¬Έκ°λ§ μ ννμΈμ.\n\n"
        "μ¬μ© κ°λ₯ν μ λ¬Έκ°:\n"
        "- data_extraction_expert: RDBμμ μ΄λ²€νΈ λ°μ΄ν° μΆμΆ\n"
        "- pretraining_expert: λ°μ΄ν° μ€λΉ λ° μΈμ΄λͺ¨λΈ μ¬μ νμ΅\n"
        "- finetuning_expert: νμΈνλ λ°μ΄ν° μμ± λ° λΆλ₯ λͺ¨λΈ νμ΅\n"
        "- evaluation_expert: λͺ¨λΈ νκ° (Precision, Recall, F1 λ±)\n\n"
        "μ¬μ©μκ° μμ²ν μμ λ§ μννκ³ , μμ²νμ§ μμ μΆκ° μμ μ μ§ννμ§ λ§μΈμ."
    )
)
# Compile the supervisor workflow into the module-level application object
# (presumably what callers import and invoke — confirm against the caller).
ml_app = workflow.compile()