import os
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import pandas as pd
from datasets import Dataset
import json
import psutil
import time
from datetime import datetime
import onnx
import onnxruntime
from functools import lru_cache
import logging
from typing import Dict, List, Optional
class BankingModel:
    def __init__(self):
        # Create the working directories first, so that logging has a place to write
        self.base_dir = Path.cwd()
        self.dirs = {
            'model': self.base_dir / "trained_model",
            'data': self.base_dir / "data",
            'logs': self.base_dir / "logs",
            'backup': self.base_dir / "backups",
            'cache': self.base_dir / "cache",
            'reports': self.base_dir / "reports"
        }
        for dir_path in self.dirs.values():
            dir_path.mkdir(parents=True, exist_ok=True)

        # Set up the logger (after the logs directory exists)
        self._setup_logging()

        # Model configuration for CPU
        self.model_name = "meta-llama/Llama-2-13b-chat-hf"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

        # CPU-friendly loading options
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            device_map='cpu',
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True
        )

        # LoRA configuration
        self._setup_lora()

        # Initialize the response cache
        self.response_cache = {}

        # Start monitoring
        self.start_monitoring()
    def _setup_logging(self):
        """Set up the logging system."""
        logging.basicConfig(
            filename=self.dirs['logs'] / f'model_{datetime.now().strftime("%Y%m%d")}.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)
    def _setup_lora(self):
        """Configure LoRA for CPU training."""
        self.lora_config = LoraConfig(
            r=8,  # reduced rank for CPU
            lora_alpha=16,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        self.model = get_peft_model(self.model, self.lora_config)
    def cached_predict(self, text: str) -> str:
        """Prediction with caching: reuse the stored answer for repeated inputs."""
        if text not in self.response_cache:
            self.response_cache[text] = self.predict(text)
        return self.response_cache[text]
    def create_backup(self):
        """Create a backup copy of the model."""
        backup_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = self.dirs['backup'] / f"model_backup_{backup_time}"
        self.save_model(backup_path)
        self.logger.info(f"Backup created at {backup_path}")
    def monitor_resources(self) -> Dict:
        """Monitor system resource usage."""
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        return {
            'cpu_usage': cpu_percent,
            'memory_used': memory.percent,
            'memory_available': memory.available / (1024 * 1024 * 1024)  # GB
        }
    def start_monitoring(self):
        """Start continuous monitoring."""
        self.monitoring_data = []
        self.monitoring_start_time = time.time()
    def log_performance(self, input_text: str, response: str, response_time: float):
        """Record the model's performance for a single request."""
        performance_data = {
            'timestamp': datetime.now().isoformat(),
            'input_length': len(input_text),
            'response_length': len(response),
            'response_time': response_time,
            'resources': self.monitor_resources()
        }
        with open(self.dirs['reports'] / 'performance.jsonl', 'a') as f:
            f.write(json.dumps(performance_data) + '\n')
    def export_to_onnx(self):
        """Export the model to ONNX for faster inference."""
        dummy_input = self.tokenizer("test input", return_tensors="pt")
        onnx_path = self.dirs['model'] / "model.onnx"
        torch.onnx.export(
            self.model,
            (dummy_input['input_ids'],),
            str(onnx_path),
            opset_version=12,
            input_names=['input_ids'],
            output_names=['output']
        )
        self.logger.info(f"Model exported to ONNX at {onnx_path}")
    def generate_report(self) -> Dict:
        """Generate a performance report from the logged data."""
        with open(self.dirs['reports'] / 'performance.jsonl', 'r') as f:
            data = [json.loads(line) for line in f]
        return {
            'total_requests': len(data),
            'avg_response_time': sum(d['response_time'] for d in data) / len(data) if data else 0
        }
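
# --- Usage sketch (illustrative, not part of the original file) ---
# A minimal example of how this class might be driven end to end. It assumes
# the class also defines predict() and save_model(), which are referenced
# above but fall outside this excerpt. Note that loading Llama-2-13b at
# float32 on CPU needs a very large amount of RAM, so treat this strictly
# as a sketch of the call flow, not a ready-to-run script.
if __name__ == "__main__":
    bank_model = BankingModel()
    question = "What is the minimum balance for a savings account?"  # hypothetical query
    start = time.time()
    answer = bank_model.cached_predict(question)
    bank_model.log_performance(question, answer, time.time() - start)
    print(answer)
    print(bank_model.generate_report())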