"""Weather-driven fault-risk scoring for power transmission lines.

Module-level setup: imports, display/warning settings, the chart colour
palette, input/output paths and the weather-column mapping.
"""
import os
import re
import sys
import time
import warnings
from datetime import datetime, timedelta

# --- REQUIRED LIBRARIES ---
import catboost as cb
import lightgbm as lgb
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import xgboost as xgb
from sklearn.ensemble import (
    AdaBoostClassifier,
    ExtraTreesClassifier,
    HistGradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# --- SETTINGS ---
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# NOTE: this silences every warning process-wide, including deprecations.
warnings.filterwarnings("ignore")
requests.packages.urllib3.disable_warnings()

# --- ACADEMIC COLOUR PALETTE ---
# Sober, clearly separable colours suitable for scientific publications.
ACADEMIC_COLORS = {
    'dusuk': '#2E7D32',      # dark green (forest green)
    'orta': '#F9A825',       # dark mustard yellow
    'riskli': '#EF6C00',     # dark orange (brick)
    'yuksek': '#C62828',     # dark burgundy
    'tehlikeli': '#37474F',  # dark anthracite blue/black
}

# FILES AND FOLDERS
MGM_DATA_FILE = "mgm.csv"
FAULT_FILE = "faults.csv"
MAIN_FOLDER = "ariza_grafikleri"
SUB_FOLDER = "TANZER_PROHIBRIT_RESULTS"
OUTPUT_DIR = os.path.join(MAIN_FOLDER, SUB_FOLDER)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

OUTPUT_EXCEL = os.path.join(OUTPUT_DIR, "PROHIBRIT_RISK_RAPORU.xlsx")
PLOT_BAR = os.path.join(OUTPUT_DIR, "PROHIBRIT_BAR_CHART.png")
PLOT_LINE = os.path.join(OUTPUT_DIR, "PROHIBRIT_LINE_CHART.png")

# Internal feature name -> column header expected in the weather CSV.
MGM_MAPPING = {
    "Tarih": "Zaman",
    "Sicaklik": "Sıcaklık",
    "Nem": "Nispi Nem",
    "Yagis": "Toplam Yağış OMGI mm",
    "Ruzgar_Hizi": "Rüzgar Yönü ve Hızı",
    "Basinc": "Deniz Seviyesine İndirgenmiş Basınç hPa",
}
print("========================================================================")
print(" TANZER PROHİBRİT MODEL (AKADEMİK SÜRÜM)")
print(" (11 Model Ensemble + Gelişmiş Bilimsel Görselleştirme)")
print("========================================================================\n")


# ---------------------------------------------------------
# 1. DATA PROCESSING
# ---------------------------------------------------------
# The sign applies to both alternatives; without the group, "-5" parsed as 5.
_NUMBER_RE = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")
_SERIAL_RE = re.compile(r"^\d+(\.\d+)?$")
_EXCEL_EPOCH = pd.Timestamp('1899-12-30')
_DATE_PARSE_ERRORS = (ValueError, TypeError, OverflowError)


def clean_number(val):
    """Extract the first number found in *val* as a float.

    Decimal commas are treated as decimal points. Missing, empty or
    non-numeric input yields ``0.0``.
    """
    try:
        if pd.isna(val) or val == "":
            return 0.0
    except (TypeError, ValueError):
        # Array-like input makes the truth test ambiguous; treat as "no number".
        return 0.0
    text = str(val).replace(',', '.').strip()
    match = _NUMBER_RE.search(text)
    return float(match.group()) if match else 0.0


def convert_excel_date(val):
    """Convert an Excel serial number or a date string to a timestamp.

    Purely numeric values strictly between 30000 and 60000 are read as Excel
    day serials counted from 1899-12-30. Anything else is parsed as a date,
    day-first first and then with pandas' default order. Returns ``pd.NaT``
    when the value is missing or cannot be parsed.
    """
    try:
        if pd.isna(val) or val == "":
            return pd.NaT
    except (TypeError, ValueError):
        return pd.NaT

    text = str(val).replace(',', '.').strip()
    if _SERIAL_RE.match(text):
        serial = float(text)
        if 30000 < serial < 60000:
            return _EXCEL_EPOCH + pd.to_timedelta(serial, unit='D')

    for parse_kwargs in ({'dayfirst': True}, {}):
        try:
            return pd.to_datetime(val, **parse_kwargs)
        except _DATE_PARSE_ERRORS:
            continue
    return pd.NaT


def clean_coord(val, max_abs=90.0):
    """Parse a coordinate, returning ``None`` when invalid or out of range.

    Args:
        val: Raw coordinate; decimal commas and stray characters are removed.
        max_abs: Largest accepted absolute value. The default of 90 matches
            latitudes; pass 180 when validating longitudes.
    """
    text = re.sub(r"[^0-9.\-]", "", str(val).replace(',', '.'))
    try:
        coord = float(text)
    except ValueError:
        return None
    return coord if -max_abs <= coord <= max_abs else None


def calculate_features(df):
    """Derive the engineered weather features used by the model.

    The frame is indexed by time (the ``Tarih`` column when present,
    otherwise the existing index parsed day-first), rows with an unparseable
    time are dropped and the rest are sorted. Features whose source column
    is absent are filled with 0. The caller's frame is never modified.

    Returns:
        A new time-indexed frame with the extra feature columns, without any
        row that still contains a missing value.
    """
    # Work on a private copy: the index is reassigned below.
    df = df.set_index('Tarih') if 'Tarih' in df.columns else df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index, dayfirst=True, errors='coerce')
    df = df[df.index.notna()].sort_index().copy()

    if 'Yagis' in df.columns:
        # Upper-case 'D' is the canonical day alias for time-based windows.
        df['Yagis_7G'] = df['Yagis'].rolling('7D').sum().fillna(0)
    else:
        df['Yagis_7G'] = 0

    if 'Basinc' in df.columns:
        # diff(24) compares with the row 24 positions earlier.
        # NOTE(review): that is a one-day trend only for hourly data.
        df['Basinc_Trend'] = df['Basinc'].diff(24).fillna(0)
        df['Basinc_Stabilite'] = df['Basinc'].rolling('3D').std().fillna(0)
    else:
        df['Basinc_Trend'] = 0
        df['Basinc_Stabilite'] = 0

    if 'Sicaklik' in df.columns:
        df['Sicaklik_Soku'] = df['Sicaklik'].diff(6).abs().fillna(0)
        # Non-zero only while the temperature is below zero.
        df['Donma_Indeksi'] = (df['Sicaklik'] < 0).astype(int) * df['Yagis_7G']
    else:
        df['Sicaklik_Soku'] = 0
        df['Donma_Indeksi'] = 0

    if 'Ruzgar_Hizi' in df.columns:
        df['Ruzgar_Enerjisi'] = df['Ruzgar_Hizi'] ** 2
        df['Firtina_Gucu'] = df['Ruzgar_Enerjisi'] * np.log1p(df['Yagis_7G'])
    else:
        df['Ruzgar_Enerjisi'] = 0
        df['Firtina_Gucu'] = 0

    return df.dropna()


def get_risk_cat(score):
    """Map a 0-100 risk score to its category label."""
    if score < 40:
        return "Düşük Risk"
    if score < 60:
        return "Orta Risk"
    if score < 70:
        return "RİSKLİ"
    if score < 80:
        return "YÜKSEK RİSKLİ"
    return "TEHLİKELİ"


# ---------------------------------------------------------
# 2. TANZER PROHİBRİT MODEL (MEGA ENSEMBLE)
# ---------------------------------------------------------
class TanzerProhibitModel:
    """Soft-voting ensemble whose members are weighted by hold-out F1 score.

    ``train`` fits eleven candidate classifiers and keeps those whose F1 on
    a 20% hold-out split exceeds ``MIN_F1_SCORE``. ``predict`` returns the
    weighted average positive-class probability as a percentage.
    """

    # Hold-out F1 a candidate must exceed to join the ensemble.
    MIN_F1_SCORE = 0.35

    def __init__(self):
        self.models = {}             # name -> fitted estimator
        self.weights = {}            # name -> normalised F1 weight
        self.model_performance = []  # one report row per candidate
        self.scaler = StandardScaler()

    @staticmethod
    def _build_candidates():
        """Return fresh, unfitted candidate estimators keyed by display name."""
        return {
            "RandomForest": RandomForestClassifier(
                n_estimators=150, max_depth=12, class_weight='balanced',
                n_jobs=-1, random_state=42),
            "ExtraTrees": ExtraTreesClassifier(
                n_estimators=150, max_depth=12, class_weight='balanced',
                n_jobs=-1, random_state=42),
            "XGBoost": xgb.XGBClassifier(
                n_estimators=150, max_depth=6, learning_rate=0.1, n_jobs=-1,
                eval_metric='logloss'),
            "LightGBM": lgb.LGBMClassifier(
                n_estimators=150, learning_rate=0.1, class_weight='balanced',
                verbose=-1, n_jobs=-1),
            "CatBoost": cb.CatBoostClassifier(
                iterations=150, depth=6, learning_rate=0.1,
                auto_class_weights='Balanced', verbose=0, thread_count=-1),
            "HistGradient": HistGradientBoostingClassifier(
                learning_rate=0.1, max_iter=150, random_state=42),
            "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42),
            "KNN": KNeighborsClassifier(
                n_neighbors=5, weights='distance', algorithm='kd_tree',
                leaf_size=40, n_jobs=-1),
            "FastSVM": SGDClassifier(
                loss='hinge', penalty='l2', alpha=0.0001, max_iter=1000,
                class_weight='balanced', n_jobs=-1, random_state=42),
            "NeuralNet": MLPClassifier(
                hidden_layer_sizes=(100, 50), max_iter=300, activation='relu',
                solver='adam', early_stopping=True, random_state=42),
            "NaiveBayes": GaussianNB(),
        }

    def train(self, X, y):
        """Fit all candidates and keep the ones that pass the F1 threshold.

        Args:
            X: Feature matrix.
            y: Binary labels; the split is stratified on them.

        Raises:
            ValueError: Propagated from ``train_test_split`` when the data
                cannot be split with stratification.
        """
        # Start clean so repeated calls do not mix models from earlier runs.
        self.models = {}
        self.weights = {}
        self.model_performance = []

        X_train_raw, X_test_raw, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y)
        # Fit the scaler on the training part only; fitting it before the
        # split would leak hold-out statistics into the F1 scores.
        X_train = self.scaler.fit_transform(X_train_raw)
        X_test = self.scaler.transform(X_test_raw)

        print("\n--- PROHİBRİT MODEL EĞİTİM SÜRECİ ---")

        pbar = tqdm(self._build_candidates().items(), desc="Model Eğitimi",
                    unit="model", ncols=100,
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}")
        for name, model in pbar:
            try:
                model.fit(X_train, y_train)
                score = f1_score(y_test, model.predict(X_test))
            except Exception as e:
                # One broken backend must not abort the other candidates.
                tqdm.write(f" ❌ {name} Hatası: {e}")
                self.model_performance.append(
                    {"Model": name, "F1 Score": 0.0, "Durum": "HATA"})
                continue

            tqdm.write(f" 🔹 {name:<15}: {score:.4f} (F1 Score)")
            accepted = score > self.MIN_F1_SCORE
            self.model_performance.append({
                "Model": name,
                "F1 Score": score,
                "Durum": "✅ Aktif" if accepted else "❌ Elendi",
            })
            if accepted:
                self.models[name] = model
                self.weights[name] = score

        total = sum(self.weights.values())
        if total > 0:
            for k in self.weights:
                self.weights[k] /= total
        else:
            # No candidate qualified: fall back to a single random forest.
            rf = RandomForestClassifier(random_state=42)
            rf.fit(X_train, y_train)
            self.models['RF'] = rf
            self.weights['RF'] = 1.0

        print("\n" + "=" * 50)
        print(" 🏆 TANZER PROHİBRİT - PERFORMANS KARNESİ 🏆")
        print("=" * 50)
        df_perf = pd.DataFrame(self.model_performance).sort_values(
            "F1 Score", ascending=False)
        print(df_perf.to_string(index=False,
                                formatters={'F1 Score': '{:.4f}'.format}))
        print("=" * 50 + "\n")

    @staticmethod
    def _positive_probability(model, X):
        """Return a per-row positive-class score from *model*."""
        if hasattr(model, "predict_proba"):
            return model.predict_proba(X)[:, 1]
        if hasattr(model, "decision_function"):
            # Squash the margin with a logistic function; this is not a
            # calibrated probability. Clipping avoids overflow in exp().
            margin = np.clip(model.decision_function(X), -500, 500)
            return 1.0 / (1.0 + np.exp(-margin))
        return model.predict(X)

    def predict(self, df_features):
        """Return the ensemble risk in percent for every row.

        Models that fail at prediction time are reported and skipped, and the
        remaining weights are renormalised so the result is not biased low.
        Empty input yields an empty array.
        """
        if len(df_features) == 0:
            return np.zeros(0)

        X = self.scaler.transform(df_features)
        weighted_sum = np.zeros(len(X))
        used_weight = 0.0
        for name, model in self.models.items():
            weight = self.weights.get(name, 0.0)
            try:
                prob = np.asarray(self._positive_probability(model, X),
                                  dtype=float)
            except Exception as exc:
                print(f" ❌ {name} Hatası: {exc}")
                continue
            weighted_sum += prob * weight
            used_weight += weight

        if used_weight <= 0:
            return np.zeros(len(X))
        return weighted_sum / used_weight * 100
# ---------------------------------------------------------
# 3. MAIN FLOW
# ---------------------------------------------------------
# Model input columns, in the order they are passed to the ensemble.
_FEATURES = ['Sicaklik', 'Nem', 'Yagis', 'Ruzgar_Hizi', 'Basinc',
             'Yagis_7G', 'Basinc_Trend', 'Basinc_Stabilite', 'Sicaklik_Soku',
             'Donma_Indeksi', 'Ruzgar_Enerjisi', 'Firtina_Gucu']
_FORECAST_URL = "https://api.open-meteo.com/v1/forecast"
_LINE_NAME_COLUMNS = ["Hat_Adı_2", "Hat Adı", "Hat_Adi", "HAT_ADI", "HAT ADI"]
# Set this environment variable to "1" to skip TLS certificate checks, e.g.
# behind an intercepting proxy. Verification is on by default.
_INSECURE_TLS_ENV = "PROHIBRIT_INSECURE_TLS"


def _load_weather_data():
    """Read the weather CSV into a clean frame, or return None on failure."""
    if not os.path.exists(MGM_DATA_FILE):
        print(f"❌ Meteoroloji verisi bulunamadı: {MGM_DATA_FILE}")
        return None

    wanted = set(MGM_MAPPING.values())
    try:
        df_mgm = pd.read_csv(MGM_DATA_FILE, sep=None, engine='python',
                             encoding='utf-8-sig',
                             usecols=lambda c: c.strip() in wanted)
    except Exception as exc:
        print(f"❌ Meteoroloji verisi okunamadı: {exc}")
        return None
    df_mgm.columns = df_mgm.columns.str.strip()

    date_col = MGM_MAPPING["Tarih"]
    if date_col not in df_mgm.columns:
        print(f"❌ Meteoroloji verisinde '{date_col}' sütunu yok.")
        return None

    clean_df = pd.DataFrame()
    clean_df["Tarih"] = pd.to_datetime(df_mgm[date_col], dayfirst=True,
                                       errors='coerce')
    for kod, dosya in MGM_MAPPING.items():
        if kod == "Tarih":
            continue
        if dosya in df_mgm.columns:
            # NOTE(review): non-numeric cells (for example a combined wind
            # direction/speed text) become 0 here — confirm this is intended.
            as_text = df_mgm[dosya].astype(str).str.replace(',', '.',
                                                            regex=False)
            clean_df[kod] = pd.to_numeric(as_text, errors='coerce').fillna(0)
        else:
            clean_df[kod] = 0.0

    return (clean_df.dropna(subset=['Tarih'])
            .sort_values('Tarih')
            .reset_index(drop=True))


def _load_fault_data():
    """Read the fault CSV with parsed dates/coordinates, or return None."""
    if not os.path.exists(FAULT_FILE):
        print(f"❌ Arıza dosyası bulunamadı: {FAULT_FILE}")
        return None
    try:
        df_fault = pd.read_csv(FAULT_FILE, sep=None, engine='python')
    except Exception as exc:
        print(f"❌ Arıza dosyası okunamadı: {exc}")
        return None

    def _is_date_column(col):
        # str.lower() turns 'İ' into 'i' plus a combining dot, so a header
        # such as "TARİH" would never match; normalise it first.
        name = str(col).replace('İ', 'i').lower()
        return 'tarih' in name or 'date' in name

    date_col = next((c for c in df_fault.columns if _is_date_column(c)), None)
    if date_col is None:
        print("❌ Arıza dosyasında tarih sütunu bulunamadı.")
        return None

    missing = [c for c in ('Enlem', 'Boylam') if c not in df_fault.columns]
    if missing:
        print(f"❌ Arıza dosyasında eksik sütun(lar): {', '.join(missing)}")
        return None

    df_fault['Tarih'] = df_fault[date_col].apply(convert_excel_date)
    df_fault['Enlem'] = df_fault['Enlem'].apply(clean_coord)
    # NOTE(review): clean_coord is called with its default range here, so
    # longitudes beyond ±90 are discarded — confirm for the service area.
    df_fault['Boylam'] = df_fault['Boylam'].apply(clean_coord)
    return df_fault.dropna(subset=['Tarih'])


def _build_training_set(clean_df, df_fault):
    """Label the weather history around faults and sample a training set.

    Rows from 7 days before to 5 hours after each fault are positives;
    up to five times as many other rows are sampled as negatives.
    Returns None when either class is empty.
    """
    full_data = calculate_features(clean_df)
    full_data['Ariza_Durumu'] = 0

    first_ts, last_ts = full_data.index.min(), full_data.index.max()
    for f_date in df_fault['Tarih']:
        try:
            start_risk = f_date - timedelta(days=7)
            end_risk = f_date + timedelta(hours=5)
            if start_risk < last_ts and end_risk > first_ts:
                full_data.loc[start_risk:end_risk, 'Ariza_Durumu'] = 1
        except (TypeError, ValueError, KeyError):
            # Skip dates that cannot be compared with the weather index.
            continue

    pos = full_data[full_data['Ariza_Durumu'] == 1]
    neg_pool = full_data[full_data['Ariza_Durumu'] == 0]
    n_neg = min(len(pos) * 5, len(neg_pool))
    if n_neg <= 0:
        print("❌ Eğitim için yeterli veri yok "
              "(arıza penceresi veya normal kayıt bulunamadı).")
        return None
    neg = neg_pool.sample(n=n_neg, random_state=42)
    return pd.concat([pos, neg])


def _fetch_future_features(session, lat, lon):
    """Download the forecast for one location and return upcoming features.

    Returns None on a non-200 response, otherwise the engineered rows from
    "now" onwards (possibly empty). Raises ``requests.RequestException`` on
    transport errors and ``KeyError``/``ValueError`` on an unexpected payload.
    """
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "temperature_2m,relative_humidity_2m,rain,wind_speed_10m,surface_pressure",
        "past_days": 7,
        "forecast_days": 3,
        "timezone": "auto",
    }
    verify_tls = os.environ.get(_INSECURE_TLS_ENV) != "1"
    response = session.get(_FORECAST_URL, params=params, timeout=10,
                           verify=verify_tls)
    if response.status_code != 200:
        return None

    data = response.json()
    df_api = pd.DataFrame(data['hourly'])
    df_api['time'] = pd.to_datetime(df_api['time'])
    df_api = df_api.rename(columns={
        'time': 'Tarih',
        'temperature_2m': 'Sicaklik',
        'relative_humidity_2m': 'Nem',
        'rain': 'Yagis',
        'wind_speed_10m': 'Ruzgar_Hizi',
        'surface_pressure': 'Basinc',
    })
    df_proc = calculate_features(df_api)
    df_proc.index = df_proc.index.tz_localize(None)

    # With timezone=auto the timestamps are local to the requested location,
    # so "now" must be local to it too rather than to this machine.
    offset = data.get('utc_offset_seconds')
    if offset is None:
        now = pd.Timestamp(datetime.now())
    else:
        utc_now = pd.Timestamp.now(tz='UTC').tz_localize(None)
        now = utc_now + pd.Timedelta(seconds=offset)

    future_df = df_proc[df_proc.index >= now].copy()
    for c in _FEATURES:
        if c not in future_df.columns:
            future_df[c] = 0
    return future_df


def _risk_color(score):
    """Return the palette colour for a risk score (same bands as the labels)."""
    if score < 40:
        return ACADEMIC_COLORS['dusuk']
    if score < 60:
        return ACADEMIC_COLORS['orta']
    if score < 70:
        return ACADEMIC_COLORS['riskli']
    if score < 80:
        return ACADEMIC_COLORS['yuksek']
    return ACADEMIC_COLORS['tehlikeli']


def _plot_bar_chart(df_res):
    """Save the horizontal bar chart of maximum risk per line."""
    fig = plt.figure(figsize=(14, 10))

    # Colour each bar by its risk band. The palette is keyed through `hue`
    # because passing `palette` without `hue` is deprecated in seaborn.
    bar_colors = {hat: _risk_color(score)
                  for hat, score in zip(df_res['Hat'], df_res['Risk (%)'])}
    ax = sns.barplot(x='Risk (%)', y='Hat', hue='Hat', data=df_res,
                     palette=bar_colors, dodge=False, edgecolor='.2',
                     linewidth=0.8)

    # Critical threshold line.
    threshold_line = plt.axvline(75, color=ACADEMIC_COLORS['yuksek'],
                                 linestyle='--', linewidth=2.5,
                                 label='Kritik Risk Eşiği (%75)')

    # Write the value at the end of each bar.
    for i, v in enumerate(df_res['Risk (%)']):
        ax.text(v + 0.5, i, f"%{v:.1f}", fontweight='bold', va='center',
                fontsize=12, color='black')

    plt.title('Enerji İletim Hatlarında Maksimum Risk Analizi\n(TANZER PROHİBRİT MODEL SONUÇLARI)',
              fontweight='bold', fontsize=16, pad=20)
    plt.xlabel('Hesaplanan Risk Skoru (%)', fontweight='bold', fontsize=12)
    plt.ylabel('Hat Adı', fontweight='bold', fontsize=12)
    # Explicit handles keep the per-line hue entries out of the legend.
    plt.legend(handles=[threshold_line], loc='lower right', frameon=True)
    plt.tight_layout()
    plt.savefig(PLOT_BAR, dpi=300)  # high resolution
    plt.close(fig)


def _plot_line_chart(df_res, line_data):
    """Save the per-line risk time series with shaded risk bands."""
    # ignore_index: every per-line frame starts at 0, and duplicate index
    # labels are a known source of reindexing errors in plotting code.
    all_lines = pd.concat(line_data, ignore_index=True)
    fig = plt.figure(figsize=(16, 10))

    sns.lineplot(data=all_lines, x='Tarih', y='Risk', hue='Hat',
                 palette='tab10', linewidth=3, alpha=0.9)

    # Mark the maximum of every line.
    for hat_name in df_res['Hat']:
        hat_df = all_lines[all_lines['Hat'] == hat_name]
        if hat_df.empty:
            continue
        max_row = hat_df.loc[hat_df['Risk'].idxmax()]
        plt.scatter(max_row['Tarih'], max_row['Risk'],
                    color=ACADEMIC_COLORS['yuksek'], s=120, zorder=5,
                    edgecolor='white', linewidth=2)
        plt.annotate(f"%{max_row['Risk']:.0f}",
                     (max_row['Tarih'], max_row['Risk']),
                     textcoords="offset points", xytext=(0, 12), ha='center',
                     bbox=dict(boxstyle="round,pad=0.4", fc="white",
                               ec=ACADEMIC_COLORS['yuksek'], lw=1.5),
                     fontsize=10, fontweight='bold')

    # Background risk bands.
    bands = [
        (0, 40, 'dusuk', 0.20, 'Düşük Risk Bölgesi'),
        (40, 60, 'orta', 0.20, 'Orta Risk Bölgesi'),
        (60, 70, 'riskli', 0.25, 'Riskli Bölge'),
        (70, 80, 'yuksek', 0.25, 'Yüksek Risk Bölgesi'),
        (80, 105, 'tehlikeli', 0.30, 'Tehlikeli Bölge'),
    ]
    for low, high, key, alpha, label in bands:
        plt.axhspan(low, high, color=ACADEMIC_COLORS[key], alpha=alpha,
                    label=label)

    plt.ylim(0, 105)
    plt.title('72 Saatlik Detaylı Zamansal Risk Değişimi', fontweight='bold',
              fontsize=18, pad=20)
    plt.xlabel('Zaman (Gün/Saat)', fontweight='bold', fontsize=12)
    plt.ylabel('Risk Yüzdesi (%)', fontweight='bold', fontsize=12)

    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
    plt.xticks(rotation=45)

    # Legend outside the axes.
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left',
               title="Hatlar ve Risk Bölgeleri", frameon=True, shadow=True,
               title_fontsize='12', fontsize='11')
    plt.grid(True, linestyle='-', linewidth=0.8, alpha=0.7)
    plt.tight_layout()
    plt.savefig(PLOT_LINE, dpi=300)  # high resolution
    plt.close(fig)


def main():
    """Train the ensemble on historical data and report per-line risk.

    Steps: load the weather and fault CSVs, label and sample the training
    set, train the ensemble, score the forecast for up to 14 lines, then
    write the Excel report and the two charts. Every early exit prints the
    reason instead of returning silently.
    """
    print("⏳ Veri Tabanı Yükleniyor...")
    clean_df = _load_weather_data()
    if clean_df is None:
        return
    df_fault = _load_fault_data()
    if df_fault is None:
        return

    # NOTE(review): the message says "-7 / +5 hours" while the window used
    # is 7 days before to 5 hours after a fault — confirm which is intended.
    print("⏳ Veri Seti İşleniyor (Pencere: -7 / +5 Saat)...")
    train_set = _build_training_set(clean_df, df_fault)
    if train_set is None:
        return

    # TRAINING
    print("🚀 TANZER PROHİBRİT MODEL EĞİTİLİYOR...")
    ensemble = TanzerProhibitModel()
    try:
        ensemble.train(train_set[_FEATURES], train_set['Ariza_Durumu'])
    except ValueError as exc:
        # Raised e.g. when a class is too small for the stratified split.
        print(f"❌ Model eğitilemedi: {exc}")
        return
    print("✅ Eğitim Tamamlandı.")

    # PREDICTION
    print("\n⏳ 14 Hat İçin Analiz Başlıyor (Lütfen Bekleyiniz)...")
    hat_col = next((c for c in df_fault.columns if c in _LINE_NAME_COLUMNS),
                   df_fault.columns[0])
    # Drop rows without coordinates first, so a line is not lost just
    # because its first occurrence has none.
    unique_lines = (df_fault[[hat_col, 'Enlem', 'Boylam']]
                    .dropna()
                    .drop_duplicates(subset=[hat_col])
                    .head(14))

    results = []
    line_data = []
    with requests.Session() as session:
        pbar = tqdm(unique_lines.iterrows(), total=len(unique_lines),
                    unit="hat",
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]")
        for _, row in pbar:
            hat_adi = row[hat_col]
            short_name = str(hat_adi)[:30]
            pbar.set_description(f"Analiz: {str(hat_adi)[:20]}")
            try:
                future_df = _fetch_future_features(session, row['Enlem'],
                                                   row['Boylam'])
                if future_df is None:
                    tqdm.write(f"❌ {short_name} : API Hatası")
                    continue
                if future_df.empty:
                    tqdm.write(f"⚠️ {short_name} : İleri tarihli tahmin verisi yok")
                    continue
                future_df['Risk'] = ensemble.predict(future_df[_FEATURES])
            except requests.RequestException:
                tqdm.write(f"❌ {short_name} : Bağlantı Hatası")
                continue
            except (KeyError, ValueError, TypeError) as exc:
                tqdm.write(f"❌ {short_name} : Veri İşleme Hatası ({exc})")
                continue

            max_idx = future_df['Risk'].idxmax()
            max_risk = future_df.loc[max_idx, 'Risk']
            cat = get_risk_cat(max_risk)
            tqdm.write(f"✅ {short_name:<30} : %{max_risk:.1f} ({cat})")
            results.append({"Hat": hat_adi, "Risk (%)": max_risk,
                            "Kategori": cat, "Zaman": max_idx})

            p_data = future_df[['Risk']].reset_index()
            p_data['Hat'] = hat_adi
            line_data.append(p_data)

    # --- ACADEMIC REPORTING AND VISUALISATION ---
    if not results:
        print("\n❌ Sonuç üretilemedi.")
        return

    df_res = pd.DataFrame(results).sort_values("Risk (%)", ascending=False)
    try:
        df_res.to_excel(OUTPUT_EXCEL, index=False)
    except (ImportError, OSError) as exc:
        # A missing Excel engine or a locked file must not cost the charts.
        print(f"⚠️ Excel raporu yazılamadı: {exc}")

    sns.set_theme(style="whitegrid", font_scale=1.1,
                  rc={"grid.linewidth": 0.6, "axes.linewidth": 1})
    _plot_bar_chart(df_res)
    if line_data:
        _plot_line_chart(df_res, line_data)

    print(f"\n✅ BAŞARILI! Akademik Rapor ve Yüksek Çözünürlüklü Grafikler Oluşturuldu.")
    print(f" Çıktı Klasörü: {OUTPUT_DIR}")


if __name__ == "__main__":
    main()