# Tanzer-Infinity-AI / tanzerultimate.py
# mzekcy's picture
# Upload 2 files
# 56ac446 verified
import pandas as pd
import numpy as np
import requests
import time
import os
import warnings
import sys
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import datetime, timedelta
from tqdm import tqdm
# --- GEREKLİ KÜTÜPHANELER ---
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier, \
AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
# --- SETTINGS ---
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# NOTE(review): this hides every Python warning for the whole process,
# including deprecation notices raised by the libraries used below.
warnings.filterwarnings("ignore")
# Silences urllib3's HTTP warnings.
requests.packages.urllib3.disable_warnings()

# --- ACADEMIC COLOUR PALETTE ---
# Sober, clearly distinguishable colours suitable for scientific publications.
# Keys are the risk bands used by the charts.
ACADEMIC_COLORS = {
    'dusuk': '#2E7D32',  # dark green (forest green)
    'orta': '#F9A825',  # dark mustard yellow
    'riskli': '#EF6C00',  # dark orange (brick)
    'yuksek': '#C62828',  # dark burgundy
    'tehlikeli': '#37474F'  # dark anthracite blue/black
}

# --- FILES AND FOLDERS ---
MGM_DATA_FILE = "mgm.csv"
FAULT_FILE = "faults.csv"
MAIN_FOLDER = "ariza_grafikleri"
SUB_FOLDER = "TANZER_PROHIBRIT_RESULTS"
OUTPUT_DIR = os.path.join(MAIN_FOLDER, SUB_FOLDER)
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_EXCEL = os.path.join(OUTPUT_DIR, "PROHIBRIT_RISK_RAPORU.xlsx")
PLOT_BAR = os.path.join(OUTPUT_DIR, "PROHIBRIT_BAR_CHART.png")
PLOT_LINE = os.path.join(OUTPUT_DIR, "PROHIBRIT_LINE_CHART.png")

# Internal column code -> column header expected in MGM_DATA_FILE.
MGM_MAPPING = {
    "Tarih": "Zaman",
    "Sicaklik": "Sıcaklık",
    "Nem": "Nispi Nem",
    "Yagis": "Toplam Yağış OMGI mm",
    "Ruzgar_Hizi": "Rüzgar Yönü ve Hızı",
    "Basinc": "Deniz Seviyesine İndirgenmiş Basınç hPa"
}

# Start-up banner.
print("========================================================================")
print(" TANZER PROHİBRİT MODEL (AKADEMİK SÜRÜM)")
print(" (11 Model Ensemble + Gelişmiş Bilimsel Görselleştirme)")
print("========================================================================\n")
# ---------------------------------------------------------
# 1. VERİ İŞLEME
# ---------------------------------------------------------
def clean_number(val):
    """Return the first number embedded in *val* as a float.

    Decimal commas are treated as decimal points. Missing values, empty
    strings, and inputs that contain no number all yield ``0.0``.

    Args:
        val: Any scalar; it is converted with ``str()`` before parsing.

    Returns:
        float: The first signed integer or decimal found, else ``0.0``.
    """
    try:
        if pd.isna(val) or val == "":
            return 0.0
        text = str(val).replace(',', '.').strip()
        # The sign is grouped with BOTH alternatives; without the group a
        # negative integer such as "-5" would match only "5" and lose its sign.
        match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", text)
        return float(match.group()) if match else 0.0
    except (TypeError, ValueError):
        # Non-scalar or otherwise unparseable input: keep the best-effort default.
        return 0.0
def convert_excel_date(val):
    """Convert a cell value to a timestamp, accepting Excel serial day numbers.

    A purely numeric value strictly between 30000 and 60000 is read as a day
    count from 1899-12-30. Anything else is handed to ``pd.to_datetime``,
    first with ``dayfirst=True`` and then with default settings.

    Args:
        val: Raw cell value (string, number, or missing).

    Returns:
        The parsed timestamp, or ``pd.NaT`` when the value is missing, empty,
        or cannot be parsed.
    """
    if pd.isna(val) or val == "":
        return pd.NaT

    try:
        text = str(val).replace(',', '.').strip()
        if re.match(r'^\d+(\.\d+)?$', text):
            serial = float(text)
            if 30000 < serial < 60000:
                return pd.Timestamp('1899-12-30') + pd.to_timedelta(serial, unit='D')
    except Exception:
        # Not usable as a serial number; fall through to general date parsing.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        pass

    # Day-first parsing is tried before pandas' default interpretation.
    for parse_options in ({'dayfirst': True}, {}):
        try:
            return pd.to_datetime(val, **parse_options)
        except Exception:
            continue
    return pd.NaT
def clean_coord(val, max_abs=90.0):
    """Parse a coordinate value and return it only if it lies within range.

    Decimal commas become decimal points and every character other than
    digits, ``.`` and ``-`` is discarded before conversion.

    Args:
        val: Raw coordinate (string or number).
        max_abs: Largest accepted absolute value. The default of 90 matches
            the latitude range; pass 180 to validate longitudes.

    Returns:
        float | None: The coordinate, or ``None`` when it cannot be parsed or
        falls outside ``[-max_abs, max_abs]``.
    """
    try:
        text = re.sub(r"[^0-9\.\-]", "", str(val).replace(',', '.'))
        number = float(text)
    except (TypeError, ValueError):
        return None
    if -max_abs <= number <= max_abs:
        return number
    # Out of range: returned explicitly rather than by falling off the end.
    return None
def calculate_features(df):
    """Derive the engineered weather features on a time-indexed copy of *df*.

    The frame is indexed by its 'Tarih' column when present (otherwise the
    existing index is used), the index is coerced to datetimes, rows with an
    unparseable timestamp are removed, and the rows are sorted by time. The
    caller's frame is never modified.

    Added columns (each is a constant 0 when its source column is absent):
        Yagis_7G         : sum of 'Yagis' over a trailing 7-day window
        Basinc_Trend     : 'Basinc' minus its value 24 rows earlier
        Basinc_Stabilite : standard deviation of 'Basinc' over a trailing 3 days
        Sicaklik_Soku    : absolute change of 'Sicaklik' over 6 rows
        Donma_Indeksi    : Yagis_7G where 'Sicaklik' < 0, else 0
        Ruzgar_Enerjisi  : 'Ruzgar_Hizi' squared
        Firtina_Gucu     : Ruzgar_Enerjisi * log(Yagis_7G + 1)

    Args:
        df: DataFrame with a 'Tarih' column or a date-like index.

    Returns:
        pandas.DataFrame: New frame with the extra columns; rows that still
        contain a NaN in any column are dropped.
    """
    # set_index() already returns a new frame; copy explicitly otherwise so the
    # index assignment below cannot overwrite the caller's index in place.
    df = df.set_index('Tarih') if 'Tarih' in df.columns else df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index, dayfirst=True, errors='coerce')
    df = df[df.index.notnull()].sort_index()

    if 'Yagis' in df.columns:
        df['Yagis_7G'] = df['Yagis'].rolling('7d').sum().fillna(0)
    else:
        df['Yagis_7G'] = 0

    if 'Basinc' in df.columns:
        # NOTE(review): diff(24)/diff(6) count rows, not hours - presumably the
        # data is hourly; confirm against the input files.
        df['Basinc_Trend'] = df['Basinc'].diff(24).fillna(0)
        df['Basinc_Stabilite'] = df['Basinc'].rolling('3d').std().fillna(0)
    else:
        df['Basinc_Trend'] = 0
        df['Basinc_Stabilite'] = 0

    if 'Sicaklik' in df.columns:
        df['Sicaklik_Soku'] = df['Sicaklik'].diff(6).abs().fillna(0)
        df['Donma_Indeksi'] = (df['Sicaklik'] < 0).astype(int) * df['Yagis_7G']
    else:
        df['Sicaklik_Soku'] = 0
        df['Donma_Indeksi'] = 0

    if 'Ruzgar_Hizi' in df.columns:
        df['Ruzgar_Enerjisi'] = df['Ruzgar_Hizi'] ** 2
        # Direct vectorised call; same values as Series.apply(np.log).
        df['Firtina_Gucu'] = df['Ruzgar_Enerjisi'] * np.log(df['Yagis_7G'] + 1)
    else:
        df['Ruzgar_Enerjisi'] = 0
        df['Firtina_Gucu'] = 0

    return df.dropna()
def get_risk_cat(score):
    """Map a risk score to its category label.

    Bands: below 40, 40 to <60, 60 to <70, 70 to <80, and everything else.

    Args:
        score: Numeric risk score.

    Returns:
        str: The label of the first band whose upper limit exceeds *score*;
        the final label when no band matches.
    """
    bands = (
        (40, "Düşük Risk"),
        (60, "Orta Risk"),
        (70, "RİSKLİ"),
        (80, "YÜKSEK RİSKLİ"),
    )
    for upper_limit, label in bands:
        if score < upper_limit:
            return label
    return "TEHLİKELİ"
# ---------------------------------------------------------
# 2. TANZER PROHİBRİT MODEL (MEGA ENSEMBLE)
# ---------------------------------------------------------
class TanzerProhibitModel:
    """Soft-voting ensemble of classifiers weighted by their hold-out F1 score.

    ``train`` fits eleven candidate classifiers on standardised features,
    keeps those whose F1 score on a 20 % hold-out split exceeds
    ``MIN_F1_SCORE`` and stores normalised F1 scores as voting weights.
    ``predict`` returns the weighted positive-class score scaled to 0-100.
    """

    # A candidate must score strictly above this F1 value to join the ensemble.
    MIN_F1_SCORE = 0.35

    def __init__(self):
        self.models = {}  # name -> fitted estimator kept in the ensemble
        self.weights = {}  # name -> voting weight (sums to 1 after train)
        self.model_performance = []  # one report row per candidate model
        self.scaler = StandardScaler()

    def train(self, X, y):
        """Fit all candidate models and build the weighted ensemble.

        Args:
            X: Feature matrix accepted by ``StandardScaler``.
            y: Binary target; also used to stratify the hold-out split.

        A model that raises while fitting or scoring is reported with status
        "HATA" and skipped. If no model clears the threshold, a single random
        forest is trained and used with weight 1.
        """
        # Reset state so that calling train() again does not mix stale models,
        # weights, or report rows into the new ensemble.
        self.models = {}
        self.weights = {}
        self.model_performance = []

        # NOTE(review): the scaler is fitted before the split, so hold-out rows
        # influence the scaling statistics used during evaluation.
        X_scaled = self.scaler.fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42, stratify=y)
        print("\n--- PROHİBRİT MODEL EĞİTİM SÜRECİ ---")
        models_to_train = {
            "RandomForest": RandomForestClassifier(n_estimators=150, max_depth=12, class_weight='balanced',
                                                   n_jobs=-1, random_state=42),
            "ExtraTrees": ExtraTreesClassifier(n_estimators=150, max_depth=12, class_weight='balanced',
                                               n_jobs=-1, random_state=42),
            "XGBoost": xgb.XGBClassifier(n_estimators=150, max_depth=6, learning_rate=0.1, n_jobs=-1,
                                         eval_metric='logloss'),
            "LightGBM": lgb.LGBMClassifier(n_estimators=150, learning_rate=0.1, class_weight='balanced',
                                           verbose=-1, n_jobs=-1),
            "CatBoost": cb.CatBoostClassifier(iterations=150, depth=6, learning_rate=0.1,
                                              auto_class_weights='Balanced', verbose=0, thread_count=-1),
            "HistGradient": HistGradientBoostingClassifier(learning_rate=0.1, max_iter=150, random_state=42),
            "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42),
            "KNN": KNeighborsClassifier(n_neighbors=5, weights='distance', algorithm='kd_tree', leaf_size=40,
                                        n_jobs=-1),
            "FastSVM": SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, max_iter=1000,
                                     class_weight='balanced', n_jobs=-1, random_state=42),
            "NeuralNet": MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, activation='relu',
                                       solver='adam', early_stopping=True, random_state=42),
            "NaiveBayes": GaussianNB()
        }
        pbar = tqdm(models_to_train.items(), desc="Model Eğitimi", unit="model", ncols=100,
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}")
        for name, model in pbar:
            try:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                score = f1_score(y_test, y_pred)
            except Exception as e:
                # One failing library must not abort the whole training run.
                tqdm.write(f" ❌ {name} Hatası: {e}")
                self.model_performance.append({"Model": name, "F1 Score": 0.0, "Durum": "HATA"})
                continue

            tqdm.write(f" 🔹 {name:<15}: {score:.4f} (F1 Score)")
            accepted = score > self.MIN_F1_SCORE
            status = "✅ Aktif" if accepted else "❌ Elendi"
            self.model_performance.append({"Model": name, "F1 Score": score, "Durum": status})
            if accepted:
                self.models[name] = model
                self.weights[name] = score

        total = sum(self.weights.values())
        if total > 0:
            # Normalise the F1 scores so that the voting weights sum to 1.
            for k in self.weights:
                self.weights[k] /= total
        else:
            # No candidate qualified: fall back to a single forest. Seeded like
            # the other models so that repeated runs give the same result.
            rf = RandomForestClassifier(random_state=42)
            rf.fit(X_train, y_train)
            self.models['RF'] = rf
            self.weights['RF'] = 1.0

        print("\n" + "=" * 50)
        print(" 🏆 TANZER PROHİBRİT - PERFORMANS KARNESİ 🏆")
        print("=" * 50)
        df_perf = pd.DataFrame(self.model_performance).sort_values("F1 Score", ascending=False)
        print(df_perf.to_string(index=False, formatters={'F1 Score': '{:.4f}'.format}))
        print("=" * 50 + "\n")

    def predict(self, df_features):
        """Return the ensemble risk score (0-100) for each row of *df_features*.

        Each model contributes ``predict_proba`` for the positive class, or a
        logistic transform of ``decision_function``, or its raw ``predict``
        output, multiplied by its weight. A model that fails is reported on
        stderr and left out; the remaining weights are then renormalised so the
        score is not silently deflated.

        Args:
            df_features: Feature rows in the column order used for training.

        Returns:
            numpy.ndarray: One score per input row.

        Raises:
            RuntimeError: If no model produced a prediction.
        """
        X = self.scaler.transform(df_features)
        final_prob = np.zeros(len(X))
        used_weight = 0.0
        skipped = []
        for name, model in self.models.items():
            try:
                weight = self.weights[name]
                if hasattr(model, "predict_proba"):
                    prob = model.predict_proba(X)[:, 1]
                elif hasattr(model, "decision_function"):
                    # Squash the signed margin into (0, 1) with a logistic curve.
                    d = model.decision_function(X)
                    prob = 1 / (1 + np.exp(-d))
                else:
                    prob = model.predict(X)
                updated = final_prob + np.asarray(prob, dtype=float) * weight
            except Exception as e:
                skipped.append(name)
                print(f" ⚠️ {name} tahmin hatası: {e}", file=sys.stderr)
                continue
            final_prob = updated
            used_weight += weight

        if used_weight <= 0:
            # An all-zero score would read as "no risk"; fail loudly instead.
            raise RuntimeError("Hiçbir model tahmin üretemedi.")
        if skipped:
            # Rescale only when a model dropped out, so the normal path keeps
            # exactly the weights computed during training.
            final_prob = final_prob / used_weight
        return final_prob * 100
# ---------------------------------------------------------
# 3. ANA AKIŞ
# ---------------------------------------------------------
def _load_weather_data():
    """Read MGM_DATA_FILE into a frame with 'Tarih' plus one numeric column per MGM_MAPPING key."""
    use_cols = list(MGM_MAPPING.values())
    df_mgm = pd.read_csv(MGM_DATA_FILE, sep=None, engine='python', encoding='utf-8-sig',
                         usecols=lambda c: c.strip() in use_cols)
    df_mgm.columns = df_mgm.columns.str.strip()

    target_date_col = MGM_MAPPING["Tarih"]
    if target_date_col not in df_mgm.columns:
        raise ValueError(f"'{target_date_col}' sütunu bulunamadı")

    clean_df = pd.DataFrame()
    clean_df["Tarih"] = pd.to_datetime(df_mgm[target_date_col], dayfirst=True, errors='coerce')
    for kod, dosya in MGM_MAPPING.items():
        if kod == "Tarih":
            continue
        if dosya in df_mgm.columns:
            # Decimal commas -> points; anything non-numeric becomes 0.
            clean_df[kod] = pd.to_numeric(df_mgm[dosya].astype(str).str.replace(',', '.'),
                                          errors='coerce').fillna(0)
        else:
            clean_df[kod] = 0.0
    return clean_df.dropna(subset=['Tarih']).sort_values('Tarih').reset_index(drop=True)


def _load_fault_data():
    """Read FAULT_FILE, parse its date column into 'Tarih' and clean the 'Enlem'/'Boylam' coordinates."""
    df_fault = pd.read_csv(FAULT_FILE, sep=None, engine='python')
    date_cols = [c for c in df_fault.columns if 'tarih' in str(c).lower() or 'date' in str(c).lower()]
    if not date_cols:
        raise ValueError("tarih sütunu bulunamadı")
    df_fault['Tarih'] = df_fault[date_cols[0]].apply(convert_excel_date)

    # Both columns are required later to query the forecast API; fail here
    # instead of with a KeyError after the models have been trained.
    missing = [c for c in ('Enlem', 'Boylam') if c not in df_fault.columns]
    if missing:
        raise ValueError(f"eksik sütun(lar): {', '.join(missing)}")
    # NOTE(review): clean_coord is applied to longitudes as well, so its
    # default range check also limits the 'Boylam' values that survive.
    df_fault['Enlem'] = df_fault['Enlem'].apply(clean_coord)
    df_fault['Boylam'] = df_fault['Boylam'].apply(clean_coord)
    return df_fault.dropna(subset=['Tarih'])


def _build_training_set(clean_df, df_fault):
    """Label weather rows around each fault as positive and return positives plus up to 5x sampled negatives (or None)."""
    full_data = calculate_features(clean_df)
    full_data['Ariza_Durumu'] = 0
    first_ts, last_ts = full_data.index.min(), full_data.index.max()
    for f_date in df_fault['Tarih']:
        try:
            # NOTE(review): the window is 7 DAYS before to 5 hours after the
            # fault, while the progress message reads "-7 / +5 Saat" - confirm
            # which one is intended.
            start_risk = f_date - timedelta(days=7)
            end_risk = f_date + timedelta(hours=5)
            if start_risk < last_ts and end_risk > first_ts:
                full_data.loc[start_risk:end_risk, 'Ariza_Durumu'] = 1
        except Exception:
            # Best effort: a fault date that cannot be compared with the index is skipped.
            continue

    pos = full_data[full_data['Ariza_Durumu'] == 1]
    neg_pool = full_data[full_data['Ariza_Durumu'] == 0]
    n_neg = min(len(pos) * 5, len(neg_pool))
    if n_neg <= 0:
        return None
    neg = neg_pool.sample(n=n_neg, random_state=42)
    return pd.concat([pos, neg])


def _forecast_line_risk(session, ensemble, features, hat_adi, lat, lon):
    """Fetch the Open-Meteo forecast for one line and score it; return (summary dict, risk series frame) or None."""
    label = str(hat_adi)[:30]
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat, "longitude": lon,
        "hourly": "temperature_2m,relative_humidity_2m,rain,wind_speed_10m,surface_pressure",
        "past_days": 7, "forecast_days": 3, "timezone": "auto"
    }
    try:
        # NOTE(review): verify=False disables TLS certificate verification for
        # this request - confirm this is really required in the target environment.
        r = session.get(url, params=params, timeout=10, verify=False)
    except requests.RequestException:
        tqdm.write(f"❌ {label} : Bağlantı Hatası")
        return None
    if r.status_code != 200:
        tqdm.write(f"❌ {label} : API Hatası")
        return None

    try:
        data = r.json()
        df_api = pd.DataFrame(data['hourly'])
        df_api['time'] = pd.to_datetime(df_api['time'])
        df_api = df_api.rename(
            columns={'time': 'Tarih', 'temperature_2m': 'Sicaklik', 'relative_humidity_2m': 'Nem',
                     'rain': 'Yagis', 'wind_speed_10m': 'Ruzgar_Hizi', 'surface_pressure': 'Basinc'})
        df_proc = calculate_features(df_api)
        df_proc.index = df_proc.index.tz_localize(None)

        # With timezone="auto" the API timestamps are local to the queried
        # location, so "now" is derived from the reported UTC offset rather than
        # from this machine's clock zone. Falls back to local time if absent.
        offset = data.get('utc_offset_seconds')
        if offset is None:
            now = datetime.now()
        else:
            now = pd.Timestamp.now(tz='UTC').tz_localize(None) + pd.Timedelta(seconds=offset)

        future_df = df_proc[df_proc.index >= now].copy()
        if future_df.empty:
            return None
        for c in features:
            if c not in future_df.columns:
                future_df[c] = 0
        future_df['Risk'] = ensemble.predict(future_df[features])
        max_idx = future_df['Risk'].idxmax()
        max_risk = future_df.loc[max_idx, 'Risk']
        cat = get_risk_cat(max_risk)
    except Exception as e:
        # Not a network problem: report what actually failed.
        tqdm.write(f"❌ {label} : Analiz Hatası ({e})")
        return None

    tqdm.write(f"✅ {label:<30} : %{max_risk:.1f} ({cat})")
    summary = {"Hat": hat_adi, "Risk (%)": max_risk, "Kategori": cat, "Zaman": max_idx}
    p_data = future_df[['Risk']].reset_index()
    p_data['Hat'] = hat_adi
    return summary, p_data


def _risk_color(score):
    """Return the ACADEMIC_COLORS entry for a score, using the 40/60/70/80 band limits."""
    if score < 40:
        return ACADEMIC_COLORS['dusuk']
    if score < 60:
        return ACADEMIC_COLORS['orta']
    if score < 70:
        return ACADEMIC_COLORS['riskli']
    if score < 80:
        return ACADEMIC_COLORS['yuksek']
    return ACADEMIC_COLORS['tehlikeli']


def _save_bar_chart(df_res):
    """Draw the horizontal bar chart of maximum risk per line and save it to PLOT_BAR."""
    fig = plt.figure(figsize=(14, 10))
    # One colour per bar, chosen by the bar's risk band.
    colors = [_risk_color(x) for x in df_res['Risk (%)']]
    ax = sns.barplot(x='Risk (%)', y='Hat', data=df_res, palette=colors, edgecolor='.2', linewidth=0.8)
    # Critical threshold line.
    plt.axvline(75, color=ACADEMIC_COLORS['yuksek'], linestyle='--', linewidth=2.5,
                label='Kritik Risk Eşiği (%75)')
    # Print each value at the end of its bar.
    for i, v in enumerate(df_res['Risk (%)']):
        ax.text(v + 0.5, i, f"%{v:.1f}", fontweight='bold', va='center', fontsize=12, color='black')
    plt.title('Enerji İletim Hatlarında Maksimum Risk Analizi\n(TANZER PROHİBRİT MODEL SONUÇLARI)',
              fontweight='bold', fontsize=16, pad=20)
    plt.xlabel('Hesaplanan Risk Skoru (%)', fontweight='bold', fontsize=12)
    plt.ylabel('Hat Adı', fontweight='bold', fontsize=12)
    plt.legend(loc='lower right', frameon=True)
    plt.tight_layout()
    plt.savefig(PLOT_BAR, dpi=300)  # high resolution
    plt.close(fig)  # release the figure once it is on disk


def _save_line_chart(df_res, line_data):
    """Draw the per-line risk time series with shaded risk bands and save it to PLOT_LINE."""
    # ignore_index gives every row a unique label; the per-line frames all
    # start at 0 and would otherwise produce duplicate index labels.
    all_lines = pd.concat(line_data, ignore_index=True)
    fig = plt.figure(figsize=(16, 10))
    sns.lineplot(data=all_lines, x='Tarih', y='Risk', hue='Hat', palette='tab10', linewidth=3, alpha=0.9)

    # Mark and annotate the maximum of every line.
    for hat_name in df_res['Hat']:
        hat_df = all_lines[all_lines['Hat'] == hat_name]
        if hat_df.empty:
            continue
        max_row = hat_df.loc[hat_df['Risk'].idxmax()]
        plt.scatter(max_row['Tarih'], max_row['Risk'], color=ACADEMIC_COLORS['yuksek'], s=120, zorder=5,
                    edgecolor='white', linewidth=2)
        plt.annotate(f"%{max_row['Risk']:.0f}",
                     (max_row['Tarih'], max_row['Risk']),
                     textcoords="offset points", xytext=(0, 12), ha='center',
                     bbox=dict(boxstyle="round,pad=0.4", fc="white", ec=ACADEMIC_COLORS['yuksek'], lw=1.5),
                     fontsize=10, fontweight='bold')

    # Shaded background bands for the risk categories.
    plt.axhspan(0, 40, color=ACADEMIC_COLORS['dusuk'], alpha=0.20, label='Düşük Risk Bölgesi')
    plt.axhspan(40, 60, color=ACADEMIC_COLORS['orta'], alpha=0.20, label='Orta Risk Bölgesi')
    plt.axhspan(60, 70, color=ACADEMIC_COLORS['riskli'], alpha=0.25, label='Riskli Bölge')
    plt.axhspan(70, 80, color=ACADEMIC_COLORS['yuksek'], alpha=0.25, label='Yüksek Risk Bölgesi')
    plt.axhspan(80, 105, color=ACADEMIC_COLORS['tehlikeli'], alpha=0.30, label='Tehlikeli Bölge')
    plt.ylim(0, 105)
    plt.title('72 Saatlik Detaylı Zamansal Risk Değişimi', fontweight='bold', fontsize=18, pad=20)
    plt.xlabel('Zaman (Gün/Saat)', fontweight='bold', fontsize=12)
    plt.ylabel('Risk Yüzdesi (%)', fontweight='bold', fontsize=12)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
    plt.xticks(rotation=45)
    # Legend is placed outside the axes.
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', title="Hatlar ve Risk Bölgeleri",
               frameon=True, shadow=True, title_fontsize='12', fontsize='11')
    plt.grid(True, linestyle='-', linewidth=0.8, alpha=0.7)
    plt.tight_layout()
    plt.savefig(PLOT_LINE, dpi=300)  # high resolution
    plt.close(fig)


def main():
    """Run the pipeline: load data, train the ensemble, score up to 14 lines and write the report.

    Reads MGM_DATA_FILE and FAULT_FILE, trains TanzerProhibitModel on the
    labelled weather history, queries the Open-Meteo forecast for each line,
    then writes OUTPUT_EXCEL, PLOT_BAR and PLOT_LINE. Every early exit prints
    the reason instead of returning silently.
    """
    print("⏳ Veri Tabanı Yükleniyor...")
    if not os.path.exists(MGM_DATA_FILE):
        print(f"❌ '{MGM_DATA_FILE}' bulunamadı.")
        return
    try:
        clean_df = _load_weather_data()
    except Exception as e:
        print(f"❌ '{MGM_DATA_FILE}' okunamadı: {e}")
        return

    if not os.path.exists(FAULT_FILE):
        print(f"❌ '{FAULT_FILE}' bulunamadı.")
        return
    try:
        df_fault = _load_fault_data()
    except Exception as e:
        print(f"❌ '{FAULT_FILE}' okunamadı: {e}")
        return

    print("⏳ Veri Seti İşleniyor (Pencere: -7 / +5 Saat)...")
    train_set = _build_training_set(clean_df, df_fault)
    if train_set is None:
        print("❌ Eğitim için yeterli veri bulunamadı.")
        return

    features = ['Sicaklik', 'Nem', 'Yagis', 'Ruzgar_Hizi', 'Basinc',
                'Yagis_7G', 'Basinc_Trend', 'Basinc_Stabilite',
                'Sicaklik_Soku', 'Donma_Indeksi', 'Ruzgar_Enerjisi', 'Firtina_Gucu']

    # TRAINING
    print("🚀 TANZER PROHİBRİT MODEL EĞİTİLİYOR...")
    ensemble = TanzerProhibitModel()
    ensemble.train(train_set[features], train_set['Ariza_Durumu'])
    print("✅ Eğitim Tamamlandı.")

    # PREDICTION
    print("\n⏳ 14 Hat İçin Analiz Başlıyor (Lütfen Bekleyiniz)...")
    possible_names = ["Hat_Adı_2", "Hat Adı", "Hat_Adi", "HAT_ADI", "HAT ADI"]
    # Falls back to the first column when none of the known names is present.
    hat_col = next((c for c in df_fault.columns if c in possible_names), df_fault.columns[0])
    unique_lines = df_fault[[hat_col, 'Enlem', 'Boylam']].drop_duplicates(subset=[hat_col]).dropna().head(14)

    results = []
    line_data = []
    # The context manager closes the pooled connections when the loop is done.
    with requests.Session() as session:
        pbar = tqdm(unique_lines.iterrows(), total=len(unique_lines), unit="hat",
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]")
        for _, row in pbar:
            hat_adi = row[hat_col]
            pbar.set_description(f"Analiz: {str(hat_adi)[:20]}")
            outcome = _forecast_line_risk(session, ensemble, features, hat_adi, row['Enlem'], row['Boylam'])
            if outcome is None:
                continue
            summary, series = outcome
            results.append(summary)
            line_data.append(series)

    # --- REPORTING AND VISUALISATION ---
    if not results:
        print("\n❌ Sonuç üretilemedi.")
        return

    df_res = pd.DataFrame(results).sort_values("Risk (%)", ascending=False)
    df_res.to_excel(OUTPUT_EXCEL, index=False)
    sns.set_theme(style="whitegrid", font_scale=1.1, rc={"grid.linewidth": 0.6, "axes.linewidth": 1})
    _save_bar_chart(df_res)
    if line_data:
        _save_line_chart(df_res, line_data)
    print(f"\n✅ BAŞARILI! Akademik Rapor ve Yüksek Çözünürlüklü Grafikler Oluşturuldu.")
    print(f" Çıktı Klasörü: {OUTPUT_DIR}")


if __name__ == "__main__":
    main()