Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import OneHotEncoder | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.linear_model import LogisticRegression | |
# ----------------------------
# Load the CSV and clean up the column names
# ----------------------------
df = pd.read_csv("Testdaten_Mersen_kompatibel.csv", sep=";", encoding="utf-8-sig")

# Replace non-breaking spaces (U+00A0) with ordinary spaces first, then trim.
# The character is written as an explicit escape so that editors or copy/paste
# cannot silently turn it into a plain space and make the replacement a no-op.
df.columns = (
    df.columns
    .str.replace("\u00a0", " ", regex=False)
    .str.strip()
)

# Process the date column; it is required to derive the weekday feature.
if "Anfrage_Datum" not in df.columns:
    raise ValueError("Spalte 'Anfrage_Datum' nicht gefunden. Verfügbare Spalten: " + str(df.columns.tolist()))

# errors="coerce" turns unparseable dates into NaT, so their weekday is missing.
# NOTE(review): no explicit date format / dayfirst is given — confirm that the
# CSV dates are parsed as intended.
df["Anfrage_Datum"] = pd.to_datetime(df["Anfrage_Datum"], errors="coerce")
df["Wochentag"] = df["Anfrage_Datum"].dt.day_name()
# ----------------------------
# Define features & target
# ----------------------------
# Column groups by data type.
categorical_features = [
    "Kundentyp",
    "Branche",
    "Produktgruppe",
    "Region",
    "Kanal",
    "Dringlichkeit",
    "Wochentag",
]
text_feature = "Anfrage_Text"
numeric_feature = "Projektgröße (€)"

# Feature matrix: categorical columns first, then the free text, then the
# numeric column. The target is the "Abschluss" column.
X = df[categorical_features + [text_feature, numeric_feature]]
y = df["Abschluss"]
# ----------------------------
# Model pipeline
# ----------------------------
# Imported here so this section carries its own dependency.
from sklearn.preprocessing import StandardScaler

# One-hot encode the categorical columns (categories unseen during fitting are
# ignored at prediction time), TF-IDF the free text, and standardise the
# numeric column. The text column is passed as a plain string because
# TfidfVectorizer expects 1-D input; the numeric column is passed as a list
# because StandardScaler expects 2-D input.
# Standardising keeps the euro amount on a scale comparable to the other
# features, so it neither dominates the regularised logistic regression nor
# slows down solver convergence.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ("text", TfidfVectorizer(), text_feature),
    ("num", StandardScaler(), [numeric_feature]),
])

pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=1000)),
])

# The model is fitted on the full data set; no hold-out split is made here.
pipeline.fit(X, y)
# ----------------------------
# Prediction function
# ----------------------------
def predict_lead(kundentyp, branche, produktgruppe, region, kanal,
                 dringlichkeit, wochentag, anfrage_text, projektgroesse):
    """Score a single quote request and return a formatted priority message.

    The arguments are assembled into a one-row DataFrame with the column names
    used for training and passed to the fitted module-level ``pipeline``.

    Args:
        kundentyp, branche, produktgruppe, region, kanal, dringlichkeit,
        wochentag: Categorical values for the corresponding columns.
        anfrage_text: Free text of the request; a missing value is treated as
            an empty string.
        projektgroesse: Project size in euros; anything ``float()`` accepts.

    Returns:
        A string with the probability (two decimals) and the priority
        (HOCH at >= 0.75, MITTEL at >= 0.4, otherwise NIEDRIG), or a hint
        message when the project size is missing or not numeric.
    """
    # An empty numeric input field arrives as None; report that to the user
    # instead of failing with a TypeError.
    try:
        projektwert = float(projektgroesse)
    except (TypeError, ValueError):
        return "Bitte eine gültige Projektgröße (€) angeben."

    input_data = pd.DataFrame([{
        "Kundentyp": kundentyp,
        "Branche": branche,
        "Produktgruppe": produktgruppe,
        "Region": region,
        "Kanal": kanal,
        "Dringlichkeit": dringlichkeit,
        "Wochentag": wochentag,
        # The text vectoriser cannot handle None, so fall back to "".
        "Anfrage_Text": anfrage_text or "",
        "Projektgröße (€)": projektwert,
    }])

    # NOTE(review): column 1 is taken as the probability of a successful close;
    # this assumes the positive label is the second entry of pipeline.classes_
    # — confirm against the values in the "Abschluss" column.
    prob = pipeline.predict_proba(input_data)[0][1]

    if prob >= 0.75:
        klasse = "hoch"
    elif prob >= 0.4:
        klasse = "mittel"
    else:
        klasse = "niedrig"
    return f"Abschlusswahrscheinlichkeit: {prob:.2f} → Priorität: {klasse.upper()}"
# ----------------------------
# Gradio UI
# ----------------------------
# One input widget per predict_lead argument, listed in the order of its
# signature; the result is rendered as plain text.
demo = gr.Interface(
    predict_lead,
    [
        gr.Dropdown(choices=["Neukunde", "Bestandskunde", "OEM"], label="Kundentyp"),
        gr.Dropdown(choices=["Gebäude", "Infrastruktur"], label="Branche"),
        gr.Dropdown(
            choices=["Sicherung", "Graphitmodul", "Isolationsmaterial", "Spezialfertigung"],
            label="Produktgruppe",
        ),
        gr.Dropdown(choices=["DACH"], label="Region"),
        gr.Dropdown(choices=["Webformular", "E-Mail", "Vertriebspartner"], label="Kanal"),
        gr.Dropdown(
            choices=["sofort", "Q1 2025", "Q2 2025", "nächstes Jahr", "unklar"],
            label="Dringlichkeit",
        ),
        # English weekday names, as produced by day_name() during training.
        gr.Dropdown(
            choices=[
                "Monday", "Tuesday", "Wednesday", "Thursday",
                "Friday", "Saturday", "Sunday",
            ],
            label="Wochentag",
        ),
        gr.Textbox(label="Anfrage-Text"),
        gr.Number(label="Projektgröße (€)"),
    ],
    "text",
    title="📈 Lead-Priorisierung für Angebotsanfragen bei Mersen",
    description="Dieses Modell bewertet Angebotsanfragen nach ihrer Abschlusswahrscheinlichkeit und priorisiert Leads.",
)

# Launch the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()