Spaces:

ElPierrito
/

Lead_Prio

Sleeping

App Files Files Community

Lead_Prio / app.py

ElPierrito

Update app.py

342fbf9 verified 8 months ago

raw

history blame contribute delete

3.77 kB


	import pandas as pd
	import numpy as np
	import gradio as gr
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.compose import ColumnTransformer
	from sklearn.linear_model import LogisticRegression

	# ----------------------------
	# CSV laden und Spalten bereinigen
	# ----------------------------

	# Load the training data. The "utf-8-sig" codec skips a leading byte-order
	# mark if the file has one, so the first header name stays clean.
	df = pd.read_csv("Testdaten_Mersen_kompatibel.csv", sep=";", encoding="utf-8-sig")

	# Normalise the header names. The non-breaking space is spelled as an
	# explicit escape: written as a literal it is indistinguishable from an
	# ordinary space, and if an editor converts it the replacement silently
	# becomes a no-op.
	df.columns = (
	    df.columns
	    .str.replace("\u00a0", " ", regex=False)
	    .str.strip()
	)

	# The weekday feature is derived from the request date, so that column is
	# mandatory; fail early and list what was actually found.
	if "Anfrage_Datum" not in df.columns:
	    raise ValueError("Spalte 'Anfrage_Datum' nicht gefunden. Verfügbare Spalten: " + str(df.columns.tolist()))

	# errors="coerce" turns unparseable dates into NaT, which in turn yields a
	# missing weekday for that row instead of raising.
	df["Anfrage_Datum"] = pd.to_datetime(df["Anfrage_Datum"], errors="coerce")
	df["Wochentag"] = df["Anfrage_Datum"].dt.day_name()

	# ----------------------------
	# Features & Ziel definieren
	# ----------------------------

	# Column groups, kept apart because each group is handed to its own
	# transformer further down.
	categorical_features = [
	    "Kundentyp",
	    "Branche",
	    "Produktgruppe",
	    "Region",
	    "Kanal",
	    "Dringlichkeit",
	    "Wochentag",
	]
	text_feature = "Anfrage_Text"
	numeric_feature = "Projektgröße (€)"

	# Feature matrix: the categorical columns first, then the free-text column
	# and finally the numeric column.
	X = df[[*categorical_features, text_feature, numeric_feature]]

	# Target column.
	y = df["Abschluss"]

	# ----------------------------
	# Modell-Pipeline
	# ----------------------------

	# One preprocessing step per feature kind.
	# Categories not seen during fitting are ignored at prediction time.
	categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
	# The text column is selected by a bare name (not a list) so the vectorizer
	# receives a one-dimensional column of strings.
	text_step = ("text", TfidfVectorizer(), text_feature)
	# NOTE(review): the numeric column is forwarded unscaled — confirm that this
	# is intended for the linear classifier below.
	numeric_step = ("num", "passthrough", [numeric_feature])

	preprocessor = ColumnTransformer([categorical_step, text_step, numeric_step])

	classifier = LogisticRegression(max_iter=1000)
	pipeline = Pipeline([
	    ("preprocessor", preprocessor),
	    ("classifier", classifier),
	])

	# The model is fitted on the complete dataset; no hold-out split is made here.
	pipeline.fit(X, y)

	# ----------------------------
	# Vorhersagefunktion
	# ----------------------------

	def predict_lead(kundentyp, branche, produktgruppe, region, kanal,
	                 dringlichkeit, wochentag, anfrage_text, projektgroesse):
	    """Score one request and return a formatted priority message.

	    The arguments mirror the feature columns the fitted ``pipeline`` was
	    trained on. ``projektgroesse`` must be convertible to a finite float;
	    otherwise a message asking for a valid value is returned instead of
	    raising. A missing ``anfrage_text`` is treated as an empty string.

	    Returns:
	        A string with the predicted probability (two decimals) and the
	        priority class: "HOCH" from 0.75, "MITTEL" from 0.4, else "NIEDRIG".
	    """
	    # An empty numeric field can arrive as None (TypeError in float()) or as
	    # an unparseable string (ValueError); NaN/inf would make the classifier
	    # raise. Report the problem in the output box rather than crashing.
	    try:
	        project_size = float(projektgroesse)
	    except (TypeError, ValueError):
	        return "Bitte eine gültige Projektgröße (€) angeben."
	    if not np.isfinite(project_size):
	        return "Bitte eine gültige Projektgröße (€) angeben."

	    # The text vectorizer needs a string; fall back to an empty one.
	    request_text = anfrage_text if anfrage_text is not None else ""

	    input_data = pd.DataFrame([{
	        "Kundentyp": kundentyp,
	        "Branche": branche,
	        "Produktgruppe": produktgruppe,
	        "Region": region,
	        "Kanal": kanal,
	        "Dringlichkeit": dringlichkeit,
	        "Wochentag": wochentag,
	        "Anfrage_Text": request_text,
	        "Projektgröße (€)": project_size,
	    }])

	    # Column 1 of predict_proba belongs to the second entry of the
	    # classifier's class list.
	    # NOTE(review): assumes that entry is the "deal closed" label — confirm
	    # against the values of the target column.
	    prob = pipeline.predict_proba(input_data)[0][1]

	    if prob >= 0.75:
	        klasse = "hoch"
	    elif prob >= 0.4:
	        klasse = "mittel"
	    else:
	        klasse = "niedrig"

	    return f"Abschlusswahrscheinlichkeit: {prob:.2f} → Priorität: {klasse.upper()}"

	# ----------------------------
	# Gradio UI
	# ----------------------------

	# Input widgets, listed in the same order as the parameters of predict_lead.
	lead_inputs = [
	    gr.Dropdown(["Neukunde", "Bestandskunde", "OEM"], label="Kundentyp"),
	    gr.Dropdown(["Gebäude", "Infrastruktur"], label="Branche"),
	    gr.Dropdown(["Sicherung", "Graphitmodul", "Isolationsmaterial", "Spezialfertigung"], label="Produktgruppe"),
	    gr.Dropdown(["DACH"], label="Region"),
	    gr.Dropdown(["Webformular", "E-Mail", "Vertriebspartner"], label="Kanal"),
	    gr.Dropdown(["sofort", "Q1 2025", "Q2 2025", "nächstes Jahr", "unklar"], label="Dringlichkeit"),
	    # English day names, matching what the weekday column holds after day_name().
	    gr.Dropdown(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"], label="Wochentag"),
	    gr.Textbox(label="Anfrage-Text"),
	    gr.Number(label="Projektgröße (€)"),
	]

	# Single-function interface: the widgets above feed predict_lead and its
	# returned string is shown as plain text.
	demo = gr.Interface(
	    fn=predict_lead,
	    inputs=lead_inputs,
	    outputs="text",
	    title="📈 Lead-Priorisierung für Angebotsanfragen bei Mersen",
	    description="Dieses Modell bewertet Angebotsanfragen nach ihrer Abschlusswahrscheinlichkeit und priorisiert Leads.",
	)

	# Start the web app only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()