Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,56 +1,66 @@
|
|
| 1 |
import os
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
model = AutoModelForCausalLM.from_pretrained(
|
| 12 |
model_id,
|
|
|
|
| 13 |
device_map="auto",
|
| 14 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
|
| 15 |
)
|
|
|
|
| 16 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 17 |
model.to(device)
|
| 18 |
|
|
|
|
| 19 |
def load_ga4_data():
    """Load the synthetic GA4 export from the bundled CSV and return it as a DataFrame."""
    source_file = "synthetic_ga4_data.csv"
    return pd.read_csv(source_file)
|
| 21 |
-
|
| 22 |
-
def load_channel_data():
    """Aggregate the GA4 dataset by marketing channel.

    Returns a DataFrame with one row per Channel, the summed
    Sessions / Transactions / Revenue, and a per-transaction
    revenue column labelled "CPA (TRY)".
    """
    df = load_ga4_data()
    agg = df.groupby("Channel").agg({
        "Sessions": "sum",
        "Transactions": "sum",
        "Revenue": "sum"
    })
    # NOTE(review): Revenue / Transactions is average revenue per
    # transaction (AOV), not a true cost-per-acquisition; the column
    # name is kept unchanged for backward compatibility with callers.
    per_txn = agg["Revenue"] / agg["Transactions"]
    # In pandas, 0/0 yields NaN but x/0 yields +/-inf, which the original
    # fillna(0) missed — neutralize both so channels with zero
    # transactions report 0 instead of inf.
    agg["CPA (TRY)"] = (
        per_txn.replace([float("inf"), float("-inf")], 0).fillna(0)
    )
    return agg.reset_index()
|
| 31 |
-
|
| 32 |
-
def generate_insight(prompt):
|
| 33 |
-
try:
|
| 34 |
df = load_ga4_data()
|
| 35 |
prompt = prompt.lower().strip()
|
| 36 |
|
|
|
|
| 37 |
if "most users" in prompt:
|
| 38 |
top_city = df.groupby("City")["Users"].sum().idxmax()
|
| 39 |
users = df.groupby("City")["Users"].sum().max()
|
| 40 |
-
base_answer = f"{top_city} has the most users with {int(users)} total."
|
| 41 |
-
elif "best conversion" in prompt:
|
| 42 |
-
df["ConversionRate"] = df["Transactions"] / df["Sessions"]
|
| 43 |
-
conv = df.groupby("City")["ConversionRate"].mean()
|
| 44 |
-
top_city = conv.idxmax()
|
| 45 |
-
base_answer = f"{top_city} has the best average conversion rate of {conv.max():.2%}."
|
| 46 |
else:
|
| 47 |
-
|
| 48 |
|
| 49 |
-
input_text = f"
|
| 50 |
inputs = tokenizer(input_text, return_tensors="pt").to(device)
|
| 51 |
-
outputs = model.generate(**inputs, max_new_tokens=100)
|
| 52 |
-
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
except Exception as e:
|
| 56 |
-
return f"⚠️ Error: {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 5 |
import gradio as gr
|
| 6 |
import torch
|
|
|
|
| 7 |
|
| 8 |
+
|
| 9 |
+
# --- Load HF token from environment ---
token = os.getenv("HF_TOKEN")
if token is None:
    raise ValueError("HF_TOKEN environment variable not set")

# --- Model selection ---
model_id = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

# Target device used later when moving tokenized inputs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): with device_map="auto" the model is already dispatched by
# accelerate; calling .to(device) on a dispatched model raises
# RuntimeError on GPU hosts (a likely cause of the Space's runtime
# error), so the explicit move is only done on CPU-only machines where
# it is a no-op-safe placement.
if not torch.cuda.is_available():
    model.to(device)
| 27 |
|
| 28 |
+
# --- Load GA4 CSV data ---
def load_ga4_data():
    """Return the synthetic GA4 export as a pandas DataFrame."""
    csv_path = "synthetic_ga4_data.csv"
    return pd.read_csv(csv_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
df = load_ga4_data()
|
| 32 |
prompt = prompt.lower().strip()
|
| 33 |
|
| 34 |
+
|
| 35 |
if "most users" in prompt:
|
| 36 |
top_city = df.groupby("City")["Users"].sum().idxmax()
|
| 37 |
users = df.groupby("City")["Users"].sum().max()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
else:
|
| 39 |
+
base_answer = "Sorry, I can only currently analyze questions about users and conversion rates."
|
| 40 |
|
| 41 |
+
input_text = f"Rephrase this like a digital marketing analyst for a business report: {base_answer}"
|
| 42 |
inputs = tokenizer(input_text, return_tensors="pt").to(device)
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
outputs = model.generate(
|
| 45 |
+
**inputs,
|
| 46 |
+
max_new_tokens=80,
|
| 47 |
+
do_sample=False,
|
| 48 |
+
temperature=0.3,
|
| 49 |
+
repetition_penalty=1.1
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
|
| 53 |
+
return response, "✅ Insight generated successfully."
|
| 54 |
+
|
| 55 |
except Exception as e:
|
| 56 |
+
return f"⚠️ Error: {str(e)}", "❌ Insight generation failed."
|
| 57 |
+
|
| 58 |
+
# --- City Performance Plot ---
|
| 59 |
+
def plot_city_performance():
|
| 60 |
+
prompt = gr.Textbox(label="Your Analysis Question", placeholder="Which city has the best conversion rate?")
|
| 61 |
+
generate = gr.Button("Generate Insight")
|
| 62 |
+
output = gr.Textbox(label="AI Response", interactive=False)
|
| 63 |
+
status = gr.Textbox(label="Status", interactive=False, visible=True)
|
| 64 |
+
generate.click(fn=generate_insight, inputs=prompt, outputs=[output, status])
|
| 65 |
+
|
| 66 |
+
gr.Markdown("---\n**Available GA4 Metrics:** Users, Sessions, Transactions, Revenue, Avg Session Duration, etc.")
|