UHRCRU committed on
Commit
f62223a
·
verified ·
1 Parent(s): ac495a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -29
app.py CHANGED
@@ -1,56 +1,66 @@
1
  import os
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
4
  import gradio as gr
5
  import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
- # Use TinyLlama (Open-source & lightweight)
9
- model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
10
- tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
 
 
 
 
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  model_id,
 
13
  device_map="auto",
14
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
15
  )
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
  model.to(device)
18
 
 
19
  def load_ga4_data():
20
  return pd.read_csv("synthetic_ga4_data.csv")
21
-
22
- def load_channel_data():
23
- df = load_ga4_data()
24
- agg = df.groupby("Channel").agg({
25
- "Sessions": "sum",
26
- "Transactions": "sum",
27
- "Revenue": "sum"
28
- })
29
- agg["CPA (TRY)"] = (agg["Revenue"] / agg["Transactions"]).fillna(0)
30
- return agg.reset_index()
31
-
32
- def generate_insight(prompt):
33
- try:
34
  df = load_ga4_data()
35
  prompt = prompt.lower().strip()
36
 
 
37
  if "most users" in prompt:
38
  top_city = df.groupby("City")["Users"].sum().idxmax()
39
  users = df.groupby("City")["Users"].sum().max()
40
- base_answer = f"{top_city} has the most users with {int(users)} total."
41
- elif "best conversion" in prompt:
42
- df["ConversionRate"] = df["Transactions"] / df["Sessions"]
43
- conv = df.groupby("City")["ConversionRate"].mean()
44
- top_city = conv.idxmax()
45
- base_answer = f"{top_city} has the best average conversion rate of {conv.max():.2%}."
46
  else:
47
- return "📌 I can currently only analyze user counts and conversion rates."
48
 
49
- input_text = f"Act like a digital marketing analyst. Rephrase for a report: {base_answer}"
50
  inputs = tokenizer(input_text, return_tensors="pt").to(device)
51
- outputs = model.generate(**inputs, max_new_tokens=100)
52
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
53
 
54
- return response
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
- return f"⚠️ Error: {s
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import gradio as gr
6
  import torch
 
7
 
8
+
9
+ # --- Load HF token from environment ---
10
+ token = os.getenv("HF_TOKEN")
11
+ if token is None:
12
+ raise ValueError("HF_TOKEN environment variable not set")
13
+
14
+ # --- Use a larger, more capable model (7B parameters) ---
15
+ model_id = "HuggingFaceH4/zephyr-7b-beta"
16
+
17
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
18
  model = AutoModelForCausalLM.from_pretrained(
19
  model_id,
20
+ token=token,
21
  device_map="auto",
22
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
23
  )
24
+
25
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
  model.to(device)
27
 
28
+ # --- Load GA4 CSV data ---
29
  def load_ga4_data():
30
  return pd.read_csv("synthetic_ga4_data.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  df = load_ga4_data()
32
  prompt = prompt.lower().strip()
33
 
34
+
35
  if "most users" in prompt:
36
  top_city = df.groupby("City")["Users"].sum().idxmax()
37
  users = df.groupby("City")["Users"].sum().max()
 
 
 
 
 
 
38
  else:
39
+ base_answer = "Sorry, I can only currently analyze questions about users and conversion rates."
40
 
41
+ input_text = f"Rephrase this like a digital marketing analyst for a business report: {base_answer}"
42
  inputs = tokenizer(input_text, return_tensors="pt").to(device)
 
 
43
 
44
+ outputs = model.generate(
45
+ **inputs,
46
+ max_new_tokens=80,
47
+ do_sample=False,
48
+ temperature=0.3,
49
+ repetition_penalty=1.1
50
+ )
51
+
52
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
53
+ return response, "✅ Insight generated successfully."
54
+
55
  except Exception as e:
56
+ return f"⚠️ Error: {str(e)}", "❌ Insight generation failed."
57
+
58
+ # --- City Performance Plot ---
59
+ def plot_city_performance():
60
+ prompt = gr.Textbox(label="Your Analysis Question", placeholder="Which city has the best conversion rate?")
61
+ generate = gr.Button("Generate Insight")
62
+ output = gr.Textbox(label="AI Response", interactive=False)
63
+ status = gr.Textbox(label="Status", interactive=False, visible=True)
64
+ generate.click(fn=generate_insight, inputs=prompt, outputs=[output, status])
65
+
66
+ gr.Markdown("---\n**Available GA4 Metrics:** Users, Sessions, Transactions, Revenue, Avg Session Duration, etc.")