yoniif committed on
Commit
b2b432a
·
verified ·
1 Parent(s): 2476aae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -18
app.py CHANGED
@@ -1,6 +1,7 @@
1
-
2
  import os
3
  import random
 
4
  import pandas as pd
5
  import gradio as gr
6
  from sentence_transformers import SentenceTransformer, util
@@ -16,11 +17,11 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
16
  """
17
  Creates a synthetic dataset that mirrors your current schema:
18
  Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
19
- Uses a Hugging Face text-generation model to generate influencer names.
20
  """
21
  random.seed(seed)
22
 
23
- # Lazy import to keep dependencies minimal in environments where transformers isn't preinstalled
24
  try:
25
  from transformers import pipeline
26
  except Exception as e:
@@ -28,7 +29,7 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
28
  "Transformers not installed. Install with: pip install transformers torch"
29
  ) from e
30
 
31
- # Use a lighter model to keep HF Space startup snappy (swap to 'gpt2' if you prefer)
32
  name_gen = pipeline("text-generation", model="distilgpt2")
33
 
34
  countries = [
@@ -43,12 +44,22 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
43
 
44
  rows = []
45
  for rank in range(1, n + 1):
46
- # --- Name via HF text generation ---
47
- prompt = "Short, catchy influencer name:"
48
- raw = name_gen(prompt, max_new_tokens=8, num_return_sequences=1, do_sample=True, temperature=0.9)[0]["generated_text"]
 
 
 
 
 
 
 
 
49
  # cleanup
50
- name = raw.replace(prompt, "").strip().split("\n")[0]
51
- name = "".join(ch for ch in name if ch.isalnum() or ch in " -_.").strip()
 
 
52
  if len(name) < 3 or len(name) > 40:
53
  name = f"Creator_{rank}"
54
 
@@ -87,7 +98,7 @@ def load_or_build_synthetic():
87
  df = load_or_build_synthetic()
88
 
89
  # =========================
90
- # FEATURE ENGINEERING (same as yours)
91
  # =========================
92
  # Extract platform name from Source File (first token before '_'), capitalize
93
  df['Platform'] = df['Source File'].astype(str).str.split('_').str[0].str.capitalize()
@@ -97,7 +108,7 @@ profile_fields = ["Name", "Platform", "Niche", "Country"]
97
  df["profile_text"] = df[profile_fields].agg(" - ".join, axis=1)
98
 
99
  # =========================
100
- # EMBEDDINGS & RECOMMENDER (same as yours)
101
  # =========================
102
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
103
  influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
@@ -116,8 +127,8 @@ def recommend_influencers(brand_description):
116
  "Niche": row["Niche"],
117
  "Country": row["Country"],
118
  "ER": f"{row.get('ER', 'N/A')}",
119
- "Followers": row["Followers"],
120
- "Reach": row.get("Reach", "")
121
  })
122
  return recs
123
 
@@ -131,8 +142,8 @@ def format_output(brand_input):
131
  <p style='margin:0.5em 0;'><strong>Niche:</strong> {rec['Niche']}</p>
132
  <p style='margin:0.5em 0;'><strong>Country:</strong> {rec['Country']}</p>
133
  <p style='margin:0.5em 0;'><strong>Engagement:</strong> {rec['ER']}%</p>
134
- <p style='margin:0.5em 0;'><strong>Followers:</strong> {rec['Followers']}</p>
135
- {f"<p style='margin:0.5em 0;'><strong>Reach:</strong> {rec['Reach']}</p>" if rec['Reach'] else ""}
136
  </div>
137
  """
138
  return html
@@ -156,9 +167,9 @@ iface = gr.Interface(
156
  article=(
157
  "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
158
  "**Models:**\n"
159
- "- text-generation (Hugging Face) for synthetic influencer names (dataset creation)\n"
160
  "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
161
- "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime."
162
  ),
163
  examples=[
164
  ["Sustainable fashion campaign targeting eco-conscious millennials"],
@@ -172,4 +183,3 @@ iface = gr.Interface(
172
 
173
  if __name__ == "__main__":
174
  iface.launch(share=True)
175
-
 
1
+ # app.py
2
  import os
3
  import random
4
+ import re
5
  import pandas as pd
6
  import gradio as gr
7
  from sentence_transformers import SentenceTransformer, util
 
17
  """
18
  Creates a synthetic dataset that mirrors your current schema:
19
  Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
20
+ Uses a Hugging Face text-generation model to generate influencer names (simple catchy-username prompt).
21
  """
22
  random.seed(seed)
23
 
24
+ # Lazy import so the app still runs even if transformers isn't preinstalled locally
25
  try:
26
  from transformers import pipeline
27
  except Exception as e:
 
29
  "Transformers not installed. Install with: pip install transformers torch"
30
  ) from e
31
 
32
+ # Small model for fast startup on Spaces; swap to "gpt2" if you prefer
33
  name_gen = pipeline("text-generation", model="distilgpt2")
34
 
35
  countries = [
 
44
 
45
  rows = []
46
  for rank in range(1, n + 1):
47
+ # --- Name via HF text generation (simple catchy prompt) ---
48
+ prompt = "Short, catchy influencer username:"
49
+ out = name_gen(
50
+ prompt,
51
+ max_new_tokens=8,
52
+ num_return_sequences=1,
53
+ do_sample=True,
54
+ temperature=0.9,
55
+ top_p=0.92
56
+ )[0]["generated_text"]
57
+
58
  # cleanup
59
+ name = out.replace(prompt, "").strip().split("\n")[0]
60
+ # keep letters/digits/space/dot/underscore/hyphen; then remove all spaces
61
+ name = re.sub(r"[^A-Za-z0-9 _\.-]", "", name).strip()
62
+ name = name.replace(" ", "")
63
  if len(name) < 3 or len(name) > 40:
64
  name = f"Creator_{rank}"
65
 
 
98
  df = load_or_build_synthetic()
99
 
100
  # =========================
101
+ # FEATURE ENGINEERING
102
  # =========================
103
  # Extract platform name from Source File (first token before '_'), capitalize
104
  df['Platform'] = df['Source File'].astype(str).str.split('_').str[0].str.capitalize()
 
108
  df["profile_text"] = df[profile_fields].agg(" - ".join, axis=1)
109
 
110
  # =========================
111
+ # EMBEDDINGS & RECOMMENDER
112
  # =========================
113
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
114
  influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
 
127
  "Niche": row["Niche"],
128
  "Country": row["Country"],
129
  "ER": f"{row.get('ER', 'N/A')}",
130
+ "Followers": int(row["Followers"]),
131
+ "Reach": int(row["Reach"]) if str(row.get("Reach", "")).isdigit() else row.get("Reach", "")
132
  })
133
  return recs
134
 
 
142
  <p style='margin:0.5em 0;'><strong>Niche:</strong> {rec['Niche']}</p>
143
  <p style='margin:0.5em 0;'><strong>Country:</strong> {rec['Country']}</p>
144
  <p style='margin:0.5em 0;'><strong>Engagement:</strong> {rec['ER']}%</p>
145
+ <p style='margin:0.5em 0;'><strong>Followers:</strong> {rec['Followers']:,}</p>
146
+ {f"<p style='margin:0.5em 0;'><strong>Reach:</strong> {int(rec['Reach']):,}</p>" if isinstance(rec['Reach'], int) else ""}
147
  </div>
148
  """
149
  return html
 
167
  article=(
168
  "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
169
  "**Models:**\n"
170
+ "- text-generation (Hugging Face) for synthetic influencer usernames (dataset creation)\n"
171
  "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
172
+ "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime (same schema as original)."
173
  ),
174
  examples=[
175
  ["Sustainable fashion campaign targeting eco-conscious millennials"],
 
183
 
184
  if __name__ == "__main__":
185
  iface.launch(share=True)