yoniif committed on
Commit
f998a9b
·
verified ·
1 Parent(s): b2b432a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -17,7 +17,7 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
17
  """
18
  Creates a synthetic dataset that mirrors your current schema:
19
  Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
20
- Uses a Hugging Face text-generation model to generate influencer names (simple catchy-username prompt).
21
  """
22
  random.seed(seed)
23
 
@@ -38,14 +38,15 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
38
  ]
39
  niches = [
40
  "Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
41
- "Lifestyle","Education","Finance","Sports","Parenting","DIY"
 
42
  ]
43
  platforms = ["youtube", "instagram", "tiktok", "twitch", "x"] # lowercase -> file prefix
44
 
45
  rows = []
46
  for rank in range(1, n + 1):
47
- # --- Name via HF text generation (simple catchy prompt) ---
48
- prompt = "Short, catchy influencer username:"
49
  out = name_gen(
50
  prompt,
51
  max_new_tokens=8,
@@ -55,13 +56,12 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
55
  top_p=0.92
56
  )[0]["generated_text"]
57
 
58
- # cleanup
59
  name = out.replace(prompt, "").strip().split("\n")[0]
60
- # keep letters/digits/space/dot/underscore/hyphen; then compress spaces
61
- name = re.sub(r"[^A-Za-z0-9 _\.-]", "", name).strip()
62
- name = name.replace(" ", "")
63
- if len(name) < 3 or len(name) > 40:
64
- name = f"Creator_{rank}"
65
 
66
  # --- Structured fields sampled to look realistic ---
67
  followers = random.randint(5_000, 5_000_000)
@@ -167,9 +167,9 @@ iface = gr.Interface(
167
  article=(
168
  "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
169
  "**Models:**\n"
170
- "- text-generation (Hugging Face) for synthetic influencer usernames (dataset creation)\n"
171
  "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
172
- "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime (same schema as original)."
173
  ),
174
  examples=[
175
  ["Sustainable fashion campaign targeting eco-conscious millennials"],
 
17
  """
18
  Creates a synthetic dataset that mirrors your current schema:
19
  Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
20
+ Uses a Hugging Face text-generation model to generate realistic first + last names.
21
  """
22
  random.seed(seed)
23
 
 
38
  ]
39
  niches = [
40
  "Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
41
+ "Lifestyle","Education","Finance","Sports","Parenting","DIY", "Fashion + Lifestyle",
42
+ "Rech + Gaming", "Food + Fitness", "Beauty + Fashion", "Sports + Fitness"
43
  ]
44
  platforms = ["youtube", "instagram", "tiktok", "twitch", "x"] # lowercase -> file prefix
45
 
46
  rows = []
47
  for rank in range(1, n + 1):
48
+ # --- Name via HF text generation (realistic first + last name) ---
49
+ prompt = "Generate a realistic influencer's first and last name:"
50
  out = name_gen(
51
  prompt,
52
  max_new_tokens=8,
 
56
  top_p=0.92
57
  )[0]["generated_text"]
58
 
59
+ # cleanup: remove the prompt text, keep letters & spaces only, single line
60
  name = out.replace(prompt, "").strip().split("\n")[0]
61
+ name = re.sub(r"[^A-Za-z\s]", "", name).strip()
62
+ # ensure it has at least two words; light fallback if the model returns junk
63
+ if len(name.split()) < 2:
64
+ name = f"Alex {rank}son" # simple readable fallback
 
65
 
66
  # --- Structured fields sampled to look realistic ---
67
  followers = random.randint(5_000, 5_000_000)
 
167
  article=(
168
  "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
169
  "**Models:**\n"
170
+ "- text-generation (Hugging Face) for synthetic influencer full names (dataset creation)\n"
171
  "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
172
+ "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime."
173
  ),
174
  examples=[
175
  ["Sustainable fashion campaign targeting eco-conscious millennials"],