Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
|
|
| 17 |
"""
|
| 18 |
Creates a synthetic dataset that mirrors your current schema:
|
| 19 |
Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
|
| 20 |
-
Uses a Hugging Face text-generation model to generate
|
| 21 |
"""
|
| 22 |
random.seed(seed)
|
| 23 |
|
|
@@ -38,14 +38,15 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
|
|
| 38 |
]
|
| 39 |
niches = [
|
| 40 |
"Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
|
| 41 |
-
"Lifestyle","Education","Finance","Sports","Parenting","DIY"
|
|
|
|
| 42 |
]
|
| 43 |
platforms = ["youtube", "instagram", "tiktok", "twitch", "x"] # lowercase -> file prefix
|
| 44 |
|
| 45 |
rows = []
|
| 46 |
for rank in range(1, n + 1):
|
| 47 |
-
# --- Name via HF text generation (
|
| 48 |
-
prompt = "
|
| 49 |
out = name_gen(
|
| 50 |
prompt,
|
| 51 |
max_new_tokens=8,
|
|
@@ -55,13 +56,12 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
|
|
| 55 |
top_p=0.92
|
| 56 |
)[0]["generated_text"]
|
| 57 |
|
| 58 |
-
# cleanup
|
| 59 |
name = out.replace(prompt, "").strip().split("\n")[0]
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
name = f"Creator_{rank}"
|
| 65 |
|
| 66 |
# --- Structured fields sampled to look realistic ---
|
| 67 |
followers = random.randint(5_000, 5_000_000)
|
|
@@ -167,9 +167,9 @@ iface = gr.Interface(
|
|
| 167 |
article=(
|
| 168 |
"**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
|
| 169 |
"**Models:**\n"
|
| 170 |
-
"- text-generation (Hugging Face) for synthetic influencer
|
| 171 |
"- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
|
| 172 |
-
"**Dataset:** 1,200-row synthetic influencer dataset generated at runtime
|
| 173 |
),
|
| 174 |
examples=[
|
| 175 |
["Sustainable fashion campaign targeting eco-conscious millennials"],
|
|
|
|
| 17 |
"""
|
| 18 |
Creates a synthetic dataset that mirrors your current schema:
|
| 19 |
Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
|
| 20 |
+
Uses a Hugging Face text-generation model to generate realistic first + last names.
|
| 21 |
"""
|
| 22 |
random.seed(seed)
|
| 23 |
|
|
|
|
| 38 |
]
|
| 39 |
niches = [
|
| 40 |
"Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
|
| 41 |
+
"Lifestyle","Education","Finance","Sports","Parenting","DIY", "Fashion + Lifestyle",
|
| 42 |
+
"Tech + Gaming", "Food + Fitness", "Beauty + Fashion", "Sports + Fitness"
|
| 43 |
]
|
| 44 |
platforms = ["youtube", "instagram", "tiktok", "twitch", "x"] # lowercase -> file prefix
|
| 45 |
|
| 46 |
rows = []
|
| 47 |
for rank in range(1, n + 1):
|
| 48 |
+
# --- Name via HF text generation (realistic first + last name) ---
|
| 49 |
+
prompt = "Generate a realistic influencer's first and last name:"
|
| 50 |
out = name_gen(
|
| 51 |
prompt,
|
| 52 |
max_new_tokens=8,
|
|
|
|
| 56 |
top_p=0.92
|
| 57 |
)[0]["generated_text"]
|
| 58 |
|
| 59 |
+
# cleanup: remove the prompt text, keep letters & spaces only, single line
|
| 60 |
name = out.replace(prompt, "").strip().split("\n")[0]
|
| 61 |
+
name = re.sub(r"[^A-Za-z\s]", "", name).strip()
|
| 62 |
+
# ensure it has at least two words; light fallback if the model returns junk
|
| 63 |
+
if len(name.split()) < 2:
|
| 64 |
+
name = f"Alex {rank}son" # simple readable fallback
|
|
|
|
| 65 |
|
| 66 |
# --- Structured fields sampled to look realistic ---
|
| 67 |
followers = random.randint(5_000, 5_000_000)
|
|
|
|
| 167 |
article=(
|
| 168 |
"**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
|
| 169 |
"**Models:**\n"
|
| 170 |
+
"- text-generation (Hugging Face) for synthetic influencer full names (dataset creation)\n"
|
| 171 |
"- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
|
| 172 |
+
"**Dataset:** 1,200-row synthetic influencer dataset generated at runtime."
|
| 173 |
),
|
| 174 |
examples=[
|
| 175 |
["Sustainable fashion campaign targeting eco-conscious millennials"],
|