Spaces:

yoniif
/

final_assignment_yoni_gavriel

Sleeping

App Files Files Community

yoniif committed on Aug 13, 2025

Commit

f998a9b

verified ·

1 Parent(s): b2b432a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
     """
     Creates a synthetic dataset that mirrors your current schema:
     Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
-    Uses a Hugging Face text-generation model to generate influencer names (simple catchy-username prompt).
     """
     random.seed(seed)
@@ -38,14 +38,15 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
     ]
     niches = [
         "Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
-        "Lifestyle","Education","Finance","Sports","Parenting","DIY"
     ]
     platforms = ["youtube", "instagram", "tiktok", "twitch", "x"]  # lowercase -> file prefix
     rows = []
     for rank in range(1, n + 1):
-        # --- Name via HF text generation (simple catchy prompt) ---
-        prompt = "Short, catchy influencer username:"
         out = name_gen(
             prompt,
             max_new_tokens=8,
@@ -55,13 +56,12 @@ def create_synthetic_influencer_dataset(n=1200, out_csv="synthetic_influencers.c
             top_p=0.92
         )[0]["generated_text"]
-        # cleanup
         name = out.replace(prompt, "").strip().split("\n")[0]
-        # keep letters/digits/space/dot/underscore/hyphen; then compress spaces
-        name = re.sub(r"[^A-Za-z0-9 _\.-]", "", name).strip()
-        name = name.replace(" ", "")
-        if len(name) < 3 or len(name) > 40:
-            name = f"Creator_{rank}"
         # --- Structured fields sampled to look realistic ---
         followers = random.randint(5_000, 5_000_000)
@@ -167,9 +167,9 @@ iface = gr.Interface(
     article=(
         "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
         "**Models:**\n"
-        "- text-generation (Hugging Face) for synthetic influencer usernames (dataset creation)\n"
         "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
-        "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime (same schema as original)."
     ),
     examples=[
         ["Sustainable fashion campaign targeting eco-conscious millennials"],

     """
     Creates a synthetic dataset that mirrors your current schema:
     Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path
+    Uses a Hugging Face text-generation model to generate realistic first + last names.
     """
     random.seed(seed)
     ]
     niches = [
         "Fashion","Travel","Food","Fitness","Tech","Beauty","Gaming","Music","Photography",
+        "Lifestyle","Education","Finance","Sports","Parenting","DIY", "Fashion + Lifestyle",
+        "Tech + Gaming", "Food + Fitness", "Beauty + Fashion", "Sports + Fitness"
     ]
     platforms = ["youtube", "instagram", "tiktok", "twitch", "x"]  # lowercase -> file prefix
     rows = []
     for rank in range(1, n + 1):
+        # --- Name via HF text generation (realistic first + last name) ---
+        prompt = "Generate a realistic influencer's first and last name:"
         out = name_gen(
             prompt,
             max_new_tokens=8,
             top_p=0.92
         )[0]["generated_text"]
+        # cleanup: remove the prompt text, keep letters & spaces only, single line
         name = out.replace(prompt, "").strip().split("\n")[0]
+        name = re.sub(r"[^A-Za-z\s]", "", name).strip()
+        # ensure it has at least two words; light fallback if the model returns junk
+        if len(name.split()) < 2:
+            name = f"Alex {rank}son"  # simple readable fallback
         # --- Structured fields sampled to look realistic ---
         followers = random.randint(5_000, 5_000_000)
     article=(
         "**Project:** AI-Powered Influencer Recommender for Social Media Marketing\n\n"
         "**Models:**\n"
+        "- text-generation (Hugging Face) for synthetic influencer full names (dataset creation)\n"
         "- sentence-transformers/all-MiniLM-L6-v2 for semantic embeddings (recommendations)\n\n"
+        "**Dataset:** 1,200-row synthetic influencer dataset generated at runtime."
     ),
     examples=[
         ["Sustainable fashion campaign targeting eco-conscious millennials"],