Spaces:

yoniif
/

final_assignment

Sleeping

App Files Files Community

yoniif committed on Aug 8, 2025

Commit

0d6d77d

verified ·

1 Parent(s): 9e29eab

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -12

app.py CHANGED Viewed

@@ -1,31 +1,74 @@
-# 🔧 Install dependencies first (uncomment for local testing)
 # !pip install gradio pandas sentence-transformers
-import gradio as gr
 import pandas as pd
 from sentence_transformers import SentenceTransformer, util
-# Load influencer dataset (replace path with uploaded HF dataset if needed)
-df = pd.read_csv("top_100_influencers.csv")  # <- upload to HF Space alongside this script
-# Fill NA just in case
 df.fillna("", inplace=True)
-# Combine fields for embedding
 df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
-# Load embedding model
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# Precompute embeddings
 influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
-# 🔍 Recommendation Function
 def recommend_influencers(brand_description):
     query_embedding = model.encode(brand_description, convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
     top_indices = cosine_scores.topk(3).indices.tolist()
     recommendations = []
     for idx in top_indices:
         row = df.iloc[idx]
@@ -39,7 +82,6 @@ def recommend_influencers(brand_description):
         })
     return recommendations
-# 🖼️ Gradio UI
 def format_output(brand_input):
     recs = recommend_influencers(brand_input)
     output = ""
@@ -53,7 +95,7 @@ def format_output(brand_input):
 demo = gr.Interface(
     fn=format_output,
-    inputs=gr.Textbox(label="Enter your brand description (e.g. 'Sustainable fashion for Gen Z')", placeholder="Describe your brand..."),
     outputs=gr.Markdown(label="Top 3 Influencer Matches"),
     title="InfluMatch: Influencer Recommender",
     description="Describe your brand or campaign and get 3 matching influencer suggestions.",

+# 🔧 Install dependencies (uncomment if running locally)
 # !pip install gradio pandas sentence-transformers
+import os
+import zipfile
+import requests
 import pandas as pd
+import gradio as gr
 from sentence_transformers import SentenceTransformer, util
+### STEP 1: Download and unzip the influencer dataset from Hugging Face
+# Replace this with your actual dataset ZIP URL
+url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
+zip_path = "top_100_influencers.zip"
+# Download zip file if not already present
+if not os.path.exists(zip_path):
+    print("📥 Downloading influencer dataset...")
+    r = requests.get(url)
+    with open(zip_path, "wb") as f:
+        f.write(r.content)
+# Unzip the file into a folder
+unzip_dir = "influencer_data"
+if not os.path.exists(unzip_dir):
+    print("📦 Unzipping dataset...")
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+        zip_ref.extractall(unzip_dir)
+### STEP 2: Merge all CSVs into one
+print("🔗 Merging influencer files...")
+all_dfs = []
+for file in os.listdir(unzip_dir):
+    if file.endswith(".csv"):
+        df = pd.read_csv(os.path.join(unzip_dir, file))
+        df["Source File"] = file  # Optional: keep track of file origin
+        all_dfs.append(df)
+df = pd.concat(all_dfs, ignore_index=True)
+# Basic cleanup
+df.drop_duplicates(inplace=True)
+df.dropna(subset=["Name", "Niche"], inplace=True)
 df.fillna("", inplace=True)
+# Save combined dataset (optional)
+df.to_csv("top_100_influencers_combined.csv", index=False)
+print("✅ Combined dataset ready!")
+### STEP 3: Build the recommender engine
+# Combine fields for semantic embedding
 df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
+# Load sentence embedding model
+print("🧠 Loading embedding model...")
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Precompute influencer embeddings
+print("🔢 Encoding influencer profiles...")
 influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
+### STEP 4: Define similarity search + UI
 def recommend_influencers(brand_description):
     query_embedding = model.encode(brand_description, convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
     top_indices = cosine_scores.topk(3).indices.tolist()
     recommendations = []
     for idx in top_indices:
         row = df.iloc[idx]
         })
     return recommendations
 def format_output(brand_input):
     recs = recommend_influencers(brand_input)
     output = ""
 demo = gr.Interface(
     fn=format_output,
+    inputs=gr.Textbox(label="Enter your brand or campaign description", placeholder="e.g. Sustainable fashion for Gen Z"),
     outputs=gr.Markdown(label="Top 3 Influencer Matches"),
     title="InfluMatch: Influencer Recommender",
     description="Describe your brand or campaign and get 3 matching influencer suggestions.",