Spaces:

yoniif
/

final_assignment

Sleeping

App Files Community

yoniif committed on Aug 8, 2025

Commit

aea3859

verified ·

1 Parent(s): 04f0f11

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -72

app.py CHANGED Viewed

@@ -1,87 +1,16 @@
-# 🔧 Install dependencies (uncomment if running locally)
-# !pip install gradio pandas sentence-transformers
-import os
-import zipfile
-import requests
 import pandas as pd
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
-### STEP 1: Download and unzip the influencer dataset from Hugging Face
-# url = "https://huggingface.co/spaces/yoniif/final_assignment/resolve/main/top_influencers.zip"
-# zip_path = "top_100_influencers.zip"
-# # Download zip file if not already present
-# if not os.path.exists(zip_path):
-#     print("📥 Downloading influencer dataset...")
-#     headers = {"User-Agent": "Mozilla/5.0"}
-#     r = requests.get(url, headers=headers)
-#     # Confirm file is binary ZIP
-#     if r.status_code != 200 or b"PK" not in r.content[:10]:
-#         raise ValueError("❌ Invalid ZIP file downloaded. Check URL or access permissions.")
-#     with open(zip_path, "wb") as f:
-#         f.write(r.content)
-# # Unzip the file into a folder
-# unzip_dir = "influencer_data"
-# if not os.path.exists(unzip_dir):
-#     print("📦 Unzipping dataset...")
-#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-#         zip_ref.extractall(unzip_dir)
-# ### STEP 2: Merge all CSVs into one
-# print("🔗 Merging influencer files...")
-# all_dfs = []
-# for file in os.listdir(unzip_dir):
-#     if file.endswith(".csv"):
-#         df = pd.read_csv(os.path.join(unzip_dir, file))
-#         df["Source File"] = file  # Optional: keep track of file origin
-#         all_dfs.append(df)
-# df = pd.concat(all_dfs, ignore_index=True)
-# ✅ Load the combined CSV directly
 df = pd.read_csv("top_100_influencers_combined_sample.csv")
-# Fill NA just in case
 df.fillna("", inplace=True)
-# Combine fields for embeddings
 df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
-# Basic cleanup
-df.drop_duplicates(inplace=True)
-df.dropna(subset=["Name", "Niche"], inplace=True)
-df.fillna("", inplace=True)
-# Save combined dataset (optional)
-df.to_csv("top_100_influencers_combined.csv", index=False)
-print("✅ Combined dataset ready!")
-### STEP 3: Build the recommender engine
-# Combine fields for semantic embedding
-df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
-# Load sentence embedding model
-print("🧠 Loading embedding model...")
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# Precompute influencer embeddings
-print("🔢 Encoding influencer profiles...")
 influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
-### STEP 4: Define similarity search + UI
 def recommend_influencers(brand_description):
     query_embedding = model.encode(brand_description, convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]

 import pandas as pd
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
+# ✅ Load the CSV you uploaded into the Space
 df = pd.read_csv("top_100_influencers_combined_sample.csv")
 df.fillna("", inplace=True)
 df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
 def recommend_influencers(brand_description):
     query_embedding = model.encode(brand_description, convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]