"""PitchCompass demo: retrieve the startup ideas most similar to a user's pitch.

At import time the script loads the idea dataset, the precomputed embedding
matrix, a FAISS index and a sentence-embedding model, then builds a small
Gradio UI around :func:`recommend`.
"""
import json

import faiss
import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

DATASET_REPO = "samvlad/pitchcompass_dataset"
EMB_MODEL = "intfloat/e5-small-v2"

# Column order of the results table; also the keys of each recommend() row.
TABLE_HEADERS = ["Rank", "Score", "Category", "Idea"]

ds = load_dataset(DATASET_REPO, split="train", data_files={"train": "dataset.csv"})
records = list(ds)
# Loaded but not referenced elsewhere in this file; kept in case other code
# imports it from this module.
X = np.load("embeddings.npy").astype("float32")
# NOTE(review): row i of the index is assumed to correspond to records[i] --
# confirm against the script that built faiss.index.
index = faiss.read_index("faiss.index")
emb = SentenceTransformer(EMB_MODEL)


def recommend(user_text, k=3):
    """Return up to *k* dataset ideas closest to *user_text*.

    Args:
        user_text: Free-text description of a startup idea. ``None``, empty
            strings, and inputs shorter than 5 characters after stripping
            produce no results.
        k: Maximum number of neighbours to return. Values below 1 produce no
            results; values above the index size are clamped to it.

    Returns:
        A list of dicts with the keys ``"Rank"`` (1-based position),
        ``"Score"`` (the value FAISS reports, rounded to 4 decimals),
        ``"Category"`` and ``"Idea"``.
    """
    if not user_text or len(user_text.strip()) < 5:
        return []

    # FAISS cannot return more neighbours than it stores and rejects k < 1.
    k = min(int(k), index.ntotal)
    if k <= 0:
        return []

    # The "query: " prefix is presumably the E5 convention for search queries
    # -- confirm against the model card before changing it.
    query = f"query: {user_text.strip()}"
    query_vec = np.asarray(
        emb.encode([query], normalize_embeddings=True), dtype="float32"
    )
    scores, idxs = index.search(query_vec, k)

    results = []
    for i, score in zip(idxs[0].tolist(), scores[0].tolist()):
        # FAISS pads missing neighbours with -1; indexing records[-1] would
        # silently return the last record as a bogus hit. Also skip ids that
        # fall outside the dataset if index and dataset are out of sync.
        if not 0 <= i < len(records):
            continue
        rec = records[i]
        results.append(
            {
                "Rank": len(results) + 1,
                "Score": round(float(score), 4),
                "Category": rec["category"],
                "Idea": rec["idea_text"],
            }
        )
    return results


def _recommend_rows(user_text):
    """Return the top-3 recommendations as rows ordered like TABLE_HEADERS."""
    # gr.Dataframe documents list-of-lists (not list-of-dicts) as an output.
    return [[hit[col] for col in TABLE_HEADERS] for hit in recommend(user_text, 3)]


examples = [
    "A fintech app that rounds up purchases and invests the spare change for students",
    "An AI tutor that explains biology clearly with quizzes",
    "A climate app that helps small shops measure and offset emissions",
    "A privacy-first family photo sharing app with automatic face clustering",
]

with gr.Blocks(title="PitchCompass — Top‑3 Similar Startup Ideas") as demo:
    gr.Markdown("# PitchCompass\nTop‑3 similar ideas from a corpus of 1,200 startup pitches.")
    inp = gr.Textbox(
        label="Your startup idea (1-3 sentences)", lines=3, value=examples[0]
    )
    btn = gr.Button("Find similar ideas")
    out = gr.Dataframe(
        headers=TABLE_HEADERS,
        label="Top 3",
        datatype=["number", "number", "str", "str"],
    )
    gr.Examples(examples=examples, inputs=[inp], label="Try one‑click examples")
    btn.click(_recommend_rows, inputs=[inp], outputs=[out])

if __name__ == "__main__":
    demo.launch()