"""FastAPI service that classifies problem statements as solved or unsolved.

For every problem in the first column of an uploaded CSV the service:

1. looks up similar entries in a prebuilt FAISS index,
2. falls back to an Exa web search when nothing close enough is found,
3. asks a Groq-hosted Qwen model to judge the problem against that evidence
   and answer in JSON.
"""

import json
import logging
import os
import pickle
import re
from typing import Optional

import faiss
import pandas as pd
from exa_py import Exa
from fastapi import FastAPI, File, HTTPException, UploadFile
from groq import Groq
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

app = FastAPI()

# =============================
# 🔑 KEYS
# =============================
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
EXA_API_KEY = os.getenv("EXA_API_KEY")

client = Groq(api_key=GROQ_API_KEY)
exa = Exa(api_key=EXA_API_KEY)

# =============================
# 🧠 LOAD MODELS
# =============================
embed_model = SentenceTransformer('all-mpnet-base-v2')
index = faiss.read_index("faiss_index.index")

# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# ever point this at an artifact produced by a trusted build step.
with open("startup_texts.pkl", "rb") as f:
    startup_texts = pickle.load(f)

# Best-match score below which the local index is trusted as evidence.
# NOTE(review): assumes the index reports a distance (smaller = closer);
# confirm against the metric the index was built with.
SCORE_THRESHOLD = 2.0

# Some reasoning models wrap their deliberation in <think>...</think> ahead
# of the actual answer; it has to be removed before JSON parsing.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)


# =============================
# 🔍 RETRIEVAL
# =============================
def retrieve_similar(problem: str, k: int = 3) -> list:
    """Return up to ``k`` indexed texts most similar to ``problem``.

    Args:
        problem: Free-text problem statement to embed and look up.
        k: Number of neighbours requested from the FAISS index.

    Returns:
        A list of ``{"text": ..., "score": float}`` dicts in the order FAISS
        returned them. The list is shorter than ``k`` (possibly empty) when
        the index holds fewer than ``k`` vectors.
    """
    vec = embed_model.encode([problem], convert_to_numpy=True)
    distances, indices = index.search(vec, k)
    return [
        {"text": startup_texts[int(idx)], "score": float(dist)}
        for dist, idx in zip(distances[0], indices[0])
        # FAISS pads missing neighbours with label -1; indexing with it would
        # silently return the *last* stored text as a bogus match.
        if idx >= 0
    ]


# =============================
# 🌐 WEB SEARCH
# =============================
def search_web(query: str) -> list:
    """Search the web through Exa and return the non-empty result snippets.

    This is a best-effort fallback: any failure is logged and reported to the
    caller as an empty list rather than raised.

    Args:
        query: Search query, passed to Exa unchanged.

    Returns:
        A list of strings, each the ``text`` (or, failing that, ``summary``)
        of one result. Results carrying neither are skipped.
    """
    # NOTE(review): whether a plain `search` call populates `text`/`summary`
    # depends on the exa_py version - confirm results actually carry content.
    try:
        response = exa.search(query, num_results=5)
    except Exception:
        logger.exception("Exa search failed for query %r", query)
        return []

    snippets = []
    for result in getattr(response, "results", None) or []:
        snippet = (
            getattr(result, "text", None)
            or getattr(result, "summary", None)
            or ""
        )
        # Blank snippets add no evidence and would waste one of the few
        # context slots the pipeline keeps.
        if snippet:
            snippets.append(snippet)
    return snippets


# =============================
# 🤖 QWEN (Groq)
# =============================
def ask_qwen(prompt: str) -> Optional[str]:
    """Send ``prompt`` to the Qwen model on Groq and return the raw reply.

    Args:
        prompt: User message; the system message instructs the model to
            answer with JSON only.

    Returns:
        The content of the first completion choice, exactly as returned by
        the API (no parsing or cleanup is done here).
    """
    completion = client.chat.completions.create(
        model="qwen/qwen3-32b",
        messages=[
            {
                "role": "system",
                "content": "You are a strict fact-checking analyst. Return ONLY valid JSON.",
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
        max_tokens=512,
    )
    return completion.choices[0].message.content


# =============================
# 🧠 PIPELINE
# =============================
def _parse_model_json(raw) -> Optional[dict]:
    """Extract a JSON object from a model reply, or return ``None``.

    Tolerates a leading ``<think>...</think>`` block and text or code fences
    around the object. Anything that is not a JSON *object* counts as a
    failure, because the pipeline adds keys to the result.
    """
    if not isinstance(raw, str):
        return None

    cleaned = _THINK_BLOCK_RE.sub("", raw).strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # Retry on the outermost {...} span to drop fences or chatter.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            parsed = json.loads(cleaned[start:end + 1])
        except json.JSONDecodeError:
            return None

    return parsed if isinstance(parsed, dict) else None


def analyze_problem(problem):
    """Judge a single problem statement against retrieved evidence.

    Evidence comes from the local index when its best match scores below
    ``SCORE_THRESHOLD``; otherwise the first three web snippets are used.

    Args:
        problem: The problem statement to analyse.

    Returns:
        The model's JSON object with a ``"problem"`` key added. If the reply
        cannot be parsed into an object, a placeholder dict with
        ``"status": "ERROR"`` is returned instead.
    """
    retrieved = retrieve_similar(problem)

    if retrieved and retrieved[0]["score"] < SCORE_THRESHOLD:
        context = "\n\n".join(r["text"] for r in retrieved)
    else:
        context = "\n\n".join(search_web(problem)[:3])

    prompt = f"""
Problem:
{problem}

Evidence:
{context}

Return ONLY valid JSON in this format:
{{
  "status": "SOLVED or UNSOLVED",
  "reason": "one short sentence",
  "gaps": ["gap1", "gap2"],
  "new_problem": "rewrite of the problem"
}}
"""

    parsed = _parse_model_json(ask_qwen(prompt))
    if parsed is None:
        parsed = {
            "status": "ERROR",
            "reason": "invalid JSON from model",
            "gaps": [],
            "new_problem": "",
        }

    parsed["problem"] = problem
    return parsed


# =============================
# 🚀 API
# =============================
@app.post("/analyze")
async def analyze(file: UploadFile = File(...)):
    """Analyse every problem listed in the first column of an uploaded CSV.

    The file is read with pandas' default settings, so its first row is
    treated as a header. Empty cells are skipped and remaining values are
    converted to strings before analysis.

    Returns:
        ``{"success": True, "results": [...]}`` with one entry per problem.
        A problem whose analysis raises is reported as an ``"ERROR"`` entry
        so one bad row does not fail the whole batch.

    Raises:
        HTTPException: 400 when the upload cannot be parsed as CSV or has
            no columns.
    """
    # NOTE(review): everything below is blocking work inside an async
    # handler; consider offloading it if concurrent requests matter.
    try:
        df = pd.read_csv(file.file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not read CSV: {exc}"
        ) from exc

    if df.shape[1] == 0:
        raise HTTPException(status_code=400, detail="CSV has no columns")

    # NaN cells would be embedded as the text "nan" and, echoed back as a
    # float NaN, make the JSON response unserialisable.
    problems = df.iloc[:, 0].dropna().astype(str).tolist()

    results = []
    for p in problems:
        try:
            results.append(analyze_problem(p))
        except Exception as e:
            logger.exception("Analysis failed for problem %r", p)
            results.append({
                "problem": p,
                "status": "ERROR",
                "reason": str(e),
                "gaps": [],
                "new_problem": "",
            })

    return {
        "success": True,
        "results": results,
    }