| import numpy as np
|
| import pandas as pd
|
| from sentence_transformers import SentenceTransformer
|
| from pathlib import Path
|
| from contextlib import asynccontextmanager
|
| from fastapi import FastAPI, Query
|
| from fastapi.middleware.cors import CORSMiddleware
|
| import uvicorn
|
| from typing import List, Dict, Any
|
|
|
|
|
|
|
|
|
# Resolve data paths relative to this file so the server works from any CWD.
BASE_DIR = Path(__file__).resolve().parent

EMBEDDINGS_PATH = BASE_DIR / "embeddings" / "course_embeddings.npy"

METADATA_PATH = BASE_DIR / "embeddings" / "course_metadata.csv"


# Module-level resources, populated once at startup by load_resources()
# (called from the FastAPI lifespan handler). None until then.
model = None  # SentenceTransformer used to embed incoming queries

course_embeddings = None  # numpy array loaded from EMBEDDINGS_PATH

df = None  # pandas DataFrame of course metadata loaded from METADATA_PATH
|
|
|
def load_resources():
    """Load the sentence-transformer model, course embeddings, and metadata.

    Populates the module-level ``model``, ``course_embeddings`` and ``df``
    globals. Raises FileNotFoundError when either data file is missing.
    """
    global model, course_embeddings, df

    print("Loading model and data...")

    model = SentenceTransformer("all-MiniLM-L6-v2")

    # Both artifacts must be present before loading either one.
    data_files_present = EMBEDDINGS_PATH.exists() and METADATA_PATH.exists()
    if not data_files_present:
        raise FileNotFoundError(f"Data files not found. Please ensure {EMBEDDINGS_PATH} and {METADATA_PATH} exist.")

    course_embeddings = np.load(EMBEDDINGS_PATH)
    df = pd.read_csv(METADATA_PATH)

    print("Resources loaded successfully.")
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler: load heavy resources once before serving.

    Everything before ``yield`` runs at startup; nothing follows the yield
    because there is no shutdown cleanup to perform.
    """
    # Blocking load is acceptable here: it runs once, before any request.
    load_resources()
    yield
|
|
|
|
|
|
|
|
|
# Application instance; the lifespan handler loads the model/data at startup.
app = FastAPI(title="Course Recommendation API", lifespan=lifespan)


# CORS: wide-open policy so any frontend origin can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# broader than most deployments need — consider pinning origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
|
|
|
| @app.get("/")
|
| async def root():
|
| return {"message": "Course Recommendation API is running!", "docs_url": "/docs"}
|
|
|
| @app.get("/search")
|
| async def search(q: str = Query(..., min_length=1)):
|
| """
|
| Semantic search for courses based on query 'q'.
|
| Replicates the logic from the Streamlit app.
|
| """
|
| global model, course_embeddings, df
|
|
|
| if model is None or course_embeddings is None or df is None:
|
| return {"error": "Server is not ready. Resources not loaded."}
|
|
|
|
|
| query_embedding = model.encode(q, normalize_embeddings=True)
|
|
|
|
|
|
|
|
|
| similarities = np.dot(course_embeddings, query_embedding)
|
|
|
|
|
|
|
| res_df = df.copy()
|
| res_df["similarity"] = similarities
|
|
|
|
|
|
|
| if "free" in q.lower().split():
|
| print("Smart Filter: 'free' detected. Filtering for unpaid courses.")
|
|
|
|
|
| res_df = res_df[res_df["is_paid"] == False]
|
|
|
|
|
| res_df["rating_norm"] = res_df["rating"] / 5.0
|
| res_df["reviews_norm"] = np.log1p(res_df["num_reviews"])
|
|
|
|
|
|
|
| res_df = res_df.sort_values(by="similarity", ascending=False).head(50)
|
|
|
|
|
|
|
| res_df["final_score"] = (
|
| 0.90 * res_df["similarity"] +
|
| 0.05 * res_df["rating_norm"] +
|
| 0.05 * res_df["reviews_norm"]
|
| )
|
|
|
|
|
| top_results = res_df.sort_values(by="final_score", ascending=False).head(6)
|
|
|
|
|
| print(f"\n--- Search Logic Debug ---")
|
| print(f"Query: '{q}'")
|
| print("Top results raw similarity vs final score:")
|
| print(top_results[["title", "similarity", "final_score"]].to_string(index=False))
|
| print("--------------------------\n")
|
|
|
|
|
| response = []
|
| for _, row in top_results.iterrows():
|
| response.append({
|
| "title": row["title"],
|
| "rating": float(row["rating"]),
|
| "num_reviews": int(row["num_reviews"]),
|
| "is_paid": bool(row["is_paid"]),
|
| "url": row["url"]
|
| })
|
|
|
| return response
|
|
|
| if __name__ == "__main__":
|
| uvicorn.run("server:app", host="127.0.0.1", port=8000, reload=True)
|
|
|