"""Command-line sanity checks for the project-similarity model.

Runs a self-retrieval test over the stored project metadata and a handful of
hand-written queries, then prints an overall status derived from both scores.
"""

import logging

import pandas as pd

from src.similarity_model import find_similar_projects
from src.similarity_model import load_metadata

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s"
)

logger = logging.getLogger(__name__)

TOP_K = 5
SELF_TEST_SAMPLES = 20

# Horizontal rule shared by every section banner printed below.
_RULE = "=============================="


def _print_banner(title):
    """Print *title* between two rule lines, preceded by a blank line."""
    print("\n" + _RULE)
    print(title)
    print(_RULE)


def run_self_test():
    """Check whether sampled projects retrieve themselves as the top match.

    Each of the first ``SELF_TEST_SAMPLES`` metadata rows (fewer if the
    metadata is shorter) is sent to ``find_similar_projects`` with
    ``top_k=1``. A row counts as a hit when the ``project_id`` of the first
    result equals the row's loop index.

    Returns:
        float: Hits divided by rows tested, or ``0.0`` when there are no
        rows to test.
    """
    df = load_metadata()
    total = min(len(df), SELF_TEST_SAMPLES)
    success = 0

    for i in range(total):
        # NOTE(review): label-based lookup and the ``== i`` comparison below
        # presume the metadata index is 0..n-1 and that ``project_id``
        # matches it -- confirm against load_metadata().
        row = df.loc[i]

        results = find_similar_projects(
            title=row.get("project_title", ""),
            abstract=row.get("abstract", ""),
            description=row.get("description", ""),
            features=row.get("features", []),
            top_k=1,
        )

        # An empty result set has no first row to inspect; count it as a
        # miss instead of letting ``iloc[0]`` raise IndexError.
        if results.empty or "project_id" not in results.columns:
            logger.warning("Self test: no usable result for row %s", i)
            continue

        if int(results.iloc[0]["project_id"]) == i:
            success += 1

    # Empty metadata would otherwise divide by zero.
    score = success / total if total else 0.0

    _print_banner("SELF RETRIEVAL TEST")
    print(f"Projects Tested : {total}")
    print(f"Top1 Accuracy : {score:.2%}")
    print(_RULE)

    return score


def run_real_queries():
    """Run a fixed set of hand-written queries and report the top match.

    For every query that yields a usable result, prints the best match's
    title, ``hybrid_score`` and ``duplicate_risk``.

    Returns:
        float: Mean ``hybrid_score`` of the top match over the queries that
        produced a usable result, or ``0`` when none did.
    """
    # Whitespace inside the descriptions is kept exactly as it was so the
    # text handed to the model is unchanged.
    queries = [
        {
            "title": "AI Clinic Management System",
            "description": (
                " Smart clinic with booking, chatbot, patient records,"
                " doctor dashboard. "
            ),
        },
        {
            "title": "Smart Library Assistant",
            "description": (
                " Library app with chatbot, recommendation system,"
                " qr code borrowing. "
            ),
        },
        {
            "title": "Attendance Face Recognition",
            "description": (
                " Attendance system using face recognition and reports. "
            ),
        },
        {
            "title": "E-commerce Recommendation Platform",
            "description": (
                " Online shopping website with recommendation engine,"
                " payments and dashboard. \n"
            ),
        },
    ]

    _print_banner("REAL QUERY TEST")

    total_score = 0
    count = 0

    for q in queries:
        results = find_similar_projects(
            title=q["title"],
            description=q["description"],
            top_k=1,
        )

        # Skip queries without a scored first row rather than crashing on
        # ``iloc[0]``; they do not contribute to the average.
        if results.empty or "hybrid_score" not in results.columns:
            logger.warning("No scored result for query %r", q["title"])
            continue

        top = results.iloc[0]
        score = float(top["hybrid_score"])
        risk = str(top["duplicate_risk"])
        top_title = str(top["project_title"])

        total_score += score
        count += 1

        print()
        print("Query:", q["title"])
        print("Top Match:", top_title)
        print("Score:", round(score, 4))
        print("Risk:", risk)

    avg = total_score / count if count else 0

    print("\n" + _RULE)
    print(f"Average Query Score: {avg:.4f}")
    print(_RULE)

    return avg


def final_status(
    self_score,
    query_score
):
    """Print an overall verdict from the two evaluation scores.

    The overall score is ``0.60 * self_score + 0.40 * query_score`` and is
    mapped to a label by fixed thresholds (0.90, 0.75, 0.60).

    Args:
        self_score: Value returned by ``run_self_test``.
        query_score: Value returned by ``run_real_queries``.
    """
    _print_banner("FINAL MODEL STATUS")

    final_score = (
        0.60 * self_score +
        0.40 * query_score
    )

    if final_score >= 0.90:
        print("EXCELLENT [OK]")
    elif final_score >= 0.75:
        print("VERY GOOD [OK]")
    elif final_score >= 0.60:
        print("GOOD [WARN]")
    else:
        print("NEEDS IMPROVEMENT [FAIL]")

    print("Overall Score:", round(final_score, 4))
    print(_RULE + "\n")


if __name__ == "__main__":
    self_score = run_self_test()
    query_score = run_real_queries()
    final_status(self_score, query_score)