# NOTE: "Spaces: Sleeping" below was Hugging Face Spaces UI residue captured
# with this paste; it is not part of the module and is preserved here only as
# a comment.
| import os | |
| import json | |
| import datetime | |
| import requests | |
| import logging | |
| # import gspread | |
| from dotenv import load_dotenv | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') | |
| logger = logging.getLogger(__name__) | |
| # from huggingface_hub import login as hf_login | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.embeddings.base import Embeddings | |
| from sentence_transformers import SentenceTransformer | |
| from langchain_tavily import TavilySearch | |
| from google.adk.tools import FunctionTool | |
| # === LOAD ENV === | |
| load_dotenv() | |
| # HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") | |
| # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") | |
| TAVILY_API_KEY = os.getenv("TAVILY_API_KEY") | |
| # SERVICE_ACCOUNT_JSON = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON") | |
| # SHEET_KEY = os.getenv("SHEET_KEY") | |
| PREDICTOR_API_URL = os.getenv("PREDICTOR_API_URL") | |
| PREDICTOR_API_KEY = os.getenv("PREDICTOR_API_KEY") | |
| # hf_login(token=HF_TOKEN) | |
| # === GOOGLE SHEET LOGGING === | |
| # service_account_dict = json.loads(SERVICE_ACCOUNT_JSON) if isinstance(SERVICE_ACCOUNT_JSON, str) else SERVICE_ACCOUNT_JSON | |
| # def add_query_to_sheet(user_id: str, query: str, response: str): | |
| # gc = gspread.service_account_from_dict(service_account_dict) | |
| # sh = gc.open_by_key(SHEET_KEY) | |
| # ws = sh.worksheet("Sheet1") | |
| # timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| # ws.append_row([user_id, timestamp, query, response]) | |
| # === VECTOR STORE === | |
def load_vector_store(data_dir: str):
    """Build an in-memory FAISS index from all markdown files in *data_dir*.

    Args:
        data_dir: Directory containing ``.md`` knowledge-base files.

    Returns:
        A FAISS vector store whose documents are the raw file contents,
        embedded locally with the ``all-MiniLM-L6-v2`` SentenceTransformer.

    Raises:
        FileNotFoundError: If *data_dir* does not exist (from ``os.listdir``).
        ValueError: If *data_dir* contains no ``.md`` files. Previously this
            surfaced as an opaque failure inside ``FAISS.from_texts``.
    """
    texts = []
    for fname in os.listdir(data_dir):
        if not fname.lower().endswith(".md"):
            continue
        path = os.path.join(data_dir, fname)
        try:
            with open(path, "r", encoding="utf-8") as f:
                texts.append(f.read())
        except UnicodeDecodeError:
            # Fallback for files saved in a legacy single-byte encoding;
            # latin-1 never fails to decode.
            with open(path, "r", encoding="latin-1") as f:
                texts.append(f.read())

    if not texts:
        raise ValueError(
            f"No .md files found in {data_dir!r}; cannot build vector store."
        )

    st_model = SentenceTransformer("all-MiniLM-L6-v2")

    class LocalEmbeddings(Embeddings):
        """Adapter exposing the local SentenceTransformer via LangChain's Embeddings API."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())


# Module-level store built once at import time from the local markdown corpus.
vector_store = load_vector_store("College_markdown")
| # === TOOL DEFINITIONS === | |
def db_search(query: str) -> dict:
    """Retrieve knowledge-base passages most similar to *query*.

    Runs a similarity search against the module-level FAISS store and
    returns up to six matching document bodies under the ``"results"`` key.
    """
    hits = vector_store.similarity_search(query, k=6)
    if not hits:
        return {"results": []}
    return {"results": [hit.page_content for hit in hits]}
def tavily_search(query: str) -> dict:
    """Run a Tavily web search and return plain-text snippets.

    Args:
        query: Free-text search query.

    Returns:
        ``{"results": [...]}`` where each entry is a snippet string. A
        missing API key or a failed API call is reported as a single
        message string rather than raising, so the agent tool call
        always succeeds.
    """
    if not TAVILY_API_KEY:
        return {"results": ["Tavily API key not configured"]}
    tool = TavilySearch(
        api_key=TAVILY_API_KEY,
        max_results=6,
        topic="general",
        include_raw_content=True,
    )
    try:
        result = tool.invoke({"query": query})
    except Exception as e:
        # Network/API failures should degrade gracefully, not crash the agent.
        logger.error("Tavily search failed: %s", e)
        return {"results": [f"Tavily search failed: {e}"]}
    # Filter out entries with a missing/None 'content' so callers always
    # receive a list of strings (the old code could include None values).
    snippets = [
        item.get('content')
        for item in result.get('results', [])
        if item.get('content')
    ]
    return {"results": snippets}
def college_predictor(
    userCrl: int,
    userCategory: str,
    userGender: str,
    userHomeState: str,
    limit: int = 4,
    counsellingName: str = "csab",
    collegeName: str = "national institute of technology",
    branchName: str = "computer science and engineering"
) -> str:
    """Predict colleges for a candidate via the external predictor API.

    Args:
        userCrl: Candidate's Common Rank List (CRL) rank.
        userCategory: Reservation category string passed through to the API.
        userGender: Gender string passed through to the API.
        userHomeState: Home state passed through to the API.
        limit: Maximum number of predictions to format.
        counsellingName: Counselling identifier (default ``"csab"``).
        collegeName: Optional college filter; sent as ``collegeQuery`` when truthy.
        branchName: Optional branch filter; sent as ``branchQuery`` when truthy.

    Returns:
        A human-readable prediction summary, or an error description.
        This function never raises: every failure mode is converted to a
        string so the agent tool call always completes.
    """
    logger.info("=" * 80)
    logger.info("PREDICTOR API CALL STARTED")
    logger.info("=" * 80)
    logger.info("Input Parameters:")
    # Lazy %-style args: formatting is skipped when INFO is disabled.
    logger.info("  userCrl: %s (type: %s)", userCrl, type(userCrl))
    logger.info("  userCategory: %s (type: %s)", userCategory, type(userCategory))
    logger.info("  userGender: %s (type: %s)", userGender, type(userGender))
    logger.info("  userHomeState: %s (type: %s)", userHomeState, type(userHomeState))
    logger.info("  limit: %s (type: %s)", limit, type(limit))
    logger.info("  counsellingName: %s (type: %s)", counsellingName, type(counsellingName))
    logger.info("  collegeName: %s (type: %s)", collegeName, type(collegeName))
    logger.info("  branchName: %s (type: %s)", branchName, type(branchName))

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {PREDICTOR_API_KEY}"
    }
    # Defined up front so the HTTPError handler can reference it safely
    # (replaces the fragile `'response' in locals()` check).
    response = None
    try:
        logger.info("Converting parameters...")
        converted_crl = int(userCrl)
        logger.info("  userCrl converted: %s (type: %s)", converted_crl, type(converted_crl))

        params = {
            "userCrl": converted_crl,
            "userCategory": userCategory,
            "userGender": userGender,
            "userHomeState": userHomeState,
            "limit": limit,
            "counsellingName": counsellingName,
        }
        if collegeName:
            params["collegeQuery"] = collegeName
            logger.info("  Added collegeQuery: %s", collegeName)
        if branchName:
            params["branchQuery"] = branchName
            logger.info("  Added branchQuery: %s", branchName)

        logger.info("Final API Request:")
        logger.info("  URL: %s", PREDICTOR_API_URL)
        # SECURITY: never log the real Authorization header -- the old code
        # wrote the API key into the log stream.
        redacted_headers = {**headers, "Authorization": "Bearer ***REDACTED***"}
        logger.info("  Headers: %s", json.dumps(redacted_headers, indent=2))
        logger.info("  Payload: %s", json.dumps(params, indent=2))

        logger.info("Making API request...")
        response = requests.post(PREDICTOR_API_URL, json=params, headers=headers, timeout=30)

        logger.info("API Response:")
        logger.info("  Status Code: %s", response.status_code)
        logger.info("  Response Headers: %s", dict(response.headers))
        logger.info("  Response Content: %s", response.text)
        response.raise_for_status()

        data = response.json()
        logger.info("Response parsed successfully")
        logger.info("  Parsed data keys: %s", list(data.keys()) if data else "None")

        if not data or 'data' not in data or 'colleges' not in data['data']:
            logger.warning("No college predictions found in response")
            return "No college predictions found with the given criteria."

        colleges = data['data']['colleges']
        logger.info("Found %d colleges in response", len(colleges))
        if not colleges:
            logger.warning("Colleges list is empty")
            return "No college predictions found with the given criteria."

        results = []
        for i, college in enumerate(colleges[:limit], start=1):
            logger.info("Processing college %d: %s", i, college.get('Institute', 'N/A'))
            parts = [f"{i}. College: {college.get('Institute', 'N/A')}"]
            if college.get('Academic_Program_Name'):
                parts.append(f"Branch: {college['Academic_Program_Name']}")
            if college.get('Seat_Type'):
                parts.append(f"Category: {college['Seat_Type']}")
            if college.get('Max_ClosingRank'):
                parts.append(f"Closing Rank: {college['Max_ClosingRank']}")
            results.append(", ".join(parts))

        final_result = (
            f"Based on your rank {userCrl}, here are college predictions:\n\n"
            + "\n".join(results)
        )
        logger.info("=" * 80)
        logger.info("PREDICTOR API CALL COMPLETED SUCCESSFULLY")
        logger.info("=" * 80)
        return final_result

    except ValueError as e:
        # Covers int() conversion failure and a non-JSON response body
        # (requests' JSONDecodeError subclasses ValueError).
        error_msg = f"Parameter conversion error: {str(e)}"
        logger.error("ValueError: %s", error_msg)
        logger.error("=" * 80)
        return error_msg
    except requests.exceptions.HTTPError as e:
        error_msg = f"HTTP Error: {str(e)}"
        logger.error("HTTPError: %s", error_msg)
        logger.error("Response body: %s", response.text if response is not None else "No response")
        logger.error("=" * 80)
        return f"Error fetching college predictions: {error_msg}"
    except requests.exceptions.RequestException as e:
        error_msg = f"Request Error: {str(e)}"
        logger.error("RequestException: %s", error_msg)
        logger.error("=" * 80)
        return f"Error fetching college predictions: {error_msg}"
    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        logger.error("Exception: %s", error_msg)
        logger.error("=" * 80)
        return f"Error fetching college predictions: {error_msg}"
def mentor_search(college_query: str) -> str:
    """Search mentors by college name and return formatted profile links.

    Args:
        college_query: Free-text college name to filter mentors by.

    Returns:
        Up to three ``name: profile URL`` lines, or a human-readable
        message when nothing is found or the request fails (never raises).
    """
    url = "https://api.precollege.in/api/v1/mentor/list"
    try:
        # Pass the query via `params` so requests URL-encodes it; the old
        # f-string interpolation produced an invalid URL for queries
        # containing spaces or special characters.
        response = requests.get(
            url,
            params={"college": college_query, "limit": 3},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        if not data or "data" not in data or not data["data"]:
            return f"No mentors found for '{college_query}'."
        lines = []
        for mentor in data["data"]:
            name = mentor.get("name", "Unknown")
            username = mentor.get("username", "")
            profile_url = (
                f"https://www.precollege.in/mentor/{username}"
                if username else "No profile link"
            )
            lines.append(f"{name}: {profile_url}")
        return f"Mentors for '{college_query}':\n\n" + "\n".join(lines)
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body from response.json(), which the
        # old code let propagate to the caller.
        return f"Failed to fetch mentors: {str(e)}"
# === FUNCTION TOOL WRAPPERS ===
# Expose each callable to the Google ADK agent as a FunctionTool so the
# model can invoke it by name with structured arguments.
db_tool = FunctionTool(db_search)  # similarity search over the local FAISS store
tavily_tool = FunctionTool(tavily_search)  # live web search via Tavily
predictor_tool = FunctionTool(college_predictor)  # rank-based college prediction API
mentor_tool = FunctionTool(mentor_search)  # mentor directory lookup by college