"""FastAPI entry point for the GitConnect chatbot service.

Exposes a health probe, a syllabus ingestion endpoint that chunks, embeds and
indexes course PDFs, and a chat endpoint that routes a query by intent before
asking the LLM for a markdown reply.
"""

import os
from pathlib import Path
from typing import List

from fastapi import FastAPI, HTTPException

from app.config import settings
from app.models import (
    ChatRequest,
    ChatResponse,
    CourseInput,
    CourseProcessError,
    CourseSummary,
    SyllabusProcessResponse,
)
from app.services.gemini_service import GeminiService
from app.services.intent_service import classify_intent
from app.services.pdf_service import chunk_text, fetch_pdf_text
from app.services.student_service import fetch_student_info
from app.vector_store import LocalVectorStore

app = FastAPI(title="GitConnect Chatbot Service", version="0.1.0")

# Debug artifact: holds the context of the most recent chat request only,
# because every write replaces the previous content.
_CONTEXT_LOG_PATH = Path(__file__).resolve().parents[1] / "context.txt"

# Intents that are answered from the student performance endpoint.
_STUDENT_INTENTS = frozenset({"attendance", "result"})


def _write_context_log(content: str) -> None:
    """Overwrite the context log file with ``content`` (best effort).

    The log is a debugging aid, so a filesystem failure is reported and
    swallowed instead of failing a request whose reply is already computed.
    """
    try:
        _CONTEXT_LOG_PATH.write_text(content, encoding="utf-8")
    except OSError as exc:
        print(f"Context log write skipped due to error: {exc}")


def _build_gemini() -> GeminiService:
    """Create a ``GeminiService`` from settings.

    Raises:
        HTTPException: 500 when the service rejects its configuration with a
            ``ValueError``.
    """
    try:
        return GeminiService(
            settings.gemini_api_key,
            settings.gemini_model,
            settings.embedding_model_name,
        )
    except ValueError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc


def _build_vector_store() -> LocalVectorStore:
    """Create a ``LocalVectorStore`` configured from settings."""
    return LocalVectorStore(
        settings.vector_data_dir,
        rag_index_db_url=settings.rag_index_db_url,
        neon_max_retries=settings.neon_max_retries,
        neon_retry_backoff_sec=settings.neon_retry_backoff_sec,
        neon_connect_timeout_sec=settings.neon_connect_timeout_sec,
    )


# NOTE(review): the whitespace inside the prompt templates below was rebuilt
# from a source whose line breaks were lost; the wording is unchanged, but
# confirm the exact blank lines against the deployed prompts.
def _performance_prompt(
    req: ChatRequest, intent: str, history_text: str, student_info: object
) -> str:
    """Build the prompt for attendance/result queries."""
    return f"""
You are a college assistant.
Respond in language code: {req.lang_code}.
Return markdown only.

Intent: {intent}
User query: {req.query}

Recent chat history:
{history_text}

Student performance context (authoritative):
{student_info}

Rules:
- Answer only from the provided student performance context.
- If asked for something unavailable in the context, clearly say it is unavailable.
- Be concise and practical.
"""


def _syllabus_prompt(
    req: ChatRequest, history_text: str, syllabus_context: str
) -> str:
    """Build the prompt for syllabus queries answered from retrieved chunks."""
    return f"""
You are a college assistant.
Respond in language code: {req.lang_code}.
Return markdown only.

Intent: syllabus
User query: {req.query}

Recent chat history:
{history_text}

Syllabus context (authoritative):
{syllabus_context}

Rules:
- Answer only from the provided syllabus context.
- For unit/module queries, list units clearly with headings/bullets.
- If exact detail is unavailable, state what is missing.
"""


def _general_prompt(req: ChatRequest, history_text: str) -> str:
    """Build the prompt for in-scope queries with no dedicated data source."""
    return f"""
You are a helpful college assistant.
Respond in language code: {req.lang_code}.
Return markdown only.

Intent: other (education-related)
User query: {req.query}

Recent chat history:
{history_text}

Rules:
- Keep the response casual, helpful, and education-focused.
- Do not answer non-education requests.
- If needed, ask a brief clarifying question.
"""


@app.on_event("startup")
def warmup_embedding_model() -> None:
    """Preload the embedding model at startup; failures are non-fatal."""
    try:
        GeminiService.preload_embedding_model(settings.embedding_model_name)
    except Exception as exc:
        # Startup should continue even if warmup fails.
        print(f"Embedding warmup skipped due to error: {exc}")


@app.get("/health")
def health() -> dict:
    """Return a static liveness payload."""
    return {"status": "ok"}


@app.post("/api/syllabus/process", response_model=SyllabusProcessResponse)
def process_syllabus(courses: List[CourseInput]) -> SyllabusProcessResponse:
    """Index and summarize the syllabus PDF of each course.

    Each course is processed independently: its PDF text is fetched, chunked,
    embedded, upserted into the vector store and summarized. A failure in any
    step records the course under ``failed`` and processing moves on.

    Args:
        courses: Courses to process; an empty list yields an empty response
            without constructing any service.

    Returns:
        Per-course summaries, per-course errors and the matching counters.

    Raises:
        HTTPException: 500 when the Gemini service cannot be configured.
    """
    if not courses:
        return SyllabusProcessResponse(
            results=[],
            failed=[],
            total_received=0,
            total_processed=0,
            total_failed=0,
        )

    gemini = _build_gemini()
    vector_store = _build_vector_store()

    results: List[CourseSummary] = []
    failed: List[CourseProcessError] = []

    for course in courses:
        try:
            syllabus_text = fetch_pdf_text(
                str(course.syllabus_url),
                timeout=settings.pdf_timeout_sec,
                max_retries=settings.pdf_max_retries,
                backoff_sec=settings.pdf_retry_backoff_sec,
            )
            if not syllabus_text:
                raise RuntimeError("No text extracted from PDF.")

            chunks = chunk_text(
                syllabus_text,
                chunk_size=settings.rag_chunk_size,
                overlap=settings.rag_chunk_overlap,
            )
            if not chunks:
                raise RuntimeError("Unable to create text chunks from syllabus content.")

            embeddings = [
                gemini.embed_text(chunk, task_type="retrieval_document")
                for chunk in chunks
            ]
            vector_store.upsert_documents(
                course.semester, course.course_code, chunks, embeddings
            )

            ai_summary = gemini.summarize_multilingual(course.name, syllabus_text)
            results.append(
                CourseSummary(course_code=course.course_code, ai_summary=ai_summary)
            )
        except Exception as exc:
            # Deliberately broad: one bad course must not abort the batch.
            failed.append(
                CourseProcessError(course_code=course.course_code, error=str(exc))
            )

    return SyllabusProcessResponse(
        results=results,
        failed=failed,
        total_received=len(courses),
        total_processed=len(results),
        total_failed=len(failed),
    )


@app.post("/api/chat", response_model=ChatResponse)
def chat(req: ChatRequest) -> ChatResponse:
    """Answer a chat query with a markdown reply.

    The query is classified first. Out-of-scope queries get a fixed refusal
    and never reach the LLM. Attendance/result intents are answered from the
    student performance endpoint, syllabus intents from retrieved syllabus
    chunks, and any other in-scope intent from the chat history alone.

    Args:
        req: Chat request carrying the query, history, student id, semester
            and reply language code.

    Returns:
        The reply wrapped in a ``ChatResponse``.

    Raises:
        HTTPException: 500 when the Gemini service cannot be configured or
            when building the context / generating the reply fails.
    """
    gemini = _build_gemini()

    history_text = "\n".join(f"{msg.role}: {msg.content}" for msg in req.history)

    intent, in_scope = classify_intent(req.query)
    if not in_scope:
        reply = (
            "I can help only with education-related queries such as syllabus, attendance, "
            "results, study planning, and course guidance."
        )
        _write_context_log(
            "Intent: out_of_scope\n"
            f"Query: {req.query}\n"
            "LLM Called: no\n"
            f"Response: {reply}\n"
        )
        return ChatResponse(reply_markdown=reply)

    uses_student_api = intent in _STUDENT_INTENTS
    prompt = ""
    chunks_passed = 0

    try:
        if uses_student_api:
            student_info = fetch_student_info(
                settings.student_performance_url_template,
                req.student_id,
                semester=req.semester,
                intent=intent,
            )
            prompt = _performance_prompt(req, intent, history_text, student_info)
        elif intent == "syllabus":
            vector_store = _build_vector_store()
            query_embedding = gemini.embed_text(req.query, task_type="retrieval_query")
            hits = vector_store.search(
                req.semester,
                query_embedding,
                top_k=settings.rag_syllabus_top_k,
            )
            # Defensive cap in case the store returns more than top_k hits.
            hits = hits[: settings.rag_syllabus_top_k]
            chunks_passed = len(hits)
            syllabus_context = "\n\n---\n\n".join(
                f"[{h.get('course_code', '')}] {h.get('chunk', '')}" for h in hits
            )
            prompt = _syllabus_prompt(req, history_text, syllabus_context)
        else:
            prompt = _general_prompt(req, history_text)

        reply = gemini.generate_markdown(prompt)
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"LLM response failed: {exc}"
        ) from exc

    _write_context_log(
        f"Intent: {intent}\n"
        f"Query: {req.query}\n"
        f"Student ID: {req.student_id}\n"
        f"Semester: {req.semester}\n"
        f"Language: {req.lang_code}\n"
        f"Student endpoint intent param: {intent if uses_student_api else 'not_called'}\n"
        f"Student endpoint semester param: {req.semester if uses_student_api else 'not_called'}\n"
        f"Syllabus top_k configured: {settings.rag_syllabus_top_k}\n"
        f"RAG chunk size configured: {settings.rag_chunk_size}\n"
        f"RAG chunk overlap configured: {settings.rag_chunk_overlap}\n"
        f"Syllabus chunks passed: {chunks_passed}\n"
        "LLM Called: yes\n"
        "\n--- Prompt Passed To LLM ---\n"
        f"{prompt}\n"
    )
    return ChatResponse(reply_markdown=reply)