import logging
import os
from pathlib import Path
from typing import List

from fastapi import FastAPI, HTTPException

from app.config import settings
from app.models import (
    ChatRequest,
    ChatResponse,
    CourseInput,
    CourseProcessError,
    CourseSummary,
    SyllabusProcessResponse,
)
from app.services.gemini_service import GeminiService
from app.services.intent_service import classify_intent
from app.services.pdf_service import chunk_text, fetch_pdf_text
from app.services.student_service import fetch_student_info
from app.vector_store import LocalVectorStore
|
|
|
|
# FastAPI application instance; all endpoints below are registered on it.
app = FastAPI(title="GitConnect Chatbot Service", version="0.1.0")
# Debug/audit log file placed two directories above this module
# (parents[1] of the resolved file path). Overwritten on every chat
# request by _write_context_log, so it only ever holds the latest request.
_CONTEXT_LOG_PATH = Path(__file__).resolve().parents[1] / "context.txt"
|
|
|
|
def _write_context_log(content: str) -> None:
    """Replace the context debug log's contents with *content* (UTF-8)."""
    # Equivalent to Path.write_text: open for (over)write, write, close.
    with _CONTEXT_LOG_PATH.open("w", encoding="utf-8") as log_file:
        log_file.write(content)
|
|
|
|
| @app.on_event("startup") |
| def warmup_embedding_model() -> None: |
| try: |
| GeminiService.preload_embedding_model(settings.embedding_model_name) |
| except Exception as exc: |
| |
| print(f"Embedding warmup skipped due to error: {exc}") |
|
|
|
|
| @app.get("/health") |
| def health() -> dict: |
| return {"status": "ok"} |
|
|
|
|
| @app.post("/api/syllabus/process", response_model=SyllabusProcessResponse) |
| def process_syllabus(courses: List[CourseInput]) -> SyllabusProcessResponse: |
| if not courses: |
| return SyllabusProcessResponse( |
| results=[], |
| failed=[], |
| total_received=0, |
| total_processed=0, |
| total_failed=0, |
| ) |
|
|
| try: |
| gemini = GeminiService( |
| settings.gemini_api_key, |
| settings.gemini_model, |
| settings.embedding_model_name, |
| ) |
| except ValueError as exc: |
| raise HTTPException(status_code=500, detail=str(exc)) from exc |
|
|
| vector_store = LocalVectorStore( |
| settings.vector_data_dir, |
| rag_index_db_url=settings.rag_index_db_url, |
| neon_max_retries=settings.neon_max_retries, |
| neon_retry_backoff_sec=settings.neon_retry_backoff_sec, |
| neon_connect_timeout_sec=settings.neon_connect_timeout_sec, |
| ) |
|
|
| results: List[CourseSummary] = [] |
| failed: List[CourseProcessError] = [] |
|
|
| for course in courses: |
| try: |
| syllabus_text = fetch_pdf_text( |
| str(course.syllabus_url), |
| timeout=settings.pdf_timeout_sec, |
| max_retries=settings.pdf_max_retries, |
| backoff_sec=settings.pdf_retry_backoff_sec, |
| ) |
| if not syllabus_text: |
| raise RuntimeError("No text extracted from PDF.") |
|
|
| chunks = chunk_text( |
| syllabus_text, |
| chunk_size=settings.rag_chunk_size, |
| overlap=settings.rag_chunk_overlap, |
| ) |
| if not chunks: |
| raise RuntimeError("Unable to create text chunks from syllabus content.") |
|
|
| embeddings = [ |
| gemini.embed_text(chunk, task_type="retrieval_document") |
| for chunk in chunks |
| ] |
| vector_store.upsert_documents(course.semester, course.course_code, chunks, embeddings) |
|
|
| ai_summary = gemini.summarize_multilingual(course.name, syllabus_text) |
| results.append(CourseSummary(course_code=course.course_code, ai_summary=ai_summary)) |
| except Exception as exc: |
| failed.append(CourseProcessError(course_code=course.course_code, error=str(exc))) |
|
|
| return SyllabusProcessResponse( |
| results=results, |
| failed=failed, |
| total_received=len(courses), |
| total_processed=len(results), |
| total_failed=len(failed), |
| ) |
|
|
|
|
| @app.post("/api/chat", response_model=ChatResponse) |
| def chat(req: ChatRequest) -> ChatResponse: |
| try: |
| gemini = GeminiService( |
| settings.gemini_api_key, |
| settings.gemini_model, |
| settings.embedding_model_name, |
| ) |
| except ValueError as exc: |
| raise HTTPException(status_code=500, detail=str(exc)) from exc |
|
|
| history_text = "\n".join( |
| [f"{msg.role}: {msg.content}" for msg in req.history] |
| ) |
| intent, in_scope = classify_intent(req.query) |
|
|
| if not in_scope: |
| reply = ( |
| "I can help only with education-related queries such as syllabus, attendance, " |
| "results, study planning, and course guidance." |
| ) |
| _write_context_log( |
| "Intent: out_of_scope\n" |
| f"Query: {req.query}\n" |
| "LLM Called: no\n" |
| f"Response: {reply}\n" |
| ) |
| return ChatResponse(reply_markdown=reply) |
|
|
| prompt = "" |
| chunks_passed = 0 |
|
|
| try: |
| if intent in {"attendance", "result"}: |
| student_info = fetch_student_info( |
| settings.student_performance_url_template, |
| req.student_id, |
| semester=req.semester, |
| intent=intent, |
| ) |
| prompt = f""" |
| You are a college assistant. Respond in language code: {req.lang_code}. |
| Return markdown only. |
| |
| Intent: {intent} |
| User query: {req.query} |
| |
| Recent chat history: |
| {history_text} |
| |
| Student performance context (authoritative): |
| {student_info} |
| |
| Rules: |
| - Answer only from the provided student performance context. |
| - If asked for something unavailable in the context, clearly say it is unavailable. |
| - Be concise and practical. |
| """ |
| elif intent == "syllabus": |
| vector_store = LocalVectorStore( |
| settings.vector_data_dir, |
| rag_index_db_url=settings.rag_index_db_url, |
| neon_max_retries=settings.neon_max_retries, |
| neon_retry_backoff_sec=settings.neon_retry_backoff_sec, |
| neon_connect_timeout_sec=settings.neon_connect_timeout_sec, |
| ) |
| query_embedding = gemini.embed_text(req.query, task_type="retrieval_query") |
| hits = vector_store.search( |
| req.semester, |
| query_embedding, |
| top_k=settings.rag_syllabus_top_k, |
| ) |
| hits = hits[: settings.rag_syllabus_top_k] |
| chunks_passed = len(hits) |
| syllabus_context = "\n\n---\n\n".join( |
| [f"[{h.get('course_code', '')}] {h.get('chunk', '')}" for h in hits] |
| ) |
|
|
| prompt = f""" |
| You are a college assistant. Respond in language code: {req.lang_code}. |
| Return markdown only. |
| |
| Intent: syllabus |
| User query: {req.query} |
| |
| Recent chat history: |
| {history_text} |
| |
| Syllabus context (authoritative): |
| {syllabus_context} |
| |
| Rules: |
| - Answer only from the provided syllabus context. |
| - For unit/module queries, list units clearly with headings/bullets. |
| - If exact detail is unavailable, state what is missing. |
| """ |
| else: |
| prompt = f""" |
| You are a helpful college assistant. Respond in language code: {req.lang_code}. |
| Return markdown only. |
| |
| Intent: other (education-related) |
| User query: {req.query} |
| |
| Recent chat history: |
| {history_text} |
| |
| Rules: |
| - Keep the response casual, helpful, and education-focused. |
| - Do not answer non-education requests. |
| - If needed, ask a brief clarifying question. |
| """ |
|
|
| reply = gemini.generate_markdown(prompt) |
| except Exception as exc: |
| raise HTTPException(status_code=500, detail=f"LLM response failed: {exc}") from exc |
|
|
| _write_context_log( |
| f"Intent: {intent}\n" |
| f"Query: {req.query}\n" |
| f"Student ID: {req.student_id}\n" |
| f"Semester: {req.semester}\n" |
| f"Language: {req.lang_code}\n" |
| f"Student endpoint intent param: {intent if intent in {'attendance', 'result'} else 'not_called'}\n" |
| f"Student endpoint semester param: {req.semester if intent in {'attendance', 'result'} else 'not_called'}\n" |
| f"Syllabus top_k configured: {settings.rag_syllabus_top_k}\n" |
| f"RAG chunk size configured: {settings.rag_chunk_size}\n" |
| f"RAG chunk overlap configured: {settings.rag_chunk_overlap}\n" |
| f"Syllabus chunks passed: {chunks_passed}\n" |
| "LLM Called: yes\n" |
| "\n--- Prompt Passed To LLM ---\n" |
| f"{prompt}\n" |
| ) |
|
|
| return ChatResponse(reply_markdown=reply) |
|
|