Maga222006 commited on
Commit
bae14fb
·
verified ·
1 Parent(s): c0f7274

Upload 27 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime image for the AI Tutor FastAPI service (served by uvicorn on 7860).
FROM python:3.11-slim

WORKDIR /app

# build-essential lets pip compile any native wheels; the apt cache is removed
# in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code.
COPY agents/ ./agents/
COPY app.py .

# Directory where uploaded PDFs are stored (UPLOAD_DIR in app.py).
RUN mkdir -p documents

# Unbuffered stdout/stderr so logs appear immediately in container output.
ENV PYTHONUNBUFFERED=1

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
agents/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """AI Tutor Multi-Agent System - Agents Package"""
2
+
3
+ from agents.model import llm
4
+ from agents.states import QuizTask, Quiz
5
+ from agents.tools import Docs
6
+ from agents.summarizer import summarize_pdf
agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (480 Bytes). View file
 
agents/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (429 Bytes). View file
 
agents/__pycache__/examiner.cpython-311.pyc ADDED
Binary file (2.72 kB). View file
 
agents/__pycache__/model.cpython-311.pyc ADDED
Binary file (388 Bytes). View file
 
agents/__pycache__/model.cpython-312.pyc ADDED
Binary file (355 Bytes). View file
 
agents/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (3.58 kB). View file
 
agents/__pycache__/sessions.cpython-311.pyc ADDED
Binary file (5.14 kB). View file
 
agents/__pycache__/states.cpython-311.pyc ADDED
Binary file (3.86 kB). View file
 
agents/__pycache__/states.cpython-312.pyc ADDED
Binary file (3.29 kB). View file
 
agents/__pycache__/summarizer.cpython-311.pyc ADDED
Binary file (4.17 kB). View file
 
agents/__pycache__/summarizer.cpython-312.pyc ADDED
Binary file (3.88 kB). View file
 
agents/__pycache__/supervisor.cpython-311.pyc ADDED
Binary file (7.19 kB). View file
 
agents/__pycache__/tools.cpython-311.pyc ADDED
Binary file (4.25 kB). View file
 
agents/__pycache__/tools.cpython-312.pyc ADDED
Binary file (3.82 kB). View file
 
agents/__pycache__/workflow.cpython-311.pyc ADDED
Binary file (7.84 kB). View file
 
agents/examiner.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import ChatPromptTemplate
2
+ from langgraph.prebuilt import create_react_agent
3
+ from agents.states import Quiz
4
+ from agents.prompts import EXAMINER_SYSTEM_PROMPT, EXAMINER_USER_PROMPT
5
+ from agents.tools import Docs
6
+ from agents.model import llm
7
+
8
+
9
def create_examiner_agent(docs: Docs):
    """
    Build a ReAct agent wired to the document's search tool.

    Args:
        docs: Docs instance with loaded document

    Returns:
        A LangGraph ReAct agent configured for quiz generation
    """
    return create_react_agent(
        model=llm,
        tools=[docs.as_search_tool()],
    )
27
+
28
+
29
def generate_quiz(docs: Docs, summary: str, num_questions: int = 5) -> Quiz:
    """
    Generate a quiz based on the document and summary.

    Args:
        docs: Docs instance with loaded document
        summary: Summary of the document
        num_questions: Number of questions to generate

    Returns:
        Quiz object with generated questions
    """
    # Force the model to emit a Quiz-shaped structured response.
    llm_with_structure = llm.with_structured_output(Quiz)

    # NOTE: the original also built docs.as_search_tool() here, but a
    # structured-output chain cannot call tools, so that local was dead code
    # and has been removed.

    # Ground the questions in representative chunks of the document text.
    context_docs = docs.similarity_search("main concepts and key topics", k=5)
    context = "\n\n".join(doc.page_content for doc in context_docs)

    prompt = ChatPromptTemplate.from_messages([
        ("system", EXAMINER_SYSTEM_PROMPT),
        ("human", EXAMINER_USER_PROMPT + "\n\nAdditional Context from Document:\n{context}")
    ])

    chain = prompt | llm_with_structure

    quiz = chain.invoke({
        "summary": summary,
        "num_questions": num_questions,
        "context": context
    })

    return quiz
agents/model.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv

# Load API credentials (e.g. the Google GenAI key) from a local .env file if present.
load_dotenv()

# Shared chat-model instance used by all agents in this package.
# NOTE(review): assumes provider credentials are supplied via environment
# variables -- confirm deployment configuration.
llm = init_chat_model("google_genai:gemini-2.0-flash-lite")
agents/prompts.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Shared safety preamble concatenated (not templated) into every prompt below,
# so it deliberately contains no {placeholders}.
CONTENT_SAFETY_GUARDRAIL = """IMPORTANT CONTENT SAFETY GUIDELINES:
- This system is designed for educational use by learners of all ages, including children.
- You MUST NOT generate, discuss, or reference any inappropriate content including:
* Sexual, pornographic, or adult content
* Graphic violence or gore
* Hate speech, discrimination, or harmful stereotypes
* Content promoting illegal activities or substance abuse
* Personal attacks or bullying
- If source material contains inappropriate content, skip it entirely and focus only on age-appropriate educational topics.
- Maintain a professional, supportive, and educational tone at all times.
- If asked to discuss inappropriate topics, politely redirect to the educational material.
"""

# NOTE(review): the two summarizer prompts below appear unused --
# agents/summarizer.py defines its own inline prompts. Confirm before removing.
SUMMARIZER_MAP_PROMPT = """Summarize the following text in 3-5 short bullet points.

""" + CONTENT_SAFETY_GUARDRAIL + """
{chunk}"""

SUMMARIZER_REDUCE_PROMPT = """You are combining partial summaries of a long document.
Write a concise final summary (max ~300 words) with clear sections if useful.

""" + CONTENT_SAFETY_GUARDRAIL + """
Partial summaries:
{partials}"""

# System prompt for the Examiner agent; expects a {summary} template variable.
EXAMINER_SYSTEM_PROMPT = """You are an expert educational quiz creator. Your task is to generate quiz questions based on the provided document summary and context.

""" + CONTENT_SAFETY_GUARDRAIL + """
Create a diverse set of quiz questions that test understanding of the key concepts. Include:
- Multiple choice questions (with 4 options each)
- Fill-in-the-gap questions (with options provided)
- Type-in questions (short answer)

Each question should:
1. Be clear and unambiguous
2. Test a specific concept from the document
3. Have a definitive correct answer
4. Be appropriately challenging for the material

Use the search tool to retrieve specific details from the document when needed.

Document Summary:
{summary}"""

# Human prompt for the Examiner agent; expects {num_questions}.
EXAMINER_USER_PROMPT = """Generate a quiz with {num_questions} questions based on the document.
Make sure to include a mix of question types: multiple_choice, fill_gap, and type_in.

Use the search tool to find specific facts and details from the document to create accurate questions."""

# System prompt for the Supervisor; expects {summary} and {quiz_results}.
SUPERVISOR_SYSTEM_PROMPT = """You are a Socratic tutor providing feedback on a student's quiz performance.

""" + CONTENT_SAFETY_GUARDRAIL + """
Your approach:
1. Never give direct answers immediately
2. Guide students through leading questions
3. Help them discover concepts on their own
4. Provide encouragement and constructive feedback
5. Use the document search tool to reference specific material when helpful

Document Summary:
{summary}

Quiz Results:
{quiz_results}"""

SUPERVISOR_USER_PROMPT = """The student has completed the quiz. Review their answers and provide Socratic feedback.

For incorrect answers:
- Ask guiding questions to help them understand the concept
- Reference relevant parts of the document
- Encourage them to think through the problem

For correct answers:
- Briefly acknowledge the correct response
- Optionally ask a follow-up question to deepen understanding

Provide a summary of their performance and suggestions for improvement."""

# Follow-up chat prompt; expects {user_message}.
# NOTE(review): appears unused -- supervisor.chat_with_supervisor builds its
# messages inline. Confirm before removing.
SUPERVISOR_CHAT_PROMPT = """Continue the tutoring conversation. The student has asked:
{user_message}

""" + CONTENT_SAFETY_GUARDRAIL + """
Use the search tool if needed to find relevant information from the document.
Maintain your Socratic approach - guide rather than tell."""
agents/sessions.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import uuid
from dataclasses import dataclass, field
from typing import Dict, List, Optional

from agents.states import Quiz
from agents.tools import Docs
6
+
7
+
8
@dataclass
class Session:
    """Represents a single user session with all associated data."""
    session_id: str              # UUID4 string assigned by SessionManager
    file_path: str               # path to the uploaded PDF on disk
    docs: Optional[Docs] = None  # vector-store wrapper over the PDF
    summary: str = ""            # populated after /summarizer runs
    quiz: Optional[Quiz] = None  # populated after /examiner runs
    user_answers: List[str] = field(default_factory=list)  # latest submitted answers
    messages: List[dict] = field(default_factory=list)     # {"role", "content"} chat history
18
+
19
+
20
class SessionManager:
    """Manages user sessions for the AI Tutor API.

    Sessions live only in process memory; they do not survive restarts.
    """

    def __init__(self):
        # session_id -> Session
        self._sessions: Dict[str, Session] = {}

    def create_session(self, file_path: str) -> Session:
        """
        Create a new session with the given PDF file.

        Args:
            file_path: Path to the saved PDF file

        Returns:
            New Session object
        """
        session_id = str(uuid.uuid4())
        # Building Docs eagerly indexes the PDF into the vector store.
        docs = Docs(file_path)
        session = Session(
            session_id=session_id,
            file_path=file_path,
            docs=docs
        )
        self._sessions[session_id] = session
        return session

    def get_session(self, session_id: str) -> Optional[Session]:
        """Get a session by ID, or None if unknown."""
        return self._sessions.get(session_id)

    def delete_session(self, session_id: str) -> bool:
        """
        Delete a session and clean up resources.

        Also removes the uploaded PDF from disk (best-effort) so the upload
        directory does not grow without bound; previously the file leaked.

        Returns:
            True if session was deleted, False if not found
        """
        session = self._sessions.pop(session_id, None)
        if session is None:
            return False
        try:
            os.remove(session.file_path)
        except OSError:
            # File already gone or not removable -- the session is still deleted.
            pass
        return True

    def update_summary(self, session_id: str, summary: str) -> bool:
        """Update the summary for a session. Returns False if session is unknown."""
        session = self.get_session(session_id)
        if session:
            session.summary = summary
            return True
        return False

    def update_quiz(self, session_id: str, quiz: Quiz) -> bool:
        """Update the quiz for a session. Returns False if session is unknown."""
        session = self.get_session(session_id)
        if session:
            session.quiz = quiz
            return True
        return False

    def update_user_answers(self, session_id: str, answers: List[str]) -> bool:
        """Update user answers for a session. Returns False if session is unknown."""
        session = self.get_session(session_id)
        if session:
            session.user_answers = answers
            return True
        return False

    def add_message(self, session_id: str, role: str, content: str) -> bool:
        """Append a {"role", "content"} entry to the conversation history."""
        session = self.get_session(session_id)
        if session:
            session.messages.append({"role": role, "content": content})
            return True
        return False

    def get_messages(self, session_id: str) -> List[dict]:
        """Get conversation history for a session ([] if unknown)."""
        session = self.get_session(session_id)
        if session:
            return session.messages
        return []
100
+
101
+
102
+ session_manager = SessionManager()
agents/states.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal, Optional, List
2
+ from pydantic import BaseModel, Field
3
+
4
+
5
class QuizTask(BaseModel):
    """
    Schema for a single quiz task that the agent should generate or return.
    """

    # NOTE: the Field descriptions below are part of the structured-output
    # schema sent to the LLM -- do not edit them casually.
    task_id: int = Field(
        ...,
        description=(
            "Unique integer identifier of the task within a quiz. "
            "Start from 1 and increment by 1 for each new task."
        ),
    )

    task: str = Field(
        ...,
        description=(
            "The question text shown to the student. "
            "Must be a complete, clear instruction or question in plain language."
        ),
    )

    task_type: Literal["fill_gap", "multiple_choice", "type_in"] = Field(
        ...,
        description=(
            "The interaction type for this task:\n"
            "- 'fill_gap': a sentence or formula with a missing part the student must fill in.\n"
            "- 'multiple_choice': student chooses exactly one option from 'answer_options'.\n"
            "- 'type_in': open-ended question where student types a short free-text answer."
        ),
    )

    # Only required for 'multiple_choice' and 'fill_gap'; None for 'type_in'.
    answer_options: Optional[List[str]] = Field(
        None,
        description=(
            "List of answer options, in the exact order they should be shown to the student. "
            "Required when task_type = 'multiple_choice', 'fill_gap'. "
            "Must be None for 'type_in'."
        ),
    )

    correct_answer: Optional[str] = Field(
        None,
        description=(
            "The correct answer in normalized string form.\n"
            "- For 'fill_gap' and 'multiple_choice', this MUST exactly match one element of 'answer_options'.\n"
            "- For 'type_in', this is the canonical correct answer "
            "(e.g. '42', 'O(n log n)'); you may later implement fuzzy matching if needed."
        ),
    )
54
+
55
+
56
class Quiz(BaseModel):
    """Collection of quiz tasks generated by the Examiner agent."""
    # Ordered list of tasks; task_id is expected to start at 1 (see QuizTask).
    tasks: List[QuizTask] = Field(
        ...,
        description=(
            "A collection of quiz tasks that the agent should generate for the given document.\n"
        )
    )
64
+
65
+
66
class WorkflowState(BaseModel):
    """State for the LangGraph workflow."""
    pdf_path: str = Field(default="", description="Path to the uploaded PDF")
    summary: str = Field(default="", description="Summary of the document")
    quiz: Optional[Quiz] = Field(default=None, description="Generated quiz")
    user_answers: Optional[List[str]] = Field(default=None, description="User's quiz answers")
    feedback: str = Field(default="", description="Supervisor feedback on quiz results")
    # NOTE(review): messages are plain strings here but role/content dicts in
    # sessions.Session -- confirm which shape the workflow actually expects.
    messages: List[str] = Field(default_factory=list, description="Conversation history")
agents/summarizer.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader
2
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_core.documents import Document
5
+ from agents.model import llm
6
+ from typing import List
7
+
8
+
9
# Documents at or below this many characters are summarized in a single call.
MAX_CONTEXT_CHARS = 100000


def summarize_pdf(pdf_path: str) -> str:
    """
    Token-efficient PDF summarizer.

    Small documents (<= MAX_CONTEXT_CHARS characters) are summarized in one
    "stuff" call; larger ones fall back to iterative refinement over large
    chunks, which keeps the number of API calls low.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        Final summary string
    """
    pages = PyPDFLoader(pdf_path).load()
    full_text = "\n\n".join(page.page_content for page in pages)

    if len(full_text) > MAX_CONTEXT_CHARS:
        return _refine_summarize(full_text)
    return _stuff_summarize(full_text)
35
+
36
+
37
def _stuff_summarize(text: str) -> str:
    """Summarize entire document in one API call ("stuff" strategy)."""
    # The whole document is inlined into a single prompt; callers guarantee
    # len(text) <= MAX_CONTEXT_CHARS before choosing this path.
    prompt = ChatPromptTemplate.from_template(
        "You are an expert summarizer. Read the following document and provide "
        "a comprehensive summary covering all key topics, concepts, and important details.\n\n"
        "Format your summary with:\n"
        "- A brief overview (2-3 sentences)\n"
        "- Main topics/sections with key points\n"
        "- Important definitions or concepts\n\n"
        "Document:\n{text}"
    )
    chain = prompt | llm
    response = chain.invoke({"text": text})
    return response.content
51
+
52
+
53
def _refine_summarize(text: str, chunk_size: int = 50000) -> str:
    """
    Iterative refinement for large documents.

    Uses fewer, larger chunks and refines the summary incrementally.
    This uses far fewer API calls than map-reduce: one call for the first
    chunk, then one refine call per remaining chunk.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=500,  # small overlap so context is not cut mid-thought
    )
    chunks = splitter.split_text(text)

    # Seed the summary from the first chunk.
    first_prompt = ChatPromptTemplate.from_template(
        "You are an expert summarizer. Summarize the following content, "
        "capturing all key topics, concepts, and important details:\n\n{text}"
    )
    first_chain = first_prompt | llm
    summary = first_chain.invoke({"text": chunks[0]}).content

    if len(chunks) == 1:
        return summary

    # Fold each remaining chunk into the running summary.
    refine_prompt = ChatPromptTemplate.from_template(
        "You have an existing summary of a document:\n\n"
        "EXISTING SUMMARY:\n{summary}\n\n"
        "Now incorporate the following additional content into the summary. "
        "Expand and refine the summary to include new information while keeping it coherent:\n\n"
        "NEW CONTENT:\n{new_content}\n\n"
        "Provide the updated comprehensive summary:"
    )
    refine_chain = refine_prompt | llm

    for chunk in chunks[1:]:
        response = refine_chain.invoke({
            "summary": summary,
            "new_content": chunk
        })
        summary = response.content

    return summary
agents/supervisor.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import ChatPromptTemplate
2
+ from langgraph.prebuilt import create_react_agent
3
+ from agents.states import Quiz
4
+ from agents.prompts import SUPERVISOR_SYSTEM_PROMPT, SUPERVISOR_USER_PROMPT, SUPERVISOR_CHAT_PROMPT
5
+ from agents.tools import Docs
6
+ from agents.model import llm
7
+ from typing import List, Optional, Tuple
8
+
9
+
10
def escape_template_braces(text: str) -> str:
    """
    Escape curly braces in text to prevent ChatPromptTemplate from
    interpreting mathematical notation like {X ∈ A|Y = y} as template variables.

    Args:
        text: Input text that may contain curly braces (None is tolerated)

    Returns:
        Text with every brace doubled ("{" -> "{{", "}" -> "}}"); "" for None.
    """
    if text is None:
        return ""
    pieces = []
    for ch in text:
        if ch == "{":
            pieces.append("{{")
        elif ch == "}":
            pieces.append("}}")
        else:
            pieces.append(ch)
    return "".join(pieces)
24
+
25
+
26
def create_supervisor_agent(docs: Docs):
    """
    Build a ReAct agent wired to the document's search tool for Socratic tutoring.

    Args:
        docs: Docs instance with loaded document

    Returns:
        A LangGraph ReAct agent configured for tutoring
    """
    return create_react_agent(
        model=llm,
        tools=[docs.as_search_tool()],
    )
44
+
45
+
46
def format_quiz_results(quiz: Quiz, user_answers: List[str]) -> str:
    """Format quiz results for the supervisor to review.

    Answers are compared case-insensitively (whitespace-trimmed) against each
    task's correct_answer; zip silently ignores any length mismatch between
    tasks and answers.
    """
    total = len(quiz.tasks)
    # Guard: an empty quiz previously raised ZeroDivisionError in the header.
    if total == 0:
        return "Score: 0/0 (0%)\n"

    results = []
    correct_count = 0

    for task, user_answer in zip(quiz.tasks, user_answers):
        correct = task.correct_answer or ""
        is_correct = user_answer.strip().lower() == correct.strip().lower()
        if is_correct:
            correct_count += 1

        result = f"""
Question {task.task_id}: {task.task}
Type: {task.task_type}
"""
        if task.answer_options:
            result += f"Options: {', '.join(task.answer_options)}\n"
        result += f"""Student Answer: {user_answer}
Correct Answer: {task.correct_answer}
Result: {'CORRECT' if is_correct else 'INCORRECT'}
"""
        results.append(result)

    header = f"Score: {correct_count}/{total} ({100*correct_count/total:.0f}%)\n"
    return header + "\n".join(results)
71
+
72
+
73
def provide_feedback(
    docs: Docs,
    summary: str,
    quiz: Quiz,
    user_answers: List[str]
) -> str:
    """
    Provide Socratic feedback on quiz performance.

    Args:
        docs: Docs instance with loaded document
        summary: Summary of the document
        quiz: The quiz that was taken
        user_answers: User's answers to the quiz

    Returns:
        Feedback string from the supervisor
    """
    # NOTE: the original also built docs.as_search_tool() here but never used
    # it (this is a plain prompt | llm chain, not an agent); the dead local
    # has been removed.
    quiz_results = format_quiz_results(quiz, user_answers)

    # Ground the feedback in a few relevant document chunks.
    context_docs = docs.similarity_search("main concepts explanation", k=3)
    context = "\n\n".join(doc.page_content for doc in context_docs)

    # NOTE(review): these values are passed as template *variables*, which are
    # not re-templated, so the model ends up seeing literal doubled braces.
    # Kept as-is for behavioral parity -- confirm whether escaping is needed
    # here or only for text concatenated into the template string itself.
    escaped_summary = escape_template_braces(summary)
    escaped_quiz_results = escape_template_braces(quiz_results)
    escaped_context = escape_template_braces(context)

    prompt = ChatPromptTemplate.from_messages([
        ("system", SUPERVISOR_SYSTEM_PROMPT),
        ("human", SUPERVISOR_USER_PROMPT + "\n\nRelevant Document Context:\n{context}")
    ])

    chain = prompt | llm

    response = chain.invoke({
        "summary": escaped_summary,
        "quiz_results": escaped_quiz_results,
        "context": escaped_context
    })

    return response.content
116
+
117
+
118
def chat_with_supervisor(
    docs: Docs,
    summary: str,
    user_message: str,
    conversation_history: Optional[List[dict]] = None
) -> str:
    """
    Continue tutoring conversation with the supervisor.

    Args:
        docs: Docs instance with loaded document
        summary: Summary of the document
        user_message: User's message/question
        conversation_history: Previous messages as {"role", "content"} dicts
            (roles must be ones ChatPromptTemplate accepts -- TODO confirm
            the stored "user"/"assistant" roles are valid here)

    Returns:
        Supervisor's response
    """
    # Retrieve context relevant to the user's actual question.
    context_docs = docs.similarity_search(user_message, k=3)
    context = "\n\n".join(doc.page_content for doc in context_docs)

    # Escape braces so from_messages() below does not treat literal {...}
    # in document text or chat content as template variables.
    escaped_summary = escape_template_braces(summary)
    escaped_context = escape_template_braces(context)
    escaped_user_message = escape_template_braces(user_message)

    messages: List[Tuple[str, str]] = [
        ("system", f"You are a Socratic tutor. Document Summary: {escaped_summary}\n\nRelevant Context:\n{escaped_context}")
    ]

    # Replay prior turns so the model sees the full conversation.
    if conversation_history:
        for msg in conversation_history:
            escaped_content = escape_template_braces(msg["content"])
            messages.append((msg["role"], escaped_content))

    messages.append(("human", escaped_user_message))

    prompt = ChatPromptTemplate.from_messages(messages)
    chain = prompt | llm

    # Everything was escaped above, so no template variables remain to fill.
    response = chain.invoke({})

    return response.content
agents/tools.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_core.vectorstores import InMemoryVectorStore
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_core.tools import tool
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
class Docs:
    """Document manager with vector store for RAG-based retrieval."""

    def __init__(self, file_path: str):
        self.file_path = file_path
        # Local sentence-transformers embedding model (downloaded on first use).
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2"
        )
        # The PDF is chunked and indexed eagerly at construction time.
        self.vector_store = self._upload_file(file_path)

    def _upload_file(self, file_path: str) -> InMemoryVectorStore:
        """Load PDF, chunk it, and create vector store."""
        loader = PyPDFLoader(file_path)
        docs = loader.load()

        # 1000-char chunks with 200-char overlap; start offsets kept in metadata.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            add_start_index=True,
        )
        all_splits = text_splitter.split_documents(docs)

        vector_store = InMemoryVectorStore(self.embeddings)
        vector_store.add_documents(documents=all_splits)

        return vector_store

    def as_search_tool(self):
        """Return a LangChain tool for searching the document."""
        # Bind the store into the closure so the tool is self-contained.
        vector_store = self.vector_store

        @tool
        def search_in_docs(query: str) -> str:
            """Retrieve information from the uploaded document to answer a query."""
            retrieved_docs = vector_store.similarity_search(query, k=2)
            serialized = "\n\n".join(
                f"Source: {doc.metadata}\nContent: {doc.page_content}"
                for doc in retrieved_docs
            )
            return serialized

        return search_in_docs

    def get_diverse_chunks_mmr(self, query: str, k: int = 30):
        """Get diverse chunks using MMR (Maximal Marginal Relevance)."""
        retriever = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": k,
                "lambda_mult": 0.5,  # balance relevance vs. diversity equally
                "fetch_k": max(k * 3, 50),  # candidate pool before MMR re-ranking
            },
        )
        return retriever.invoke(query)

    def similarity_search(self, query: str, k: int = 4):
        """Simple similarity search."""
        return self.vector_store.similarity_search(query, k=k)
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import uuid
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ from fastapi import FastAPI, UploadFile, File, HTTPException
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from pydantic import BaseModel
11
+ from typing import List, Optional
12
+
13
+ from agents.sessions import session_manager
14
+ from agents.summarizer import summarize_pdf
15
+ from agents.examiner import generate_quiz
16
+ from agents.supervisor import provide_feedback, chat_with_supervisor
17
+
18
+ app = FastAPI(
19
+ title="AI Tutor API",
20
+ description="Multi-agent tutoring system with summarization, quiz generation, and Socratic feedback"
21
+ )
22
+
23
+ app.add_middleware(
24
+ CORSMiddleware,
25
+ allow_origins=["*"],
26
+ allow_credentials=True,
27
+ allow_methods=["*"],
28
+ allow_headers=["*"],
29
+ )
30
+
31
+ UPLOAD_DIR = "documents"
32
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
33
+
34
+
35
class ExaminerRequest(BaseModel):
    # Session id returned by /summarizer.
    session_id: str
    # How many questions the generated quiz should contain.
    num_questions: int = 5


class SupervisorRequest(BaseModel):
    session_id: str
    # Follow-up question for the tutor (ignored for grading when
    # user_answers is provided).
    message: str
    # Present on the first supervisor call only: answers to the quiz.
    user_answers: Optional[List[str]] = None


class SummaryResponse(BaseModel):
    session_id: str
    summary: str


class QuizResponse(BaseModel):
    # Each entry is a QuizTask serialized via model_dump().
    quiz: List[dict]


class SupervisorResponse(BaseModel):
    # Tutor's reply to the latest message/answers.
    response: str
    # Full conversation history after this exchange.
    messages: List[dict]


class SessionResponse(BaseModel):
    session_id: str
    has_summary: bool
    has_quiz: bool
    message_count: int
65
+
66
+
67
+ @app.post("/summarizer", response_model=SummaryResponse)
68
+ async def summarize_document(file: UploadFile = File(...)):
69
+ """
70
+ Upload a PDF and get a summary.
71
+ Creates a new session and returns session_id with the summary.
72
+ """
73
+ if not file.filename or not file.filename.lower().endswith(".pdf"):
74
+ raise HTTPException(status_code=400, detail="Only PDF files are supported")
75
+
76
+ safe_filename = f"{uuid.uuid4()}.pdf"
77
+ file_path = os.path.join(UPLOAD_DIR, safe_filename)
78
+ with open(file_path, "wb") as buffer:
79
+ shutil.copyfileobj(file.file, buffer)
80
+
81
+ try:
82
+ session = session_manager.create_session(file_path)
83
+ summary = summarize_pdf(file_path)
84
+ session_manager.update_summary(session.session_id, summary)
85
+
86
+ return SummaryResponse(
87
+ session_id=session.session_id,
88
+ summary=summary
89
+ )
90
+ except Exception as e:
91
+ raise HTTPException(status_code=500, detail=str(e))
92
+
93
+
94
+ @app.post("/examiner", response_model=QuizResponse)
95
+ async def generate_quiz_endpoint(request: ExaminerRequest):
96
+ """
97
+ Generate a quiz based on a previously summarized document.
98
+ Requires a valid session_id from /summarizer.
99
+ """
100
+ session = session_manager.get_session(request.session_id)
101
+ if not session:
102
+ raise HTTPException(status_code=404, detail="Session not found")
103
+
104
+ if not session.summary:
105
+ raise HTTPException(status_code=400, detail="No summary found. Call /summarizer first.")
106
+
107
+ if not session.docs:
108
+ raise HTTPException(status_code=400, detail="Document not loaded")
109
+
110
+ try:
111
+ quiz = generate_quiz(
112
+ docs=session.docs,
113
+ summary=session.summary,
114
+ num_questions=request.num_questions
115
+ )
116
+ session_manager.update_quiz(request.session_id, quiz)
117
+
118
+ quiz_data = [task.model_dump() for task in quiz.tasks]
119
+ return QuizResponse(quiz=quiz_data)
120
+ except Exception as e:
121
+ raise HTTPException(status_code=500, detail=str(e))
122
+
123
+
124
+ @app.post("/supervisor", response_model=SupervisorResponse)
125
+ async def supervisor_chat(request: SupervisorRequest):
126
+ """
127
+ Chat with the Socratic tutor supervisor.
128
+
129
+ First call should include user_answers to get initial feedback.
130
+ Subsequent calls can just include message for follow-up questions.
131
+ """
132
+ session = session_manager.get_session(request.session_id)
133
+ if not session:
134
+ raise HTTPException(status_code=404, detail="Session not found")
135
+
136
+ if not session.docs:
137
+ raise HTTPException(status_code=400, detail="Document not loaded")
138
+
139
+ if not session.summary:
140
+ raise HTTPException(status_code=400, detail="No summary found")
141
+
142
+ try:
143
+ if request.user_answers:
144
+ if not session.quiz:
145
+ raise HTTPException(status_code=400, detail="No quiz found. Call /examiner first.")
146
+
147
+ session_manager.update_user_answers(request.session_id, request.user_answers)
148
+
149
+ response = provide_feedback(
150
+ docs=session.docs,
151
+ summary=session.summary,
152
+ quiz=session.quiz,
153
+ user_answers=request.user_answers
154
+ )
155
+ else:
156
+ response = chat_with_supervisor(
157
+ docs=session.docs,
158
+ summary=session.summary,
159
+ user_message=request.message,
160
+ conversation_history=session.messages
161
+ )
162
+
163
+ session_manager.add_message(request.session_id, "user", request.message)
164
+ session_manager.add_message(request.session_id, "assistant", response)
165
+
166
+ return SupervisorResponse(
167
+ response=response,
168
+ messages=session_manager.get_messages(request.session_id)
169
+ )
170
+ except Exception as e:
171
+ raise HTTPException(status_code=500, detail=str(e))
172
+
173
+
174
+ @app.get("/session/{session_id}", response_model=SessionResponse)
175
+ async def get_session_info(session_id: str):
176
+ """Get information about a session."""
177
+ session = session_manager.get_session(session_id)
178
+ if not session:
179
+ raise HTTPException(status_code=404, detail="Session not found")
180
+
181
+ return SessionResponse(
182
+ session_id=session.session_id,
183
+ has_summary=bool(session.summary),
184
+ has_quiz=session.quiz is not None,
185
+ message_count=len(session.messages)
186
+ )
187
+
188
+
189
+ @app.delete("/session/{session_id}")
190
+ async def delete_session(session_id: str):
191
+ """Delete a session and clean up resources."""
192
+ if session_manager.delete_session(session_id):
193
+ return {"message": "Session deleted successfully"}
194
+ raise HTTPException(status_code=404, detail="Session not found")
195
+
196
+
197
+ @app.get("/health")
198
+ async def health_check():
199
+ """Health check endpoint."""
200
+ return {"status": "healthy"}
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain>=0.3.0
2
+ langchain-core>=0.3.0
3
+ langchain-community>=0.3.0
4
+ langchain-text-splitters>=0.3.0
5
+ langgraph>=0.2.0
6
+ langchain-google-genai>=2.0.0
7
+ langchain-huggingface>=0.1.0
8
+ sentence-transformers>=2.2.0
9
+ pypdf>=3.0.0
10
+ pydantic>=2.0.0
11
+ python-dotenv>=1.0.0
12
+ fastapi>=0.100.0
13
+ uvicorn[standard]>=0.20.0
14
+ python-multipart>=0.0.6