Spaces:
Sleeping
Sleeping
| import json | |
| import sqlite3 | |
| import operator | |
| from math import ceil | |
| from fastapi import FastAPI, Query, Body | |
| from contextlib import asynccontextmanager | |
| from datetime import datetime | |
| from collections import defaultdict | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.messages import HumanMessage, SystemMessage | |
| from typing import Annotated, List, Optional, Dict, Any | |
| from pydantic import BaseModel, Field | |
| from typing_extensions import TypedDict | |
| from langgraph.graph import StateGraph, START, END | |
| from langgraph.constants import Send | |
| from fastapi.responses import HTMLResponse | |
| # Session State for storing runtime data | |
| session_state = { | |
| "report_data": None, | |
| "final_report": None | |
| } | |
| # Initialize FastAPI app | |
| app = FastAPI( | |
| title="JEE Roadmap Planner API", | |
| description="API for managing and analyzing JEE Roadmaps", | |
| version="1.0.0" | |
| ) | |
| # Models for data input | |
| class RoadmapData(BaseModel): | |
| schedule: List[Dict[str, Any]] | |
| # Add any other fields from the roadmap schema | |
| # AGENT 1 | |
| class Section(BaseModel): | |
| name: str = Field( | |
| description="Name for this section of the report.", | |
| ) | |
| description: str = Field( | |
| description="Brief overview of the main topics and concepts to be covered in this section.", | |
| ) | |
| data_requirements: str = Field( | |
| description="Description of the data needed from the roadmap database to write this section.", | |
| ) | |
| class Sections(BaseModel): | |
| sections: List[Section] = Field( | |
| description="Sections of the report.", | |
| ) | |
| # Initialize LLM | |
| llm = ChatOpenAI(model="gpt-4o-mini") | |
| planner = llm.with_structured_output(Sections) | |
| class State(TypedDict): | |
| sections: list[Section] # List of report sections | |
| completed_sections: Annotated[list, operator.add] # All workers write to this key in parallel | |
| final_report: str # Final report | |
| # Combined helper-worker state | |
| class ProcessorState(TypedDict): | |
| section: Section | |
| completed_sections: Annotated[list, operator.add] | |
| def orchestrator(state: State): | |
| """Orchestrator that generates a plan for the report with data requirements""" | |
| schema = """CREATE TABLE IF NOT EXISTS roadmap ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| day_num INTEGER, | |
| date TEXT, -- [yyyy-mm-dd] | |
| subject TEXT, -- (Physics, Chemistry or Maths) | |
| chapter_name TEXT, | |
| task_type TEXT, -- (Concept Understanding, Question Practice, Revision, Test) | |
| time TEXT, -- formatted like '0.5 hour', '1 hour', '2 Hours', and so on -- Tells the amount of time required to finish the task | |
| subtopic TEXT, | |
| task_completed BOOLEAN, -- 0/1 indicates task completion status | |
| completion_timestamp TEXT | |
| )""" | |
| # Generate queries | |
| report_sections = planner.invoke( | |
| [ | |
| SystemMessage(content=f"""You are responsible for creating a structured plan for a JEE preparation analysis report. | |
| Audience: The report is intended primarily for students, but must also be insightful to mentors and parents. | |
| Keep the language motivational and supportive, with actionable insights backed by data. | |
| Report Format: The report will be composed of exactly 4 concise sections. Your job is to define these sections. Each section must include: | |
| - **Name**: A short, descriptive title | |
| - **Description**: What the section analyzes and how it helps the student | |
| - **Data Requirements**: A plain-English description of what fields and metrics are needed from the roadmap | |
| database whose schema is given here: {schema} | |
| DO NOT invent new sections or formats. Use exactly the following four section templates and fill in the | |
| descriptions and data requirements precisely. | |
| --- | |
| ### Study Time Analysis | |
| **Description**: Analyze how much total time the student planned to spend vs how much they actually completed, | |
| across different subjects and task types. This will help the student understand where their time is really going. | |
| **Data Requirements**: | |
| - Fields: `subject`, `task_type`, `time`, `task_completed` | |
| - Metrics: | |
| - Total planned time β SUM of all `time` | |
| - Total actual time β SUM of `time` where `task_completed = 1` | |
| - Grouped by both `subject` and `task_type` | |
| --- | |
| ### Task Completion Metrics | |
| **Description**: Measure the student's consistency and follow-through by looking at completion rates across | |
| subjects and task types. | |
| **Data Requirements**: | |
| - Fields: `subject`, `task_type`, `task_completed` | |
| - Metrics: | |
| - Total tasks β COUNT of all tasks | |
| - Completed tasks β COUNT of tasks where `task_completed = 1` | |
| - Completion percentage per subject and task type | |
| --- | |
| ### Study Balance Analysis | |
| **Description**: Evaluate how the student's study time is distributed across task types (e.g., Practice, Revision, Test) | |
| within each subject. This highlights over- or under-emphasis on any category. | |
| **Data Requirements**: | |
| - Fields: `subject`, `task_type`, `time` | |
| - Metrics: | |
| - SUM of `time` for each (subject, task_type) pair where task_completed = 1 | |
| - Relative distribution of time per subject to detect imbalance | |
| --- | |
| ### Strengths and Areas for Improvement | |
| **Description**: | |
| This section analyzes how the student's effort is distributed β not by estimating how long they spent, | |
| but by combining how many tasks they completed and how much time those completed tasks represent. | |
| This helps identify: | |
| - Subjects and task types where the student is showing strong commitment | |
| - Areas that may be neglected or inconsistently approached | |
| **Data Requirements**: | |
| - Fields: subject, task_type, task_completed, time | |
| - Metrics (filtered where task_completed = 1): | |
| - Total Number of completed tasks | |
| - Total amount of time spent | |
| - Grouped by subject and task_type | |
| --- | |
| Important Constraints: | |
| - You must include **all the mentioned fields** in the `data_requirements` β no assumptions | |
| - Use only **aggregate metrics** β no need for per-task or per-day analysis | |
| - Keep descriptions student-focused, clear, and motivational | |
| - Do not alter section names or invent new ones | |
| - Do not output anything outside the strict format above | |
| Your output will be passed into a structured data pipeline. Return only the filled-out section definitions as described above. | |
| """), | |
| HumanMessage(content="""Use the given table structure of the roadmap and decide all the sections of | |
| the report along with what should be in it and the clearly mention all the data thats required for it | |
| from the roadmap table"""), | |
| ] | |
| ) | |
| return {"sections": report_sections.sections} | |
| def processor(state: ProcessorState): | |
| """Combined helper and worker - gets data and writes section in one step""" | |
| section = state['section'] | |
| # HELPER PART: Get data for this section | |
| sql_query = generate_sql_for_report(llm, section.data_requirements) | |
| rows = get_sql_data_for_report(sql_query) | |
| # WORKER PART: Write the section using the data | |
| section_result = llm.invoke( | |
| [ | |
| SystemMessage( | |
| content=f"""Create a concise, data-driven JEE preparation report section that provides actionable insights for students, | |
| parents, and mentors. | |
| Requirements: | |
| 1. Begin directly with key metrics and insights - no introductory preamble | |
| 2. Use specific numbers, percentages, and ratios to quantify performance | |
| 3. Include concise tables or bullet points for clarity where appropriate | |
| 4. Highlight patterns related to: | |
| - Task completion rates | |
| - Time allocation efficiency | |
| - Subject/topic focus distribution | |
| - Study consistency patterns | |
| 5. For each observation, provide a brief actionable recommendation focused on student improvement. | |
| 6. Use professional but motivational tone appropriate for academic context | |
| 7. Strictly use Markdown for formatting all the tables and the numbers | |
| 8. Strictly keep each section very focused and write it under 0 to 50 words | |
| 9. Verify the formatting of all the tables multiple times to ensure the markdown is correct. | |
| 10. Check all the numbers and calculations made by you multiple times to ensure accuracy | |
| Base all analysis strictly on the provided data - avoid assumptions beyond what's explicitly given to you. | |
| Don't assume anything else, even a little bit. | |
| *Important* | |
| If you receive an empty data input, understand that the student hasn't done tasks matching the given data description. Also, | |
| know that this report is for the student to improve themselves, and they have no part in making sure the data is logged for | |
| this analysis. Deeply analyze the SQL query ->{sql_query} and the data description ->{section.data_requirements} used to | |
| extract the data and figure out why there was no data available in the roadmap, which the student went through and write | |
| the section accordingly. | |
| """ | |
| ), | |
| HumanMessage( | |
| content=f"""Here is the section name: {section.name} and description: {section.description} | |
| Data for writing this section: {rows}""" | |
| ), | |
| ] | |
| ) | |
| # Return completed section | |
| return {"completed_sections": [section_result.content]} | |
| def synthesizer(state: State): | |
| """Synthesize full report from sections""" | |
| # List of completed sections | |
| completed_sections = state["completed_sections"] | |
| # Format completed section to str to use as context for final sections | |
| completed_report_sections = "\n\n---\n\n".join(completed_sections) | |
| return {"final_report": completed_report_sections} | |
| # Assign processors function | |
| def assign_processors(state: State): | |
| """Assign a processor to each section in the plan""" | |
| return [Send("processor", {"section": s}) for s in state["sections"]] | |
| def generate_sql_for_report(llm, prompt): | |
| table_struct = """ | |
| CREATE TABLE IF NOT EXISTS roadmap ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| day_num INTEGER, | |
| date TEXT, | |
| subject TEXT, | |
| chapter_name TEXT, | |
| task_type TEXT, | |
| time TEXT, | |
| subtopic TEXT, | |
| task_completed BOOLEAN, | |
| completion_timestamp TEXT | |
| ) | |
| """ | |
| response = llm.invoke( | |
| [ | |
| SystemMessage(content=f"""You are a helper who runs in the background of an AI agent, | |
| which helps students for their JEE Preparation. Now your job is to analyze the user's prompt and | |
| create an SQL query to extract the related Information from an sqlite3 database with the table | |
| structure: {table_struct}. | |
| Note: For the time column, the data is formatted like '0.5 hour', '1 hour', '2 hours' and | |
| so on, it tells the amount of time required to complete that specific task. So make sure | |
| to create queries that compare just the numbers within the text. For the task_type column, | |
| the data is either of these (Concept Understanding, Question Practice, Revision or Test) | |
| You will also make sure multiple times that you give an SQL | |
| Query that adheres to the given table structure, and you output just the SQL query. | |
| Do not include anything else like new line statements, ```sql or any other text. Your output | |
| is going to be directly fed into a Python script to extract the required information. So, | |
| please follow all the given instructions. | |
| Verify multiple times that the SQL query is error free for the SQLite3 format."""), | |
| HumanMessage(content=f"""Keeping the table structure in mind: {table_struct}, | |
| Convert this prompt to an SQL query for the given table: {prompt}. Make sure your | |
| output is just the SQL query, which can directly be used to extract required content.""") | |
| ] | |
| ) | |
| return response.content.strip() | |
| def get_sql_data_for_report(sql_query): | |
| conn = sqlite3.connect("jee_full_roadmap.db") | |
| cursor = conn.cursor() | |
| results = [] | |
| queries = [q.strip() for q in sql_query.strip().split(';') if q.strip()] | |
| for query in queries: | |
| cursor.execute(query) | |
| columns = [desc[0] for desc in cursor.description] | |
| rows = cursor.fetchall() | |
| results.append({ | |
| "query": query, | |
| "columns": columns, | |
| "rows": rows | |
| }) | |
| conn.close() | |
| return results | |
| def create_db_for_report(roadmap_data): | |
| try: | |
| conn = sqlite3.connect("jee_full_roadmap.db") | |
| cursor = conn.cursor() | |
| cursor.execute("DROP TABLE IF EXISTS roadmap") | |
| cursor.execute(""" | |
| CREATE TABLE roadmap ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| day_num INTEGER, | |
| date TEXT, | |
| subject TEXT, | |
| chapter_name TEXT, | |
| task_type TEXT, | |
| time TEXT, | |
| subtopic TEXT, | |
| task_completed BOOLEAN, | |
| completion_timestamp TEXT | |
| ) | |
| """) | |
| for day in roadmap_data["schedule"]: | |
| date = day["date"] | |
| day_num = day["dayNumber"] | |
| for subj in day["subjects"]: | |
| subject = subj["name"] | |
| for task in subj["tasks"]: | |
| cursor.execute(""" | |
| INSERT INTO roadmap (day_num, date, subject, chapter_name, task_type, time, subtopic, task_completed, completion_timestamp) | |
| VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) | |
| """, ( | |
| day_num, | |
| date, | |
| subject, | |
| task["ChapterName"], | |
| task["type"], | |
| task["time"], | |
| task["subtopic"], | |
| task["task_completed"], | |
| task["completion_timestamp"] | |
| )) | |
| conn.commit() | |
| conn.close() | |
| print("β Database created and data inserted successfully.") | |
| except Exception as e: | |
| print(f"β οΈ Error initializing database: {e}") | |
| def generate_report(roadmap_data): | |
| # Build workflow | |
| workflow_builder = StateGraph(State) | |
| # Add the nodes | |
| workflow_builder.add_node("orchestrator", orchestrator) | |
| workflow_builder.add_node("processor", processor) | |
| workflow_builder.add_node("synthesizer", synthesizer) | |
| # Add edges to connect nodes | |
| workflow_builder.add_edge(START, "orchestrator") | |
| workflow_builder.add_conditional_edges("orchestrator", assign_processors, ["processor"]) | |
| workflow_builder.add_edge("processor", "synthesizer") | |
| workflow_builder.add_edge("synthesizer", END) | |
| # Compile the workflow | |
| workflow = workflow_builder.compile() | |
| # Initialize database | |
| create_db_for_report(roadmap_data) | |
| # Invoke | |
| state = workflow.invoke({}) | |
| session_state['final_report'] = state["final_report"] | |
| return state["final_report"] | |
| # AGENT 3 | |
| def get_chapters_and_subtopics(roadmap_data): | |
| ch_subt = { | |
| "Physics": {}, | |
| "Chemistry": {}, | |
| "Maths": {} | |
| } | |
| for day in roadmap_data["schedule"]: | |
| for subject in day['subjects']: | |
| sub = ch_subt[subject['name']] | |
| for task in subject['tasks']: | |
| sub[task['ChapterName']] = [] | |
| for day in roadmap_data["schedule"]: | |
| for subject in day['subjects']: | |
| sub = ch_subt[subject['name']] | |
| for task in subject['tasks']: | |
| if task['subtopic'] not in sub[task['ChapterName']]: | |
| sub[task['ChapterName']].append(task['subtopic']) | |
| return ch_subt | |
| def create_roadmap_db(roadmap_data): | |
| try: | |
| conn = sqlite3.connect("jee_roadmap.db") | |
| cursor = conn.cursor() | |
| cursor.execute("DROP TABLE IF EXISTS roadmap") | |
| cursor.execute(""" | |
| CREATE TABLE IF NOT EXISTS roadmap ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| day_num INTEGER, | |
| date TEXT, | |
| subject TEXT, | |
| chapter_name TEXT, | |
| task_type TEXT, | |
| time TEXT, | |
| subtopic TEXT | |
| ) | |
| """) | |
| for day in roadmap_data["schedule"]: | |
| date = day["date"] | |
| day_num = day["dayNumber"] | |
| for subj in day["subjects"]: | |
| subject = subj["name"] | |
| for task in subj["tasks"]: | |
| cursor.execute(""" | |
| INSERT INTO roadmap (day_num, date, subject, chapter_name, task_type, time, subtopic) | |
| VALUES (?, ?, ?, ?, ?, ?, ?) | |
| """, ( | |
| day_num, | |
| date, | |
| subject, | |
| task["ChapterName"], | |
| task["type"], | |
| task["time"], | |
| task["subtopic"] | |
| )) | |
| conn.commit() | |
| conn.close() | |
| print("β Database created and data inserted successfully.") | |
| return True | |
| except Exception as e: | |
| print(f"β οΈ Error initializing database: {e}") | |
| return False | |
| # Function to convert NL query to SQL | |
| def generate_sql_from_nl(prompt, ch_subt): | |
| table_struct = """CREATE TABLE IF NOT EXISTS roadmap ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| day_num INTEGER, | |
| date TEXT, -- [yyyy-mm-dd] | |
| subject TEXT, -- [Physics, Chemistry or Maths] | |
| chapter_name TEXT, | |
| task_type TEXT, -- (Concept Understanding, Question Practice, Revision, Test) | |
| time TEXT, -- formatted like '0.5 hour', '1 hour', '2 Hours', and so on | |
| subtopic TEXT | |
| )""" | |
| response = llm.invoke( | |
| [ | |
| SystemMessage( | |
| content=f"""You are an helper who runs in the background of an AI agent, | |
| which helps students for their JEE Preparation. Now your Job is to analyze the users prompt and | |
| create an SQL query to extract the related Information from an sqlite3 database with the table | |
| structure: {table_struct}. | |
| Note: | |
| - For the time column, the data is formatted like '0.5 hour', '1 hour', '2 hours' and | |
| so on. So make sure to create queries that compare just the numbers within the text. | |
| - If the student mention about any chapters or subtopics, browse through this json file {ch_subt}, | |
| find the one with the closest match to the users query and use only those exact names of Chapers | |
| and Subtopics present in this file to create SQL the query. | |
| - For date related queries, refer today's date {datetime.now().date()} | |
| - If the user ask's you general questions, Return a Dummy query like {"SELECT * FROM your_table WHERE FALSE;"} | |
| You will also make sure multiple times that you give an SQL | |
| Query that adheres to the given table structure, and you Output just the SQL query. | |
| Do not include anyting else like new line statements, ```sql or any other text. Your output | |
| is going to be directly fed into a Python script to extract the required information. So, | |
| please follow all the given Instructions. | |
| """ | |
| ), | |
| HumanMessage( | |
| content=f"""Keeping the table structure in mind: {table_struct}, | |
| Convert this prompt to an SQL query for the given table: {prompt}. Make sure your | |
| output is just the SQL query, which can directly be used to extract required content""" | |
| ), | |
| ] | |
| ) | |
| return response.content.strip() | |
| # Function to fetch data from SQLite | |
| def fetch_data_from_sql(sql_query): | |
| conn = sqlite3.connect("jee_roadmap.db") | |
| cursor = conn.cursor() | |
| cursor.execute(sql_query) | |
| columns = [desc[0] for desc in cursor.description] | |
| rows = cursor.fetchall() | |
| data = { | |
| "query": sql_query, | |
| "columns": columns, | |
| "rows": rows | |
| } | |
| conn.close() | |
| return data | |
| # Function to convert SQL output to natural language | |
| def generate_nl_from_sql_output(prompt, data): | |
| response = llm.invoke( | |
| [ | |
| SystemMessage( | |
| content=f"""You are an helpful AI chatbot working under the roadmap | |
| section of an AI Agent, whose role is to aid students in their preparation for the JEE examination. | |
| You are going to play a very crucial role of a Roadmap Assistant, who helps the student out with whatever query | |
| they have related to their roadmap, the data required to answer the users query is already extracted | |
| from the Roadmap table of a SQLite3 database and given to you here {data}. Analyse the users query deeply and | |
| reply to it with the relevant information from the given data in a supportive manner. If you get empty data | |
| as an input, deeply analyze the user's prompt and the sql query and give a suitable reply. If you find the | |
| user's prompt to be conversational in nature, please respond accordingly.""" | |
| ), | |
| HumanMessage( | |
| content=f"""Answer to this users query using the data given to you, while keeping | |
| your role in mind: {prompt}""" | |
| ), | |
| ] | |
| ) | |
| return response.content.strip() | |
| # Main function for chatbot | |
| def answer_user_query(prompt, roadmap_data): | |
| ch_subt = get_chapters_and_subtopics(roadmap_data) | |
| query = generate_sql_from_nl(prompt, ch_subt) | |
| data = fetch_data_from_sql(query) | |
| return generate_nl_from_sql_output(prompt, data) | |
| def root(): | |
| return """ | |
| <html> | |
| <head><title>Sstudize Agents</title></head> | |
| <body style="font-family: Arial, sans-serif; text-align: center; margin-top: 50px;"> | |
| <h1>Welcome to Sstudize Agents!</h1> | |
| <p>Select an agent:</p> | |
| <ul style="list-style-type: none;"> | |
| <li><a href="/docs#/default/agent1_agent1_post">Agent 1: Task Analysis</a></li> | |
| <li><a href="/docs#/default/agent3_agent3_post">Agent 3: Chatbot Assistant</a></li> | |
| </ul> | |
| </body> | |
| </html> | |
| """ | |
| # --- AGENT 1: Task Analysis (Task Analysis Page) --- | |
| def agent1(roadmap_data: RoadmapData = Body(..., description="Complete roadmap data in JSON format")): | |
| """ | |
| Agent 1 - Task Analysis: Builds a performance report based on provided roadmap data. | |
| """ | |
| try: | |
| # Store the roadmap data | |
| session_state["report_data"] = roadmap_data.dict() | |
| # Generate performance report | |
| report = generate_report(session_state["report_data"]) | |
| return { | |
| "status": "success", | |
| "final_report": report | |
| } | |
| except Exception as e: | |
| return {"status": "error", "message": str(e)} | |
| # --- AGENT 3: Roadmap Chatbot (Roadmap Chatbot Page) --- | |
| def agent3( | |
| query: str = Body(..., description="User's message to the chatbot"), | |
| roadmap_data: RoadmapData = Body(..., description="Complete roadmap data in JSON format") | |
| ): | |
| """ | |
| Agent 3 - Roadmap Chatbot Assistant: Answers user questions about the roadmap in a chat-like style. | |
| """ | |
| try: | |
| # Create DB from provided roadmap data | |
| db_created = create_roadmap_db(roadmap_data.dict()) | |
| if not db_created: | |
| return {"status": "error", "message": "Failed to create database from roadmap data"} | |
| # Generate response to user query | |
| response = answer_user_query(query, roadmap_data.dict()) | |
| return { | |
| "status": "success", | |
| "chat_response": { | |
| "user": query, | |
| "assistant": response | |
| } | |
| } | |
| except Exception as e: | |
| return { | |
| "status": "error", | |
| "message": f"Error processing query: {str(e)}" | |
| } |