Sarat Kannan committed on
Add files via upload
Browse files- app.py +534 -0
- orchestrator/__init__.py +0 -0
- orchestrator/factories.py +14 -0
- orchestrator/graph_agent.py +100 -0
- orchestrator/graphs.py +231 -0
- orchestrator/settings.py +24 -0
- orchestrator/sql_agent.py +234 -0
- orchestrator/tools.py +88 -0
- requirements.txt +21 -0
- school.db +3 -0
- sqlite.py +296 -0
app.py
ADDED
|
@@ -0,0 +1,534 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage
|
| 8 |
+
|
| 9 |
+
from orchestrator.settings import Settings
|
| 10 |
+
from orchestrator.factories import get_llm
|
| 11 |
+
from orchestrator.sql_agent import sql_answer
|
| 12 |
+
from orchestrator.graph_agent import graph_answer
|
| 13 |
+
from orchestrator.tools import run_tools_once
|
| 14 |
+
from orchestrator.graphs import build_router_graph, build_tools_agent_graph
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
st.set_page_config(page_title="Multi-Agent Orchestration (LangGraph)", page_icon="🧭", layout="wide")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _dict_messages_to_lc(messages: list[dict]) -> list[BaseMessage]:
    """Convert chat-history dicts ({"role": ..., "content": ...}) into LangChain messages.

    Entries with role "user" become HumanMessage; every other role (including
    a missing one) becomes AIMessage.
    """

    def _to_message(entry: dict) -> BaseMessage:
        text = entry.get("content", "")
        if entry.get("role") == "user":
            return HumanMessage(content=text)
        return AIMessage(content=text)

    return [_to_message(entry) for entry in messages]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _extract_tool_names_from_messages(messages: list[BaseMessage]) -> list[str]:
    """Collect the names of tools invoked by AI messages, first-seen order, no duplicates.

    Tool-call entries may be dicts (newer langchain) or objects with a
    ``name`` attribute (older versions); both shapes are handled. Falsy or
    missing names are skipped.
    """
    # dict preserves insertion order, so this both dedupes and keeps order.
    ordered: dict[str, None] = {}
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        for call in (getattr(msg, "tool_calls", None) or []):
            if isinstance(call, dict):
                name = call.get("name")
            else:
                name = getattr(call, "name", None)
            if name:
                ordered.setdefault(str(name), None)
    return list(ordered)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _rewrite_followup_to_standalone(settings: Settings, chat_messages: list[dict], question: str) -> str:
    """
    Used in the *direct* SQL/Graph pages to make follow-ups work better.
    Router graph already does this internally.
    """
    # First user turn: there is no prior context, so nothing to rewrite.
    if sum(m.get("role") == "user" for m in chat_messages) <= 1:
        return question

    llm = get_llm(settings, temperature=0)

    # Build a short transcript from the most recent turns only.
    transcript = "\n".join(
        f"User: {m.get('content','')}" if m.get("role") == "user" else f"Assistant: {m.get('content','')}"
        for m in chat_messages[-12:]
    )

    prompt = (
        "Rewrite the user's latest question into a standalone question.\n"
        "Do NOT answer the question.\n\n"
        f"Conversation:\n{transcript}\n\n"
        f"Latest user question:\n{question}\n\n"
        "Standalone question:"
    )

    msg = llm.invoke(
        [
            SystemMessage(content="You rewrite follow-up questions into standalone questions."),
            HumanMessage(content=prompt),
        ]
    )
    # Fall back to the original question if the model returned nothing useful.
    rewritten = (msg.content or "").strip()
    return rewritten or question
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# --- Sidebar ---
# Global navigation + runtime configuration. Everything below runs on every
# Streamlit rerun; the resulting `settings` object is rebuilt each time.
st.sidebar.title("🧭 Multi-Agent Orchestration")

page = st.sidebar.radio(
    "Navigation",
    ["Router Chat", "SQL Agent", "Graph Agent", "Tools Agent", "Settings"],
    index=0,
)

# Runtime settings overrides (UI -> env-like)
st.sidebar.subheader("Model")
# llm_model = st.sidebar.text_input("LLM_MODEL (Groq)", value=os.getenv("LLM_MODEL", "llama-3.1-8b-instant"))
# Groq-hosted models offered in the picker.
MODEL_OPTIONS = [
    "llama-3.1-8b-instant",
    "meta-llama/llama-4-maverick-17b-128e-instruct",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "moonshotai/kimi-k2-instruct-0905",
    "openai/gpt-oss-120b",
    "qwen/qwen3-32b",
]

# Honor an LLM_MODEL env override even when it is not in the preset list.
default_model = os.getenv("LLM_MODEL", "meta-llama/llama-4-maverick-17b-128e-instruct")
if default_model not in MODEL_OPTIONS:
    MODEL_OPTIONS.insert(0, default_model)

llm_model = st.sidebar.selectbox("LLM_MODEL", MODEL_OPTIONS, index=MODEL_OPTIONS.index(default_model))

st.sidebar.subheader("SQL (SQLite)")
sqlite_path = st.sidebar.text_input("SQLITE_PATH", value=os.getenv("SQLITE_PATH", "student.db"))

st.sidebar.subheader("Neo4j (Graph DB)")
neo4j_uri = st.sidebar.text_input("NEO4J_URI", value=os.getenv("NEO4J_URI", ""))
neo4j_username = st.sidebar.text_input("NEO4J_USERNAME", value=os.getenv("NEO4J_USERNAME", ""))
neo4j_password = st.sidebar.text_input("NEO4J_PASSWORD", value=os.getenv("NEO4J_PASSWORD", ""), type="password")

st.sidebar.subheader("UI")
show_routing = st.sidebar.checkbox("Show routed agent", value=True)
show_tools_used = st.sidebar.checkbox("Show tools used", value=True)

# Consolidated settings object passed to every agent call below.
settings = Settings(
    groq_api_key=os.getenv("GROQ_API_KEY", ""),
    llm_model=llm_model,
    sqlite_path=sqlite_path,
    neo4j_uri=neo4j_uri,
    neo4j_username=neo4j_username,
    neo4j_password=neo4j_password,
    wiki_doc_content_chars_max=int(os.getenv("WIKI_DOC_CHARS", "2000")),
    debug=os.getenv("DEBUG", "0") in ("1", "true", "True"),
)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
@st.cache_resource
def _router_graph_cached(model: str):
    """Build (and cache) the router LangGraph for the given model name.

    NOTE(review): the cache key is only ``model``; all other fields are read
    from the module-level ``settings`` at build time, so later sidebar changes
    to e.g. SQLITE_PATH or the Neo4j credentials will NOT rebuild the cached
    graph — confirm whether that is intended.
    """
    # Clone the current sidebar settings, overriding only the model.
    s = Settings(
        groq_api_key=settings.groq_api_key,
        llm_model=model,
        sqlite_path=settings.sqlite_path,
        neo4j_uri=settings.neo4j_uri,
        neo4j_username=settings.neo4j_username,
        neo4j_password=settings.neo4j_password,
        wiki_doc_content_chars_max=settings.wiki_doc_content_chars_max,
        debug=settings.debug,
    )
    return build_router_graph(s)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
@st.cache_resource
def _tools_graph_cached(model: str):
    """Build (and cache) the tools-agent LangGraph for the given model name.

    NOTE(review): as with ``_router_graph_cached``, only ``model`` is part of
    the cache key; non-model settings are frozen at first build — verify this
    is acceptable for the Neo4j/SQLite fields.
    """
    # Clone the current sidebar settings, overriding only the model.
    s = Settings(
        groq_api_key=settings.groq_api_key,
        llm_model=model,
        sqlite_path=settings.sqlite_path,
        neo4j_uri=settings.neo4j_uri,
        neo4j_username=settings.neo4j_username,
        neo4j_password=settings.neo4j_password,
        wiki_doc_content_chars_max=settings.wiki_doc_content_chars_max,
        debug=settings.debug,
    )
    return build_tools_agent_graph(s)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
# --- Pages ---
if page == "Router Chat":
    # Multi-turn chat routed by the LangGraph router to SQL/Graph/Tools/General.
    st.title("🧭 Router Chat (LangGraph)")
    st.write("Multi-turn chat. The router chooses SQL / Graph / Tools / General automatically.")

    # Seed the conversation once per session.
    if "router_messages" not in st.session_state:
        st.session_state.router_messages = [
            {"role": "assistant", "content": "Hi! Ask a question — I will route it to the right agent."}
        ]

    c1, c2 = st.columns([1, 4])
    with c1:
        if st.button("Reset chat", key="reset_router"):
            st.session_state.router_messages = [
                {"role": "assistant", "content": "Chat reset. Ask a question!"}
            ]
            st.rerun()

    # Replay history; assistant bubbles also show routing/tool metadata.
    for m in st.session_state.router_messages:
        with st.chat_message(m["role"]):
            meta = m.get("meta") or {}
            # NOTE(review): backtick placement here (`route agent`) differs from the
            # live caption below (`route` agent) — confirm which style is intended.
            if m["role"] == "assistant" and show_routing and meta.get("route"):
                st.caption(f"🧭 Routed to: `{meta['route']} agent`")
            if m["role"] == "assistant" and show_tools_used and meta.get("tools_used"):
                tools_line = ", ".join([f"`{t}`" for t in meta["tools_used"]])
                st.caption(f"🧰 Tools used: {tools_line}")
            st.write(m["content"])

    prompt = st.chat_input("Ask a question...", key="router_chat_input")
    if prompt:
        st.session_state.router_messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

        try:
            with st.chat_message("assistant"):
                # Placeholders so the captions/answer fill the same bubble in place.
                route_slot = st.empty()
                tools_slot = st.empty()
                answer_slot = st.empty()

                with st.spinner("Thinking..."):
                    graph = _router_graph_cached(settings.llm_model)
                    msgs = _dict_messages_to_lc(st.session_state.router_messages)

                    out = graph.invoke({"messages": msgs})
                    out_msgs = out.get("messages", []) or []

                    # The final AI message in the state is the answer.
                    last_ai = next((mm for mm in reversed(out_msgs) if isinstance(mm, AIMessage)), None)
                    answer = last_ai.content if last_ai else "(no answer)"

                    # Route label may live under several debug keys depending on graph version.
                    dbg = out.get("debug", {}) or {}
                    route = out.get("route") or dbg.get("router_label") or dbg.get("routed_to") or "general"
                    tools_used = dbg.get("tools_used") or []

                # Update same bubble (no jump)
                if show_routing:
                    route_slot.caption(f"🧭 Routed to: `{route}` agent")
                if show_tools_used and tools_used:
                    tools_slot.caption("🧰 Tools used: " + ", ".join([f"`{t}`" for t in tools_used]))
                answer_slot.write(answer)

                # Append to chat history AFTER we have final answer
                st.session_state.router_messages.append(
                    {"role": "assistant", "content": answer, "meta": {"route": route, "tools_used": tools_used}}
                )

                with st.expander("Debug (route + steps)"):
                    st.write(out.get("debug", {}))
                    st.write("Messages produced:", len(out_msgs))

        except Exception as e:
            st.error(str(e))
|
| 244 |
+
|
| 245 |
+
elif page == "SQL Agent":
    # Direct (non-routed) multi-turn chat against the SQLite student database.
    st.title("🧮 SQL Agent (Chat)")
    st.write("Multi-turn SQL chat. Good for follow-ups like “now filter by …”")

    # --- Intro: what the DB contains ---
    with st.expander("📌 What's in the SQL database?", expanded=False):
        st.markdown(
            """
The database contains information about **students, courses, enrollments, and attendance**.

- **students**: student_id, name, program, section, year
- **courses**: course_id, course_code, course_name, department, credits
- **enrollments**: student-course enrollment per semester with score and grade
- **attendance**: per-class attendance for each student in each course and semester (present = 1/0)
- **view**: student_performance (avg_score, num_A grades, num_courses per student per semester)

Use this chat for analytics questions like rankings, averages, cohorts, and time/semester filtering.
"""
        )

    # --- Session init ---
    if "sql_messages" not in st.session_state:
        st.session_state.sql_messages = [
            {"role": "assistant", "content": "Ask a question about the student analytics database, or try an example below."}
        ]

    # --- Reset ---
    c1, _ = st.columns([1, 5])
    with c1:
        if st.button("Reset chat", key="reset_sql"):
            st.session_state.sql_messages = [{"role": "assistant", "content": "Chat reset. Ask a SQL question!"}]
            st.rerun()

    # --- Example queries (auto-run) ---
    # Clicking a button stashes a canned question in session state; it is
    # popped below and fed through the same path as a typed question.
    st.subheader("⚡ Try an example")
    e1, e2, e3 = st.columns(3)

    if e1.button("🏆 Top students (2025-Fall)", use_container_width=True):
        st.session_state.sql_demo_query = (
            "Show the top 10 students by average score in semester 2025-Fall. "
            "Use the student_performance view. Return name, program, avg_score, num_courses, and num_A."
        )

    if e2.button("📉 Lowest scoring course (2025-Fall)", use_container_width=True):
        st.session_state.sql_demo_query = (
            "In 2025-Fall, which course has the lowest average score? "
            "Return course_code, course_name, department, and avg_score."
        )

    if e3.button("🧾 Attendance < 70% (2025-Fall)", use_container_width=True):
        st.session_state.sql_demo_query = (
            "For semester 2025-Fall, show students whose overall attendance is below 70%. "
            "Compute attendance_percent as 100 * AVG(present). "
            "Return student name, program, attendance_percent, and total_classes."
        )

    # pop() ensures a demo query only fires once, not on every rerun.
    demo_query = st.session_state.pop("sql_demo_query", None)

    # --- Render chat history ---
    for m in st.session_state.sql_messages:
        st.chat_message(m["role"]).write(m["content"])

    # --- Input (manual OR demo) ---
    prompt = st.chat_input("Ask a SQL question...", key="sql_chat_input")
    user_query = prompt or demo_query

    if user_query:
        st.session_state.sql_messages.append({"role": "user", "content": user_query})
        st.chat_message("user").write(user_query)

        try:
            # Create assistant bubble immediately (prevents flicker)
            with st.chat_message("assistant"):
                answer_slot = st.empty()

                with st.spinner("Thinking..."):
                    # Follow-ups are rewritten into standalone questions before
                    # the (stateless) SQL agent sees them.
                    standalone = _rewrite_followup_to_standalone(
                        settings,
                        st.session_state.sql_messages,
                        user_query,
                    )
                    out = sql_answer(settings, standalone)
                    answer = str(out.get("answer", ""))

                answer_slot.write(answer)

                # Append to history AFTER we have the final answer
                st.session_state.sql_messages.append({"role": "assistant", "content": answer})

                with st.expander("Debug"):
                    st.write("Standalone question:", standalone)
                    st.json(out)

        except Exception as e:
            st.error(str(e))
|
| 340 |
+
|
| 341 |
+
elif page == "Graph Agent":
    # Direct (non-routed) multi-turn chat against the Neo4j movies graph.
    st.title("🕸️ Graph Agent (Chat)")
    st.write("Multi-turn Cypher/Q&A chat over Neo4j.")

    # --- Explain what graph contains ---
    with st.expander("📌 What's in the Neo4j database?", expanded=False):
        st.markdown(
            """
**Theme:** Hollywood movies.

**Nodes**
- `Movie`: title, tagline, released (year)
- `Person`: name, born (year)

**Relationships**
- `(:Person)-[:ACTED_IN]->(:Movie)`
- `(:Person)-[:DIRECTED]->(:Movie)`
- `(:Person)-[:PRODUCED]->(:Movie)`

**Examples you can ask about**
- Movies: “The Matrix”, “Top Gun”, “Jerry Maguire”
- People: “Tom Cruise”, “Keanu Reeves”, “Tom Hanks”
"""
        )

    with st.expander("🧠 Why Neo4j (graph DB) vs Web Search?", expanded=False):
        st.markdown(
            """
**Neo4j is best for relationship-heavy questions** where you want exact, structured answers:
- “Who co-starred with Tom Cruise the most?”
- “Find actors who worked with both Tom Cruise and Tom Hanks.”
- “Show movies connected to *The Matrix* via shared actors.”

**Web search is best for open-world facts** (news, definitions, anything outside your dataset).
So: Web search = broad; Neo4j = deep structured relationships inside your graph.
"""
        )

    # --- Session init ---
    if "graph_messages" not in st.session_state:
        st.session_state.graph_messages = [
            {"role": "assistant", "content": "Ask a question about the Neo4j movies graph, or try an example below."}
        ]

    # --- Reset button ---
    c1, _ = st.columns([1, 5])
    with c1:
        if st.button("Reset chat", key="reset_graph"):
            st.session_state.graph_messages = [
                {"role": "assistant", "content": "Chat reset. Ask a graph question!"}
            ]
            st.rerun()

    # --- Example queries (auto-run) ---
    # Same stash-and-pop pattern as the SQL page.
    st.subheader("⚡ Try an example")
    e1, e2, e3 = st.columns(3)

    if e1.button("🎭 Similar to The Matrix (shared actors)", use_container_width=True):
        st.session_state.graph_demo_query = (
            "Find movies that share at least 2 actors with The Matrix. "
            "Return the movie titles and how many actors are shared."
        )

    if e2.button("🧭 Shortest path: Tom Hanks ↔ Tom Cruise", use_container_width=True):
        st.session_state.graph_demo_query = (
            "Show the shortest connection between Tom Hanks and Tom Cruise."
        )

    if e3.button("🎬 Recommend like Cast Away", use_container_width=True):
        st.session_state.graph_demo_query = (
            "Recommend movies like Cast Away based on shared actor and director, and also name them."
        )

    demo_query = st.session_state.pop("graph_demo_query", None)

    # --- Render chat history ---
    for m in st.session_state.graph_messages:
        st.chat_message(m["role"]).write(m["content"])

    # --- Input (manual OR demo) ---
    prompt = st.chat_input("Ask a graph question...", key="graph_chat_input")
    user_query = prompt or demo_query

    if user_query:
        st.session_state.graph_messages.append({"role": "user", "content": user_query})
        st.chat_message("user").write(user_query)

        try:
            # Create assistant bubble immediately (prevents flicker)
            with st.chat_message("assistant"):
                answer_slot = st.empty()

                with st.spinner("Thinking..."):
                    # Rewrite follow-ups before the stateless graph agent runs.
                    standalone = _rewrite_followup_to_standalone(
                        settings,
                        st.session_state.graph_messages,
                        user_query,
                    )
                    out = graph_answer(settings, standalone)
                    answer = str(out.get("answer", ""))

                answer_slot.write(answer)

                # Append to history AFTER we have the final answer
                st.session_state.graph_messages.append({"role": "assistant", "content": answer})

                with st.expander("Debug (Cypher + results)"):
                    st.write("Standalone question:", standalone)
                    st.json(out.get("debug", {}))

        except Exception as e:
            st.error(str(e))
|
| 453 |
+
|
| 454 |
+
elif page == "Tools Agent":
    # Direct tools-agent chat (web search, Wikipedia, arXiv, calculator).
    st.title("🧰 Tools Agent (Chat)")
    st.write("Tool-Assisted Research Chat (Web + Wikipedia + arXiv + Calculator).")

    if "tools_messages" not in st.session_state:
        st.session_state.tools_messages = [{"role": "assistant", "content": "Ask a question — I'll search web/Wikipedia/arXiv and use tools when needed."}]

    c1, _ = st.columns([1, 5])
    with c1:
        if st.button("Reset chat", key="reset_tools"):
            st.session_state.tools_messages = [{"role": "assistant", "content": "Chat reset. Ask a tools question!"}]
            st.rerun()

    for m in st.session_state.tools_messages:
        st.chat_message(m["role"]).write(m["content"])

    prompt = st.chat_input("Ask a tools question...", key="tools_chat_input")
    if prompt:
        st.session_state.tools_messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)

        try:
            with st.chat_message("assistant"):
                # Placeholders so the caption/answer fill the same bubble in place.
                tools_slot = st.empty()
                answer_slot = st.empty()

                with st.spinner("Thinking..."):
                    tools_graph = _tools_graph_cached(settings.llm_model)
                    msgs = _dict_messages_to_lc(st.session_state.tools_messages)

                    out = tools_graph.invoke({"messages": msgs})
                    out_msgs = out.get("messages", []) or []

                    # Final AI message in the state is the answer.
                    last_ai = next((mm for mm in reversed(out_msgs) if isinstance(mm, AIMessage)), None)
                    answer = last_ai.content if last_ai else "(no answer)"
                    tools_used = _extract_tool_names_from_messages(out_msgs)

                if show_tools_used and tools_used:
                    tools_slot.caption("🧰 Tools used: " + ", ".join([f"`{t}`" for t in tools_used]))
                answer_slot.write(answer)

                st.session_state.tools_messages.append({"role": "assistant", "content": answer})

                with st.expander("Debug (tool messages)"):
                    st.write("Tools used:", tools_used)
                    st.write("Messages produced:", len(out_msgs))

        except Exception as e:
            st.error(str(e))

    # Optional: keep your old "run once each" tester as a quick health check
    with st.expander("Quick tool health-check (run each tool once)"):
        q = st.text_input("Query for one-shot tools test", key="tools_q_once")
        if st.button("Run one-shot tools", type="secondary"):
            try:
                results = run_tools_once(
                    q,
                    wiki_chars=settings.wiki_doc_content_chars_max,
                )
                # NOTE(review): this nests st.expander inside st.expander, which
                # Streamlit disallows at runtime — confirm and consider using
                # st.container / st.subheader for the per-tool output instead.
                for r in results:
                    with st.expander(r.tool):
                        st.write(r.output)
            except Exception as e:
                st.error(str(e))
|
| 518 |
+
|
| 519 |
+
else:
    # Settings / health-check page: surfaces the effective configuration so
    # the user can verify keys and connections before using the agents.
    st.title("⚙️ Settings / Health Check")
    st.write("Use this page to confirm your keys and connections.")

    if not settings.groq_api_key:
        st.warning("GROQ_API_KEY is not set. Add it in your environment or .env.")
    else:
        st.success("GROQ_API_KEY is set.")

    st.write("**Current model:**", settings.llm_model)
    st.write("**SQLite path:**", settings.sqlite_path)

    if settings.neo4j_uri:
        st.write("**Neo4j URI:**", settings.neo4j_uri)
    else:
        st.info("Neo4j not configured yet (NEO4J_URI empty). Graph Agent will fail until set.")
|
orchestrator/__init__.py
ADDED
|
File without changes
|
orchestrator/factories.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
from orchestrator.settings import Settings
|
| 5 |
+
|
| 6 |
+
def get_llm(settings: Settings, *, model: Optional[str] = None, temperature: float = 0.2):
    """Build a Groq-backed chat model from *settings*.

    *model* overrides ``settings.llm_model`` when given. Raises ValueError if
    no Groq API key is configured.
    """
    # We use Groq in your stack (same as project 1).
    # If you want OpenAI later, you can add a get_openai_llm here.
    from langchain_groq import ChatGroq

    if not settings.groq_api_key:
        raise ValueError("Missing GROQ_API_KEY. Set it in your environment or .env.")
    chosen = model or settings.llm_model
    return ChatGroq(groq_api_key=settings.groq_api_key, model=chosen, temperature=temperature)
|
orchestrator/graph_agent.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from typing import Dict, Any, Optional
|
| 4 |
+
|
| 5 |
+
from orchestrator.settings import Settings
|
| 6 |
+
from orchestrator.factories import get_llm
|
| 7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 8 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
from langchain_community.graphs import Neo4jGraph
|
| 12 |
+
except Exception as e: # pragma: no cover
|
| 13 |
+
Neo4jGraph = None
|
| 14 |
+
|
| 15 |
+
@dataclass
class GraphAgentDebug:
    """Debug payload returned (as a dict) alongside a graph_answer() result."""

    # Cypher query the LLM generated (after backtick/space cleanup).
    cypher: str = ""
    # Raw rows returned by graph.query(); None until a query has run.
    raw_results: Any = None
    # Exception text when any step failed; empty string on success.
    error: str = ""
|
| 20 |
+
|
| 21 |
+
def _get_graph(settings: Settings):
    """Open a Neo4jGraph connection using credentials from *settings*.

    Raises ImportError when the Neo4j integration could not be imported and
    ValueError when any of the URI/username/password is missing.
    """
    if Neo4jGraph is None:
        raise ImportError("Neo4jGraph not available. Install langchain-community[neo4j] or neo4j driver.")
    credentials = (settings.neo4j_uri, settings.neo4j_username, settings.neo4j_password)
    if not all(credentials):
        raise ValueError("Missing NEO4J_URI/NEO4J_USERNAME/NEO4J_PASSWORD.")
    uri, user, pwd = credentials
    return Neo4jGraph(url=uri, username=user, password=pwd)
|
| 31 |
+
|
| 32 |
+
def graph_answer(settings: Settings, question: str) -> Dict[str, Any]:
    """
    A simple Graph DB agent:
    1) Get graph schema
    2) Ask LLM to write Cypher (ONLY the query)
    3) Execute Cypher
    4) Ask LLM to produce a final answer grounded in results

    Returns a dict with keys ``answer``, ``debug`` (GraphAgentDebug fields)
    and ``agent`` (always ``"graph"``). On failure, ``answer`` is a generic
    apology and ``debug["error"]`` carries the exception text.
    """
    llm = get_llm(settings, temperature=0)
    graph = _get_graph(settings)

    # schema string
    schema = getattr(graph, "schema", None)
    if callable(schema):  # older versions: graph.schema is a function
        schema = schema()
    schema = schema or "Schema not available."

    cypher_prompt = ChatPromptTemplate.from_template(
        """You are a Neo4j Cypher expert.
Given the graph schema below, write a Cypher query to answer the user question.
Return ONLY the Cypher query (no backticks, no explanation).

Schema:
{schema}

User question:
{question}
"""
    )

    to_cypher = cypher_prompt | llm | StrOutputParser()

    dbg = GraphAgentDebug()

    try:
        cypher = (to_cypher.invoke({"schema": schema, "question": question}) or "").strip()
        # Basic cleanup: strip stray backticks/spaces the model may add.
        cypher = cypher.strip("` ")
        dbg.cypher = cypher
        if not cypher or len(cypher) < 6:
            raise ValueError("LLM did not produce a valid Cypher query.")

        results = graph.query(cypher)
        dbg.raw_results = results

        answer_prompt = ChatPromptTemplate.from_template(
            """You are a helpful assistant answering questions using ONLY the database results.
If results are empty, say you couldn't find relevant rows.

User question:
{question}

Cypher results (JSON-like):
{results}

Answer concisely and clearly.
"""
        )
        answer_chain = answer_prompt | llm | StrOutputParser()
        answer = answer_chain.invoke({"question": question, "results": results})

        return {"answer": answer, "debug": dbg.__dict__, "agent": "graph"}

    except Exception as e:
        dbg.error = str(e)
        return {
            "answer": "I couldn't query the graph database for that question. Check Neo4j connection/schema and try again.",
            "debug": dbg.__dict__,
            # Fix: include "agent" on the failure path too, so both return
            # shapes are consistent for callers that inspect out["agent"].
            "agent": "graph",
        }
|
orchestrator/graphs.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Annotated, Any, Dict, List, Literal, TypedDict
|
| 4 |
+
|
| 5 |
+
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
| 6 |
+
from langgraph.graph import END, START, StateGraph
|
| 7 |
+
from langgraph.graph.message import add_messages
|
| 8 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
| 9 |
+
|
| 10 |
+
from orchestrator.factories import get_llm
|
| 11 |
+
from orchestrator.graph_agent import graph_answer
|
| 12 |
+
from orchestrator.settings import Settings
|
| 13 |
+
from orchestrator.sql_agent import sql_answer
|
| 14 |
+
from orchestrator.tools import make_web_wiki_arxiv_tools
|
| 15 |
+
|
| 16 |
+
# The four destinations the router can dispatch a turn to.
Route = Literal["sql", "graph", "tools", "general"]


class RouterState(TypedDict, total=False):
    """Shared LangGraph state flowing between the router and agent nodes.

    Declared with total=False so every key is optional and nodes may return
    partial state updates.
    """

    # Conversation history; the add_messages reducer appends node outputs
    # instead of overwriting the list.
    messages: Annotated[list[BaseMessage], add_messages]
    # Route label chosen by the router node for the current turn.
    route: Route
    # Accumulated diagnostics (router label, tools used, agent debug dumps).
    debug: Dict[str, Any]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _safe_text(x: Any) -> str:
|
| 26 |
+
if x is None:
|
| 27 |
+
return ""
|
| 28 |
+
return x if isinstance(x, str) else str(x)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _last_user_text(messages: list[BaseMessage]) -> str:
    """Return the stripped text of the most recent HumanMessage, or ''."""
    latest = next(
        (m for m in reversed(messages) if isinstance(m, HumanMessage)),
        None,
    )
    if latest is None:
        return ""
    return _safe_text(latest.content).strip()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _messages_to_transcript(messages: list[BaseMessage], max_turns: int = 8) -> str:
    """Render the last ~max_turns Human/AI exchanges as a plain transcript.

    Tool messages are deliberately excluded to keep prompts stable.
    """
    limit = max_turns * 2  # roughly two messages per conversational turn
    recent: List[BaseMessage] = []
    for msg in reversed(messages):
        if isinstance(msg, (HumanMessage, AIMessage)):
            recent.append(msg)
            if len(recent) >= limit:
                break

    rendered: List[str] = []
    for msg in reversed(recent):
        speaker = "User" if isinstance(msg, HumanMessage) else "Assistant"
        rendered.append(f"{speaker}: {_safe_text(msg.content)}")
    return "\n".join(rendered).strip()
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _merge_debug(state: RouterState, **kv: Any) -> Dict[str, Any]:
|
| 61 |
+
dbg = dict(state.get("debug") or {})
|
| 62 |
+
for k, v in kv.items():
|
| 63 |
+
if v is not None:
|
| 64 |
+
dbg[k] = v
|
| 65 |
+
return dbg
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _extract_tool_names(messages: list[BaseMessage]) -> List[str]:
    """Collect tool names referenced by AIMessage.tool_calls, deduped in order.

    Handles both dict-shaped and attribute-shaped tool calls, which vary
    across LangChain variants.
    """
    collected: List[str] = []
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        for call in getattr(msg, "tool_calls", None) or []:
            name = call.get("name") if isinstance(call, dict) else getattr(call, "name", None)
            if name:
                collected.append(str(name))
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(collected))
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _rewrite_to_standalone(llm, messages: list[BaseMessage]) -> str:
    """Rewrite a follow-up like "show them" into a standalone question.

    Returns '' when there is no user text, the question unchanged when the
    conversation has only one user turn, and otherwise the LLM's rewrite
    (falling back to the original question if the LLM returns nothing).
    """
    question = _last_user_text(messages)
    if not question:
        return ""

    # A single user message cannot be a follow-up; skip the LLM call.
    user_turns = sum(1 for m in messages if isinstance(m, HumanMessage))
    if user_turns <= 1:
        return question

    transcript = _messages_to_transcript(messages, max_turns=8)
    prompt = (
        "Rewrite the user's latest question into a standalone question.\n"
        "Do NOT answer the question.\n\n"
        f"Conversation:\n{transcript}\n\n"
        f"Latest user question:\n{question}\n\n"
        "Standalone question:"
    )
    reply = llm.invoke(
        [
            SystemMessage(content="You rewrite follow-up questions into standalone questions."),
            HumanMessage(content=prompt),
        ]
    )
    rewritten = _safe_text(getattr(reply, "content", "")).strip()
    return rewritten or question
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def build_tools_agent_graph(settings: Settings):
    """Compile the tools agent: an assistant <-> ToolNode loop.

    The assistant may emit tool calls; tools_condition routes to the tools
    node while calls remain, otherwise the graph ends.
    """
    toolbox = make_web_wiki_arxiv_tools(
        wiki_chars=settings.wiki_doc_content_chars_max,
    )
    model = get_llm(settings, temperature=0).bind_tools(toolbox)

    def assistant(state: RouterState):
        # One LLM step over the full message history.
        return {"messages": [model.invoke(state["messages"])]}

    builder = StateGraph(RouterState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(toolbox))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def build_router_graph(settings: Settings):
    """Compile the top-level multi-agent router graph.

    A router node classifies the latest user turn into one of four routes
    (sql / graph / tools / general); each route has a dedicated node and the
    graph ends after that node runs once.
    """
    tools_graph = build_tools_agent_graph(settings)
    llm_router = get_llm(settings, temperature=0)

    # System prompt for the classification step; the model is told to reply
    # with exactly one of the four labels.
    route_prompt = (
        "You are a router for a multi-agent system.\n"
        "Choose exactly ONE route label from: sql, graph, tools, general.\n\n"
        "Routing rules:\n"
        "- sql: querying a relational database (tables/rows, SQL, students DB, counts, filters).\n"
        "- graph: querying a Neo4j graph database (nodes/relationships, Cypher).\n"
        "- tools: needs external knowledge / searching (Wikipedia/arXiv/web) or tool use.\n"
        "- general: conceptual explanation or chat that doesn't need tools/DB queries.\n\n"
        "Return ONLY the label.\n"
    )

    def router(state: RouterState):
        # Classify the latest question, giving the model recent transcript
        # context so follow-ups route correctly.
        msgs = state.get("messages", [])
        q = _last_user_text(msgs)
        transcript = _messages_to_transcript(msgs, max_turns=8)

        payload = (
            "Conversation transcript:\n"
            f"{transcript}\n\n"
            "Latest user question:\n"
            f"{q}"
        )

        msg = llm_router.invoke(
            [SystemMessage(content=route_prompt), HumanMessage(content=payload)]
        )
        label = _safe_text(msg.content).strip().lower()
        # Any unexpected model output falls back to the general route.
        if label not in ("sql", "graph", "tools", "general"):
            label = "general"

        dbg = _merge_debug(state, router_label=label, router_raw=msg.content, routed_to=label)
        return {"route": label, "debug": dbg}

    def sql_node(state: RouterState):
        # Resolve follow-ups into a standalone question before querying SQL.
        standalone = _rewrite_to_standalone(llm_router, state["messages"])
        out = sql_answer(settings, standalone)
        dbg = _merge_debug(state, routed_to="sql", sql=out, standalone_question=standalone)
        return {"route": "sql", "messages": [AIMessage(content=str(out["answer"]))], "debug": dbg}

    def graph_node(state: RouterState):
        # Same standalone-rewrite step, then delegate to the Neo4j agent.
        standalone = _rewrite_to_standalone(llm_router, state["messages"])
        out = graph_answer(settings, standalone)
        dbg = _merge_debug(state, routed_to="graph", graph=out.get("debug", {}), standalone_question=standalone)
        return {"route": "graph", "messages": [AIMessage(content=str(out["answer"]))], "debug": dbg}

    def tools_node(state: RouterState):
        # Run the compiled tools sub-graph over the full message history and
        # surface which tools it ended up calling.
        out_state = tools_graph.invoke({"messages": state["messages"]})
        out_msgs = out_state.get("messages", [])
        tools_used = _extract_tool_names(out_msgs)

        dbg = _merge_debug(
            state,
            routed_to="tools",
            tools_used=tools_used,
            tools_graph={"messages_len": len(out_msgs)},
        )
        return {"route": "tools", "messages": out_msgs, "debug": dbg}

    def general_node(state: RouterState):
        # Use the conversation itself (not just last message)
        convo = [m for m in state["messages"] if isinstance(m, (HumanMessage, AIMessage))]
        msg = llm_router.invoke([SystemMessage(content="You are a helpful assistant.")] + convo)
        dbg = _merge_debug(state, routed_to="general")
        return {"route": "general", "messages": [AIMessage(content=_safe_text(msg.content))], "debug": dbg}

    # Wire the graph: START -> router -> one agent node -> END.
    g = StateGraph(RouterState)
    g.add_node("router", router)
    g.add_node("sql", sql_node)
    g.add_node("graph", graph_node)
    g.add_node("tools", tools_node)
    g.add_node("general", general_node)

    g.add_edge(START, "router")
    g.add_conditional_edges(
        "router",
        lambda s: s["route"],
        {"sql": "sql", "graph": "graph", "tools": "tools", "general": "general"},
    )
    g.add_edge("sql", END)
    g.add_edge("graph", END)
    g.add_edge("tools", END)
    g.add_edge("general", END)

    return g.compile()
|
orchestrator/settings.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
@dataclass(frozen=True)
class Settings:
    """Immutable application configuration, defaulted from environment variables.

    Note: the env vars are read once, when this class definition is first
    imported; later changes to os.environ do not affect the defaults.
    """

    # LLM
    groq_api_key: str = os.getenv("GROQ_API_KEY", "")
    llm_model: str = os.getenv("LLM_MODEL", "meta-llama/llama-4-maverick-17b-128e-instruct")

    # SQL (SQLite by default; relative paths are resolved by the SQL agent)
    sqlite_path: str = os.getenv("SQLITE_PATH", "student.db")

    # Neo4j Graph DB
    neo4j_uri: str = os.getenv("NEO4J_URI", "")
    neo4j_username: str = os.getenv("NEO4J_USERNAME", "")
    neo4j_password: str = os.getenv("NEO4J_PASSWORD", "")

    # Tool settings
    wiki_doc_content_chars_max: int = int(os.getenv("WIKI_DOC_CHARS", "2000"))

    # Debug flag. Fix: truthy values are now matched case-insensitively
    # (accepts e.g. "TRUE", "Yes", "on"); previously only an exact-case
    # handful matched, so DEBUG=TRUE silently disabled debugging.
    debug: bool = os.getenv("DEBUG", "0").strip().lower() in ("1", "true", "yes", "on")
|
orchestrator/sql_agent.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
import sqlite3
|
| 6 |
+
|
| 7 |
+
from sqlalchemy import create_engine
|
| 8 |
+
|
| 9 |
+
from orchestrator.settings import Settings
|
| 10 |
+
|
| 11 |
+
from langchain_groq import ChatGroq
|
| 12 |
+
from langchain_community.utilities.sql_database import SQLDatabase
|
| 13 |
+
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
| 14 |
+
from langchain_community.agent_toolkits.sql.base import create_sql_agent
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _resolve_sqlite_path(settings: Settings, db_path: Optional[str] = None) -> Path:
|
| 18 |
+
p = Path(db_path or settings.sqlite_path)
|
| 19 |
+
if not p.is_absolute():
|
| 20 |
+
# project root = parent of orchestrator/
|
| 21 |
+
p = (Path(__file__).resolve().parents[1] / p).resolve()
|
| 22 |
+
return p
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _make_sql_db_readonly(sqlite_path: Path) -> SQLDatabase:
|
| 26 |
+
if not sqlite_path.exists():
|
| 27 |
+
raise FileNotFoundError(
|
| 28 |
+
f"SQLite DB not found at: {sqlite_path}\n"
|
| 29 |
+
f"Fix: put student.db at project root OR set SQLITE_PATH to an absolute path."
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
def _connect():
|
| 33 |
+
return sqlite3.connect(f"file:{sqlite_path.as_posix()}?mode=ro", uri=True)
|
| 34 |
+
|
| 35 |
+
engine = create_engine("sqlite:///", creator=_connect)
|
| 36 |
+
return SQLDatabase(engine)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _make_llm(settings: Settings):
    """Build the Groq chat model, tolerating both ChatGroq constructor APIs.

    Newer langchain-groq accepts api_key/model; older releases want
    groq_api_key/model_name. Try the new form first and fall back on
    TypeError.
    """
    try:
        return ChatGroq(api_key=settings.groq_api_key, model=settings.llm_model, temperature=0)
    except TypeError:
        # Legacy keyword names for older langchain-groq releases.
        return ChatGroq(
            groq_api_key=settings.groq_api_key,
            model_name=settings.llm_model,
            temperature=0,
        )
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def make_sql_agent(settings: Settings, *, db_path: Optional[str] = None):
    """Build the SQL agent plus its read-only database handle.

    Returns:
        (agent, db, resolved_sqlite_path_as_str)
    """
    model = _make_llm(settings)

    resolved = _resolve_sqlite_path(settings, db_path=db_path)
    database = _make_sql_db_readonly(resolved)

    # Force the tool-calling SQL agent; it is the most reliable agent_type
    # on current LangChain (1.2.x) releases.
    executor = create_sql_agent(
        llm=model,
        toolkit=SQLDatabaseToolkit(db=database, llm=model),
        agent_type="tool-calling",
        handle_parsing_errors=True,
        max_iterations=30,
        max_execution_time=60,
        verbose=bool(settings.debug),
        return_intermediate_steps=bool(settings.debug),
    )

    return executor, database, str(resolved)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def sql_answer(settings: Settings, question: str, *, db_path: Optional[str] = None) -> Dict[str, Any]:
    """Answer a natural-language question against the read-only SQLite DB.

    Args:
        settings: app settings (LLM credentials + sqlite path).
        question: the user's question in plain English.
        db_path: optional override for the SQLite file.

    Returns:
        dict with "answer", "db_path", "agent", and (when debug is on and
        the agent surfaced them) "intermediate_steps".
    """
    agent, db, sqlite_path = make_sql_agent(settings, db_path=db_path)

    q = (question or "").strip().lower()

    # Deterministic shortcut: table listing never needs an agent round-trip.
    if any(s in q for s in ["list the tables", "list tables", "show tables", "what tables"]):
        tables = db.get_usable_table_names()
        # "agent" key added for a consistent result shape with the path below.
        return {"answer": "Tables: " + ", ".join(tables), "db_path": sqlite_path, "agent": "sql"}

    # Run agent
    out = agent.invoke({"input": question})

    # Normalize output. Fix: some agent versions key the final text under
    # "answer" instead of "output"; `out.get("output")` alone could be None
    # and the user would see the literal string "None".
    if isinstance(out, dict):
        answer = out.get("output") or out.get("answer") or str(out)
    else:
        answer = str(out)

    result: Dict[str, Any] = {"answer": str(answer), "db_path": sqlite_path, "agent": "sql"}

    # If debug is enabled, surface intermediate steps (Streamlit expander).
    if isinstance(out, dict) and "intermediate_steps" in out:
        result["intermediate_steps"] = out["intermediate_steps"]

    return result
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# from __future__ import annotations
|
| 110 |
+
|
| 111 |
+
# from pathlib import Path
|
| 112 |
+
# from typing import Optional, Dict, Any
|
| 113 |
+
# import sqlite3
|
| 114 |
+
|
| 115 |
+
# from sqlalchemy import create_engine
|
| 116 |
+
|
| 117 |
+
# from orchestrator.settings import Settings
|
| 118 |
+
# from orchestrator.factories import get_llm
|
| 119 |
+
|
| 120 |
+
# # --- Imports that vary across LangChain versions ---
|
| 121 |
+
# try:
|
| 122 |
+
# # langchain >= 1.x
|
| 123 |
+
# from langchain.sql_database import SQLDatabase
|
| 124 |
+
# except Exception:
|
| 125 |
+
# # older / community
|
| 126 |
+
# from langchain_community.utilities import SQLDatabase
|
| 127 |
+
|
| 128 |
+
# try:
|
| 129 |
+
# from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
| 130 |
+
# except Exception:
|
| 131 |
+
# # older path (rare)
|
| 132 |
+
# from langchain.agents.agent_toolkits import SQLDatabaseToolkit
|
| 133 |
+
|
| 134 |
+
# try:
|
| 135 |
+
# from langchain.agents import create_sql_agent
|
| 136 |
+
# except Exception:
|
| 137 |
+
# from langchain_community.agent_toolkits.sql.base import create_sql_agent
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# def _resolve_sqlite_path(settings: Settings) -> Path:
|
| 141 |
+
# """
|
| 142 |
+
# Resolve SQLITE_PATH relative to project root (parent of orchestrator/),
|
| 143 |
+
# so Streamlit's current working directory does not break DB loading.
|
| 144 |
+
# """
|
| 145 |
+
# p = Path(settings.sqlite_path)
|
| 146 |
+
# if not p.is_absolute():
|
| 147 |
+
# p = (Path(__file__).resolve().parents[1] / p).resolve()
|
| 148 |
+
# return p
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# def _make_sql_db_readonly(sqlite_path: Path) -> SQLDatabase:
|
| 152 |
+
# """
|
| 153 |
+
# Open SQLite in READ-ONLY mode so a wrong path does NOT create an empty DB file.
|
| 154 |
+
# """
|
| 155 |
+
# if not sqlite_path.exists():
|
| 156 |
+
# raise FileNotFoundError(
|
| 157 |
+
# f"SQLite DB not found at: {sqlite_path}\n"
|
| 158 |
+
# f"Fix: put student.db at the project root OR set SQLITE_PATH to an absolute path."
|
| 159 |
+
# )
|
| 160 |
+
|
| 161 |
+
# def _connect():
|
| 162 |
+
# return sqlite3.connect(f"file:{sqlite_path.as_posix()}?mode=ro", uri=True)
|
| 163 |
+
|
| 164 |
+
# engine = create_engine("sqlite:///", creator=_connect)
|
| 165 |
+
# return SQLDatabase(engine)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# def _create_agent(llm, toolkit, verbose: bool):
|
| 169 |
+
# """
|
| 170 |
+
# Create SQL agent WITHOUT passing kwargs that frequently clash with defaults
|
| 171 |
+
# in langchain-classic AgentExecutor.
|
| 172 |
+
# """
|
| 173 |
+
# # Keep only the safest option; many builds already set other defaults internally.
|
| 174 |
+
# agent_exec_kwargs = {"handle_parsing_errors": True}
|
| 175 |
+
|
| 176 |
+
# # Some versions accept max_iterations/max_execution_time top-level.
|
| 177 |
+
# # Some accept neither.
|
| 178 |
+
# # We try progressively.
|
| 179 |
+
# try:
|
| 180 |
+
# return create_sql_agent(
|
| 181 |
+
# llm=llm,
|
| 182 |
+
# toolkit=toolkit,
|
| 183 |
+
# verbose=verbose,
|
| 184 |
+
# max_iterations=25,
|
| 185 |
+
# max_execution_time=60,
|
| 186 |
+
# agent_executor_kwargs=agent_exec_kwargs,
|
| 187 |
+
# )
|
| 188 |
+
# except TypeError:
|
| 189 |
+
# # Try without time/iteration controls to avoid duplicate kwargs.
|
| 190 |
+
# return create_sql_agent(
|
| 191 |
+
# llm=llm,
|
| 192 |
+
# toolkit=toolkit,
|
| 193 |
+
# verbose=verbose,
|
| 194 |
+
# agent_executor_kwargs=agent_exec_kwargs,
|
| 195 |
+
# )
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# def make_sql_agent(settings: Settings, *, db_path: Optional[str] = None):
|
| 199 |
+
# llm = get_llm(settings, temperature=0)
|
| 200 |
+
|
| 201 |
+
# sqlite_path = Path(db_path).expanduser().resolve() if db_path else _resolve_sqlite_path(settings)
|
| 202 |
+
# db = _make_sql_db_readonly(sqlite_path)
|
| 203 |
+
# toolkit = SQLDatabaseToolkit(db=db, llm=llm)
|
| 204 |
+
|
| 205 |
+
# agent = _create_agent(llm, toolkit, verbose=getattr(settings, "debug", False))
|
| 206 |
+
# return agent, db, str(sqlite_path)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
# def sql_answer(settings: Settings, question: str, *, db_path: Optional[str] = None) -> Dict[str, Any]:
|
| 210 |
+
# agent, db, sqlite_path = make_sql_agent(settings, db_path=db_path)
|
| 211 |
+
|
| 212 |
+
# # Deterministic shortcut so this never loops.
|
| 213 |
+
# q = (question or "").strip().lower()
|
| 214 |
+
# if any(s in q for s in ["list the tables", "list tables", "show tables", "what tables"]):
|
| 215 |
+
# try:
|
| 216 |
+
# tables = db.get_usable_table_names()
|
| 217 |
+
# except Exception:
|
| 218 |
+
# # fallback for older SQLDatabase implementations
|
| 219 |
+
# tables = []
|
| 220 |
+
# return {
|
| 221 |
+
# "answer": "Tables: " + (", ".join(tables) if tables else "(none found)"),
|
| 222 |
+
# "db_path": sqlite_path,
|
| 223 |
+
# }
|
| 224 |
+
|
| 225 |
+
# # Run agent
|
| 226 |
+
# out = agent.invoke({"input": question})
|
| 227 |
+
|
| 228 |
+
# # Normalize output
|
| 229 |
+
# if isinstance(out, dict):
|
| 230 |
+
# answer = out.get("output") or out.get("answer") or str(out)
|
| 231 |
+
# else:
|
| 232 |
+
# answer = str(out)
|
| 233 |
+
|
| 234 |
+
# return {"answer": answer, "db_path": sqlite_path}
|
orchestrator/tools.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
|
| 5 |
+
from langchain_core.tools import tool
|
| 6 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
| 7 |
+
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
|
| 8 |
+
|
| 9 |
+
# --- Calculator tool (safe arithmetic) ---
|
| 10 |
+
import ast
|
| 11 |
+
import operator as op
|
| 12 |
+
|
| 13 |
+
_ALLOWED_OPS = {
|
| 14 |
+
ast.Add: op.add,
|
| 15 |
+
ast.Sub: op.sub,
|
| 16 |
+
ast.Mult: op.mul,
|
| 17 |
+
ast.Div: op.truediv,
|
| 18 |
+
ast.Pow: op.pow,
|
| 19 |
+
ast.USub: op.neg,
|
| 20 |
+
ast.Mod: op.mod,
|
| 21 |
+
ast.FloorDiv: op.floordiv,
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
def _eval_expr(expr: str) -> float:
|
| 25 |
+
"""Safely evaluate a basic arithmetic expression."""
|
| 26 |
+
node = ast.parse(expr, mode="eval").body
|
| 27 |
+
|
| 28 |
+
def _eval(n):
|
| 29 |
+
if isinstance(n, ast.Num): # py<3.8
|
| 30 |
+
return n.n
|
| 31 |
+
if isinstance(n, ast.Constant): # py>=3.8
|
| 32 |
+
if isinstance(n.value, (int, float)):
|
| 33 |
+
return n.value
|
| 34 |
+
raise ValueError("Only numbers are allowed.")
|
| 35 |
+
if isinstance(n, ast.BinOp) and type(n.op) in _ALLOWED_OPS:
|
| 36 |
+
return _ALLOWED_OPS[type(n.op)](_eval(n.left), _eval(n.right))
|
| 37 |
+
if isinstance(n, ast.UnaryOp) and type(n.op) in _ALLOWED_OPS:
|
| 38 |
+
return _ALLOWED_OPS[type(n.op)](_eval(n.operand))
|
| 39 |
+
raise ValueError("Only basic arithmetic is allowed.")
|
| 40 |
+
|
| 41 |
+
return float(_eval(node))
|
| 42 |
+
|
| 43 |
+
@tool
def calculator(expression: str) -> str:
    """Evaluate a math expression. Input must be a plain arithmetic expression (e.g., '12*(3+4)')."""
    # Errors are returned as text so the agent can recover instead of crashing.
    try:
        result = _eval_expr(expression)
    except Exception as e:
        return f"[calculator error] {e}"
    return str(result)
|
| 50 |
+
|
| 51 |
+
# --- Web/Wiki/Arxiv tools ---
|
| 52 |
+
def make_web_wiki_arxiv_tools(*, wiki_k: int = 3, wiki_chars: int = 2000):
    """Return tool objects compatible with LangGraph ToolNode."""
    search_tool = DuckDuckGoSearchRun()

    # WikipediaQueryRun requires an explicit api_wrapper on the installed versions.
    wikipedia_tool = WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=wiki_k, doc_content_chars_max=wiki_chars)
    )

    # The underlying arXiv API does not require credentials.
    arxiv_tool = ArxivQueryRun()

    return [search_tool, wikipedia_tool, arxiv_tool, calculator]
|
| 65 |
+
|
| 66 |
+
# @dataclass
|
| 67 |
+
# class ToolResult:
|
| 68 |
+
# tool: str
|
| 69 |
+
# output: str
|
| 70 |
+
|
| 71 |
+
@dataclass
class ToolResult:
    """Outcome of a single tool invocation (see run_tools_once)."""

    # Name of the tool that was run.
    tool: str
    # Raw textual output, or a "[tool error] ..." message on failure.
    output: str
    # False when the tool raised; output then carries the error text.
    ok: bool = True
    # str(exception) when the tool raised, else None.
    error: Optional[str] = None
|
| 77 |
+
|
| 78 |
+
def run_tools_once(query: str, *, wiki_k: int = 3, wiki_chars: int = 2000) -> List[ToolResult]:
    """Non-agent helper: run each tool once and return outputs (good for debugging)."""
    results: List[ToolResult] = []
    for tool_obj in make_web_wiki_arxiv_tools(wiki_k=wiki_k, wiki_chars=wiki_chars):
        try:
            payload = str(tool_obj.run(query))
        except Exception as exc:
            results.append(
                ToolResult(tool=tool_obj.name, output=f"[tool error] {exc}", ok=False, error=str(exc))
            )
        else:
            results.append(ToolResult(tool=tool_obj.name, output=payload))
    return results
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit>=1.35
|
| 2 |
+
python-dotenv>=1.0
|
| 3 |
+
|
| 4 |
+
# LangChain / LangGraph stack (align with your env)
|
| 5 |
+
langchain>=1.2.0
|
| 6 |
+
langchain-core>=0.3.0
|
| 7 |
+
langchain-community>=0.4.0
|
| 8 |
+
langchain-groq>=0.3.0
|
| 9 |
+
langgraph>=0.2.0
|
| 10 |
+
langchain-neo4j
|
| 11 |
+
|
| 12 |
+
# Tools
|
| 13 |
+
ddgs
|
| 14 |
+
wikipedia>=1.4.0
|
| 15 |
+
arxiv>=2.1.0
|
| 16 |
+
|
| 17 |
+
# SQL
|
| 18 |
+
sqlalchemy>=2.0
|
| 19 |
+
|
| 20 |
+
# Neo4j graph
|
| 21 |
+
neo4j>=5.0
|
school.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adb52805ce8c7dc02d5ecc0f104eac5944ac2d234a0cfe04276714e29ea9faf8
|
| 3 |
+
size 1478656
|
sqlite.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# sqlite.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import sqlite3
|
| 6 |
+
import random
|
| 7 |
+
from datetime import date, timedelta
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Output DB file name and RNG seed; both overridable via environment.
DB_NAME = os.environ.get("SQLITE_DB", "school.db")
SEED = int(os.environ.get("SQLITE_SEED", "42"))

# Scale knobs (keep modest for fast demo)
NUM_STUDENTS = int(os.environ.get("NUM_STUDENTS", "120"))
NUM_COURSES = int(os.environ.get("NUM_COURSES", "14"))
SEMESTERS = ["2024-Fall", "2025-Spring", "2025-Fall"]  # change freely


# Name pools used to synthesize "First Last" student names.
FIRST_NAMES = [
    "Aarav", "Vivaan", "Aditya", "Vihaan", "Arjun", "Sai", "Reyansh", "Ishaan", "Krishna",
    "Ananya", "Aadhya", "Diya", "Ira", "Meera", "Saanvi", "Myra", "Aarohi", "Riya",
    "Rahul", "Kiran", "Suresh", "Priya", "Neha", "Vikram", "Nikhil", "Sneha", "Pooja",
]
LAST_NAMES = [
    "Verma", "Patel", "Gupta", "Mehta", "Singh",
    "Kumar", "Das", "Roy", "Bose", "Chowdhury",
]

# Academic metadata pools for students...
PROGRAMS = ["Computer Science", "Data Science", "AI & ML", "Information Systems", "Cybersecurity"]
SECTIONS = ["A", "B", "C", "D"]

# ...and for courses.
DEPARTMENTS = ["CS", "DS", "AI", "IS", "CY"]
COURSE_TITLES = [
    "Database Systems", "Operating Systems", "Computer Networks", "Machine Learning",
    "Deep Learning", "Data Structures", "Algorithms", "Cloud Computing",
    "NLP Fundamentals", "Information Security", "Software Engineering",
    "Data Visualization", "MLOps Foundations", "Graph Databases",
    "Statistics for Data Science", "Ethical AI",
]

# Letter-grade bands as (letter, inclusive_low, inclusive_high) integer ranges,
# ordered from highest band to lowest.
# NOTE(review): the integer ranges leave gaps for fractional scores (e.g. 89.5
# is in neither B's 80-89 nor A's 90-100) — confirm against grade_from_score.
GRADE_BANDS = [
    ("A", 90, 100),
    ("B", 80, 89),
    ("C", 70, 79),
    ("D", 60, 69),
    ("F", 0, 59),
]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def make_name(rng: random.Random) -> str:
    """Return a random full name drawn from the first/last name pools."""
    # Draw order (first, then last) matters for reproducibility with a seeded RNG.
    first = rng.choice(FIRST_NAMES)
    last = rng.choice(LAST_NAMES)
    return " ".join((first, last))
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def grade_from_score(score: float, bands=None) -> str:
    """Map a numeric score to a letter grade.

    Args:
        score: numeric score in [0, 100]; fractional values are allowed
            (the seeder produces scores rounded to one decimal place).
        bands: optional override of (letter, low, high) triples ordered from
            highest band to lowest; defaults to the module-level GRADE_BANDS.

    Returns:
        The letter of the first band whose lower bound the score meets,
        or "F" if the score is below every band.

    Bug fix: the original checked ``lo <= score <= hi`` against inclusive
    integer edges, so fractional scores in the gaps between bands (e.g. 89.5,
    between B's high of 89 and A's low of 90) fell through every band and
    were graded "F". Comparing against the lower bound only closes the gaps.
    """
    if bands is None:
        bands = GRADE_BANDS
    for letter, lo, _hi in bands:
        # Bands are ordered high-to-low, so the first satisfied lower
        # bound identifies the correct band.
        if score >= lo:
            return letter
    # Negative or otherwise out-of-range scores fail closed.
    return "F"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def connect(db_path: Path) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* with this project's pragmas applied."""
    connection = sqlite3.connect(str(db_path))
    # Apply pragmas in a fixed order: FK enforcement for the schema's
    # constraints, WAL journaling, and NORMAL synchronous mode.
    for pragma in (
        "PRAGMA foreign_keys = ON;",
        "PRAGMA journal_mode = WAL;",
        "PRAGMA synchronous = NORMAL;",
    ):
        connection.execute(pragma)
    return connection
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def recreate_schema(con: sqlite3.Connection) -> None:
    """Drop and recreate all tables and indexes. Destructive: existing data is lost."""
    cur = con.cursor()

    # Drop in FK-safe order
    cur.executescript(
        """
        DROP TABLE IF EXISTS attendance;
        DROP TABLE IF EXISTS enrollments;
        DROP TABLE IF EXISTS courses;
        DROP TABLE IF EXISTS students;
        """
    )

    # Parents (students, courses) first, then child tables that reference them.
    cur.executescript(
        """
        CREATE TABLE students (
            student_id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            program TEXT NOT NULL,
            section TEXT NOT NULL,
            year INTEGER NOT NULL CHECK (year BETWEEN 1 AND 4)
        );

        CREATE TABLE courses (
            course_id INTEGER PRIMARY KEY AUTOINCREMENT,
            course_code TEXT NOT NULL UNIQUE,
            course_name TEXT NOT NULL,
            department TEXT NOT NULL,
            credits INTEGER NOT NULL CHECK (credits BETWEEN 1 AND 6)
        );

        CREATE TABLE enrollments (
            enrollment_id INTEGER PRIMARY KEY AUTOINCREMENT,
            student_id INTEGER NOT NULL,
            course_id INTEGER NOT NULL,
            semester TEXT NOT NULL,
            score REAL NOT NULL CHECK (score BETWEEN 0 AND 100),
            grade TEXT NOT NULL CHECK (grade IN ('A','B','C','D','F')),
            created_at TEXT NOT NULL DEFAULT (datetime('now')),
            FOREIGN KEY (student_id) REFERENCES students(student_id) ON DELETE CASCADE,
            FOREIGN KEY (course_id) REFERENCES courses(course_id) ON DELETE CASCADE,
            UNIQUE(student_id, course_id, semester)
        );

        CREATE TABLE attendance (
            attendance_id INTEGER PRIMARY KEY AUTOINCREMENT,
            student_id INTEGER NOT NULL,
            course_id INTEGER NOT NULL,
            semester TEXT NOT NULL,
            class_date TEXT NOT NULL,
            present INTEGER NOT NULL CHECK (present IN (0,1)),
            FOREIGN KEY (student_id) REFERENCES students(student_id) ON DELETE CASCADE,
            FOREIGN KEY (course_id) REFERENCES courses(course_id) ON DELETE CASCADE
        );

        CREATE INDEX idx_enrollments_student ON enrollments(student_id);
        CREATE INDEX idx_enrollments_course ON enrollments(course_id);
        CREATE INDEX idx_enrollments_sem ON enrollments(semester);

        CREATE INDEX idx_att_student_course ON attendance(student_id, course_id);
        CREATE INDEX idx_att_semester ON attendance(semester);
        CREATE INDEX idx_att_date ON attendance(class_date);
        """
    )

    con.commit()
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def seed_students(con: sqlite3.Connection, rng: random.Random) -> None:
    """Insert NUM_STUDENTS randomly generated student rows and commit."""
    # Tuple fields evaluate left-to-right, so the RNG call order per student
    # (name, program, section, year) matches the original implementation.
    rows = [
        (
            make_name(rng),
            rng.choice(PROGRAMS),
            rng.choice(SECTIONS),
            rng.randint(1, 4),
        )
        for _ in range(NUM_STUDENTS)
    ]
    con.cursor().executemany(
        "INSERT INTO students(name, program, section, year) VALUES (?,?,?,?)",
        rows,
    )
    con.commit()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def seed_courses(con: sqlite3.Connection, rng: random.Random) -> None:
    """Insert NUM_COURSES course rows with generated codes/credits and commit."""
    # Shuffle a copy of the full title pool, then take the first NUM_COURSES.
    pool = COURSE_TITLES[:]
    rng.shuffle(pool)

    rows = []
    for idx, title in enumerate(pool[:NUM_COURSES], start=1):
        dept = rng.choice(DEPARTMENTS)
        # e.g. "CS101"; the 100 offset keeps course numbers three digits.
        code = f"{dept}{100 + idx}"
        rows.append((code, title, dept, rng.choice([2, 3, 3, 4])))

    cursor = con.cursor()
    cursor.executemany(
        "INSERT INTO courses(course_code, course_name, department, credits) VALUES (?,?,?,?)",
        rows,
    )
    con.commit()
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def seed_enrollments_and_attendance(con: sqlite3.Connection, rng: random.Random) -> None:
    """Seed enrollments and per-class attendance for every semester.

    For each semester in SEMESTERS, every student is enrolled in 3-5 distinct
    random courses; each enrollment gets a score, a derived letter grade, and
    10 weekly attendance rows whose presence probability loosely tracks the
    score. Assumes students and courses are already seeded.
    """
    cur = con.cursor()

    student_ids = [r[0] for r in cur.execute("SELECT student_id FROM students").fetchall()]
    course_ids = [r[0] for r in cur.execute("SELECT course_id FROM courses").fetchall()]

    enrollment_rows = []
    attendance_rows = []

    # Build a small calendar per semester (10 class dates)
    sem_start = {
        "2024-Fall": date(2024, 9, 1),
        "2025-Spring": date(2025, 2, 1),
        "2025-Fall": date(2025, 9, 1),
    }

    for sem in SEMESTERS:
        # Semesters missing from sem_start fall back to an arbitrary start date.
        start = sem_start.get(sem, date(2025, 1, 1))
        # 10 weekly class dates, ISO-formatted to match the TEXT class_date column.
        class_dates = [(start + timedelta(days=7 * i)).isoformat() for i in range(10)]

        for sid in student_ids:
            # each semester: 3-5 courses
            chosen = rng.sample(course_ids, k=rng.randint(3, 5))
            for cid in chosen:
                # score distribution: mostly 60-95
                base = rng.gauss(mu=78, sigma=10)
                # Clamp into the [0, 100] range the enrollments CHECK requires.
                score = max(0, min(100, round(base, 1)))
                grade = grade_from_score(score)

                enrollment_rows.append((sid, cid, sem, score, grade))

                # attendance probability correlates loosely with score
                # higher score => slightly higher attendance
                p_present = min(0.98, max(0.60, 0.70 + (score - 70) / 100))
                for d in class_dates:
                    present = 1 if rng.random() < p_present else 0
                    attendance_rows.append((sid, cid, sem, d, present))

    # OR IGNORE guards the UNIQUE(student_id, course_id, semester) constraint
    # if the seeder is run against pre-existing data.
    cur.executemany(
        "INSERT OR IGNORE INTO enrollments(student_id, course_id, semester, score, grade) VALUES (?,?,?,?,?)",
        enrollment_rows,
    )
    cur.executemany(
        "INSERT INTO attendance(student_id, course_id, semester, class_date, present) VALUES (?,?,?,?,?)",
        attendance_rows,
    )
    con.commit()
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def create_views(con: sqlite3.Connection) -> None:
    """(Re)create reporting views: a per-student, per-semester score rollup."""
    cur = con.cursor()
    cur.executescript(
        """
        DROP VIEW IF EXISTS student_performance;

        CREATE VIEW student_performance AS
        SELECT
            s.student_id,
            s.name,
            s.program,
            s.section,
            e.semester,
            ROUND(AVG(e.score), 2) AS avg_score,
            SUM(CASE WHEN e.grade = 'A' THEN 1 ELSE 0 END) AS num_A,
            COUNT(*) AS num_courses
        FROM students s
        JOIN enrollments e ON e.student_id = s.student_id
        GROUP BY s.student_id, e.semester;
        """
    )
    con.commit()
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def print_summary(con: sqlite3.Connection) -> None:
|
| 251 |
+
cur = con.cursor()
|
| 252 |
+
tables = cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;").fetchall()
|
| 253 |
+
print("Tables:", [t[0] for t in tables])
|
| 254 |
+
|
| 255 |
+
for t in ["students", "courses", "enrollments", "attendance"]:
|
| 256 |
+
n = cur.execute(f"SELECT COUNT(*) FROM {t};").fetchone()[0]
|
| 257 |
+
print(f"{t}: {n}")
|
| 258 |
+
|
| 259 |
+
# A couple example queries
|
| 260 |
+
print("\nExample: Top 5 students by avg score (latest semester)")
|
| 261 |
+
latest = cur.execute("SELECT semester FROM enrollments ORDER BY semester DESC LIMIT 1;").fetchone()[0]
|
| 262 |
+
rows = cur.execute(
|
| 263 |
+
"""
|
| 264 |
+
SELECT s.name, s.program, ROUND(AVG(e.score), 2) AS avg_score
|
| 265 |
+
FROM students s
|
| 266 |
+
JOIN enrollments e ON e.student_id = s.student_id
|
| 267 |
+
WHERE e.semester = ?
|
| 268 |
+
GROUP BY s.student_id
|
| 269 |
+
ORDER BY avg_score DESC
|
| 270 |
+
LIMIT 5;
|
| 271 |
+
""",
|
| 272 |
+
(latest,),
|
| 273 |
+
).fetchall()
|
| 274 |
+
for r in rows:
|
| 275 |
+
print(r)
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def main() -> None:
    """Create the database file, rebuild the schema, and seed demo data."""
    rng = random.Random(SEED)
    target = Path(DB_NAME).resolve()

    con = connect(target)
    try:
        # Order matters: schema first, then parent tables before the
        # enrollment/attendance rows that reference them.
        recreate_schema(con)
        seed_students(con, rng)
        seed_courses(con, rng)
        seed_enrollments_and_attendance(con, rng)
        create_views(con)
        print(f"Created DB: {target}")
        print_summary(con)
    finally:
        con.close()
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
# Script entry point: build and seed the demo database when run directly.
if __name__ == "__main__":
    main()
|