import streamlit as st
import os
import shutil
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from ingest import main as ingest_data  # builds/rebuilds the local knowledge base

# --- Configuration ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
load_dotenv(dotenv_path=os.path.join(BASE_DIR, ".env"))
PERSIST_DIR = os.path.join(BASE_DIR, "db")

# --- Prompt Engineering ---
prompt_template = """You are PyTutor, an expert Python programmer and teaching assistant.
Your role is to help users understand, design, and debug Python code confidently and clearly.

Guidelines:
- Be concise, precise, and technically accurate.
- Always explain the reasoning behind your code or solution.
- When relevant, compare alternative approaches and explain why one is preferred.
- Use simple language when explaining complex algorithms.
- Prioritize clarity, correctness, and performance in code examples.
- Provide step-by-step explanations for concepts or algorithms.
- When teaching, use small, runnable Python snippets.
- Avoid unnecessary verbosity or overgeneralization; keep answers focused and confident.
- Always assume the user is learning Python actively and wants to understand, not just copy.

Tone: Confident, concise, and instructive, like a skilled mentor guiding a student through real code.

Context: {context}

Question: {question}

Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

# --- Caching Functions ---
@st.cache_resource(show_spinner="Connecting to LLM...")
def llm_pipeline():
    """Initializes the Groq LLM pipeline."""
    token = os.getenv("GROQ_API_KEY")
    if not token:
        st.error("GROQ_API_KEY is not set. Please add it to your .env file.")
        st.stop()
    try:
        llm = ChatGroq(
            groq_api_key=token,
            model_name="llama-3.1-8b-instant",
            temperature=0.4,
            max_tokens=1024
        )
        return llm
    except Exception as e:
        st.error(f"Failed to initialize Groq LLM: {e}")
        st.stop()


@st.cache_resource(show_spinner="Loading Knowledge Base...")
def qa_llm(_llm):
    """Initializes the RetrievalQA chain over the persisted FAISS index."""
    try:
        embeddings = SentenceTransformerEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"}
        )
        if not os.path.exists(PERSIST_DIR):
            st.warning("Knowledge base not found. Please build it first.")
            return None
        # Safe only because the index in PERSIST_DIR is generated locally by ingest.py.
        db = FAISS.load_local(PERSIST_DIR, embeddings, allow_dangerous_deserialization=True)
        retriever = db.as_retriever(search_kwargs={'k': 5})
        qa = RetrievalQA.from_chain_type(
            llm=_llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
        )
        return qa
    except Exception as e:
        st.error(f"Error initializing QA pipeline: {e}")
        return None


# --- Main App Logic ---
def main():
    st.set_page_config(page_title="Python DSA Tutor", layout="centered")
    st.markdown("