import streamlit as st
import os
import shutil
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from ingest import main as ingest_data  # builds/rebuilds the local knowledge base

# --- Configuration ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
load_dotenv(dotenv_path=os.path.join(BASE_DIR, ".env"))
PERSIST_DIR = os.path.join(BASE_DIR, "db")

# --- Prompt Engineering ---
prompt_template = """You are PyTutor, an expert Python programmer and teaching assistant.
Your role is to help users understand, design, and debug Python code confidently and clearly.

Guidelines:
- Be concise, precise, and technically accurate.
- Always explain the reasoning behind your code or solution.
- When relevant, compare alternative approaches and explain why one is preferred.
- Use simple language when explaining complex algorithms.
- Prioritize clarity, correctness, and performance in code examples.
- Provide step-by-step explanations for concepts or algorithms.
- When teaching, use small, runnable Python snippets.
- Avoid unnecessary verbosity or overgeneralization; keep answers focused and confident.
- Always assume the user is learning Python actively and wants to understand, not just copy.

Tone: Confident, concise, and instructive, like a skilled mentor guiding a student through real code.

Context: {context}

Question: {question}

Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

# --- Caching Functions ---
@st.cache_resource(show_spinner="Connecting to LLM...")
def llm_pipeline():
    """Initializes the Groq LLM pipeline."""
    token = os.getenv("GROQ_API_KEY")
    if not token:
        st.error("GROQ_API_KEY is not set. Please add it to your .env file.")
        st.stop()
    try:
        llm = ChatGroq(
            groq_api_key=token,
            model_name="llama-3.1-8b-instant",
            temperature=0.4,
            max_tokens=1024
        )
        return llm
    except Exception as e:
        st.error(f"Failed to initialize Groq LLM: {e}")
        st.stop()


@st.cache_resource(show_spinner="Loading Knowledge Base...")
def qa_llm(_llm):
    """Initializes the RetrievalQA chain over the persisted FAISS index."""
    try:
        embeddings = SentenceTransformerEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"}
        )
        if not os.path.exists(PERSIST_DIR):
            st.warning("Knowledge base not found. Please build it first.")
            return None
        # Safe only because the index in PERSIST_DIR is generated locally by ingest.py.
        db = FAISS.load_local(PERSIST_DIR, embeddings, allow_dangerous_deserialization=True)
        retriever = db.as_retriever(search_kwargs={'k': 5})
        qa = RetrievalQA.from_chain_type(
            llm=_llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
        )
        return qa
    except Exception as e:
        st.error(f"Error initializing QA pipeline: {e}")
        return None


# --- Main App Logic ---
def main():
    st.set_page_config(page_title="Python DSA Tutor", layout="centered")
    st.markdown("