import os
import tempfile

import streamlit as st
|
|
from dotenv import load_dotenv
load_dotenv()
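# Note: load_dotenv() reads a local .env file; the app expects it (or the shell
# environment) to define GROQ_API_KEY, which main() checks before answering.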
|
|
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
CHROMA_DIR = "chroma_db"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
APP_TITLE = "Source.AI"
APP_SUBTITLE = "SOURCE TO YOUR STUDIES"
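# CHROMA_DIR is where Chroma persists its index on disk; EMBEDDING_MODEL_NAME is
# the sentence-transformers model used to embed both documents and queries.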
|
|
PREMIUM_STYLE = """
<style>
.main {
    background-color: #0e1117;
}
.stApp {
    background: linear-gradient(135deg, #0e1117 0%, #1a1c24 100%);
}
.sidebar .sidebar-content {
    background-color: #1a1c24;
}
h1 {
    color: #ffffff;
    font-family: 'Inter', sans-serif;
    font-weight: 700;
    letter-spacing: -1px;
}
.stChatMessage {
    background-color: #1e222d;
    border-radius: 10px;
    border: 1px solid #30363d;
    margin-bottom: 10px;
}
.stChatInputContainer {
    border-radius: 10px;
    border: 1px solid #30363d;
}
.css-1offfwp {
    background-color: #238636 !important;
}
.stButton>button {
    width: 100%;
    border-radius: 8px;
    border: 1px solid #30363d;
    background-color: #21262d;
    color: #c9d1d9;
    transition: all 0.2s;
}
.stButton>button:hover {
    background-color: #30363d;
    border-color: #8b949e;
}
</style>
"""
|
|
PROMPT_TEMPLATE = (
    "You are a sophisticated Study Assistant. Use the provided context to answer the student's question accurately. "
    "If the answer isn't in the context, politely state that you don't know based on the available materials."
    "\n\n"
    "Context:\n{context}\n\n"
    "Question: {question}"
)
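# The {context} and {question} placeholders are filled in main() at query time,
# just before the prompt is sent to the LLM.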
|
|
@st.cache_resource
def load_vectorstore() -> Chroma:
    """Create (or reopen) the persistent Chroma vector store, cached across Streamlit reruns."""
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    vectorstore = Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=embeddings,
    )
    return vectorstore
|
|
@st.cache_resource
def get_llm(api_key: str) -> ChatGroq:
    """Build the Groq-hosted chat model, cached per API key."""
    llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        groq_api_key=api_key,
        temperature=0.3,
    )
    return llm
|
|
def build_context(chunks) -> str:
    """Join the page contents of retrieved chunks into a single context string."""
    return "\n\n".join(chunk.page_content for chunk in chunks)
|
|
def main() -> None:
    """Run the Streamlit app: sidebar document indexing plus the RAG chat loop."""
    st.set_page_config(page_title=APP_TITLE, page_icon="📚", layout="wide")
    st.markdown(PREMIUM_STYLE, unsafe_allow_html=True)
|
|
    # Sidebar: branding, conversation reset, and document upload.
    with st.sidebar:
        st.title(f"📚 {APP_TITLE}")
        st.markdown(f"**{APP_SUBTITLE}**")
        st.divider()
|
|
        if st.button("🗑️ Reset Conversation"):
            st.session_state["messages"] = []
            st.rerun()
|
|
        st.divider()
|
|
        st.subheader("📄 Knowledge Base")
        uploaded_file = st.file_uploader("Upload course material (PDF)", type=["pdf"])
|
|
| if "processed_files" not in st.session_state: |
| st.session_state["processed_files"] = set() |
|
|
        # Open the persistent vector store; bail out early if the embedding backend fails.
        try:
            vectorstore = load_vectorstore()
        except Exception as exc:
            st.error(f"Engine Error: {exc}")
            return
|
|
        if uploaded_file is not None:
            if uploaded_file.name not in st.session_state["processed_files"]:
                with st.spinner("Analyzing and indexing document..."):
                    tmp_path = None
                    try:
                        # Persist the upload to a temp file so PyPDFLoader can read it from disk.
                        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                            tmp_file.write(uploaded_file.getbuffer())
                            tmp_path = tmp_file.name

                        loader = PyPDFLoader(tmp_path)
                        documents = loader.load()

                        # Split pages into overlapping chunks before embedding and indexing them.
                        splitter = RecursiveCharacterTextSplitter(
                            chunk_size=700,
                            chunk_overlap=100,
                        )
                        splits = splitter.split_documents(documents)
                        vectorstore.add_documents(splits)

                        st.session_state["processed_files"].add(uploaded_file.name)
                        st.success("Document added to knowledge base.")
                    except Exception as exc:
                        st.error(f"Indexing Error: {exc}")
                    finally:
                        if tmp_path and os.path.exists(tmp_path):
                            os.remove(tmp_path)
            else:
                st.info(f"'{uploaded_file.name}' is indexed.")
|
|
    # Main chat area.
    st.title(f"📚 {APP_TITLE}")
    st.markdown(f"*{APP_SUBTITLE}*")
|
|
    if "messages" not in st.session_state:
        st.session_state["messages"] = []
|
|
    # The Groq API key is read from the environment (typically set via .env).
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.warning("⚠️ Backend connection not established. Please check your configuration.")
        return
|
|
    try:
        llm = get_llm(api_key)
    except Exception as exc:
        st.error(f"Intelligence Engine Error: {exc}")
        return
|
|
    # Replay the stored conversation on each rerun.
    for message in st.session_state["messages"]:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
|
|
    user_input = st.chat_input("Ask anything about your studies...")
|
|
    if user_input:
        st.session_state["messages"].append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)
|
|
| with st.chat_message("assistant"): |
| placeholder = st.empty() |
| placeholder.markdown("π Analyzing documents...") |
|
|
            try:
                # Retrieve the most relevant chunks, then ask the LLM to answer from them.
                docs = vectorstore.similarity_search(user_input, k=4)

                if not docs:
                    answer = "I couldn't find any relevant information in your current study materials."
                else:
                    context = build_context(docs)
                    filled_prompt = PROMPT_TEMPLATE.format(context=context, question=user_input)

                    response = llm.invoke(filled_prompt)
                    answer = response.content

                placeholder.markdown(answer)
                st.session_state["messages"].append({"role": "assistant", "content": answer})
|
|
            except Exception as exc:
                placeholder.markdown(f"⚠️ Service interruption: {exc}")
|
|
if __name__ == "__main__":
    main()
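# To run locally (assumed setup): install streamlit, python-dotenv, langchain-community,
# langchain-groq, langchain-text-splitters, chromadb, pypdf, and sentence-transformers,
# put GROQ_API_KEY in a .env file, then launch with `streamlit run <path-to-this-file>`.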
|
|