"""Streamlit RAG chatbot that answers questions about Hugging Face documentation.

On the first run of a session the script loads a fixed list of documentation
pages, splits them into overlapping chunks, embeds the chunks and stores a
FAISS index in ``st.session_state``. Every question typed into the text box is
then answered by a Groq chat model through a LangChain retrieval chain built
on top of that index.
"""

import os
import time

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    # Fallback import path for LangChain versions without the split-out package.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

load_dotenv()

# Documentation pages that are fetched and indexed once per session.
DOC_URLS = [
    "https://huggingface.co/docs/transformers/index",
    "https://huggingface.co/docs/datasets/index",
    "https://huggingface.co/docs/tokenizers/index",
    "https://huggingface.co/docs/hub/index",
    "https://huggingface.co/docs/accelerate/index",
    "https://huggingface.co/docs/optimum/index",
]
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL_NAME = "llama-3.1-8b-instant"

# Read the Groq API key from the environment (populated from .env by
# load_dotenv above). A missing key is reported on the page instead of
# surfacing as a raw KeyError traceback.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error(
        "GROQ_API_KEY is not set. Add it to your environment or .env file "
        "and reload the app."
    )
    st.stop()


def _build_vector_index(urls):
    """Load, chunk and embed the pages at *urls* into ``st.session_state``.

    Populates the session-state keys ``docs``, ``text_splitter``,
    ``final_documents``, ``embeddings`` and ``vectors``, in that order.

    Args:
        urls: Iterable of page URLs; each one is loaded with its own
            ``WebBaseLoader``.
    """
    state = st.session_state
    state.docs = []
    for url in urls:
        state.docs.extend(WebBaseLoader(url).load())
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    state.final_documents = state.text_splitter.split_documents(state.docs)
    state.embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    state.vectors = FAISS.from_documents(state.final_documents, state.embeddings)


# Streamlit re-executes this script on every interaction, which resets ordinary
# variables. st.session_state persists across those reruns, so the index is
# only built when it is not already stored there.
if "vectors" not in st.session_state:
    _build_vector_index(DOC_URLS)

llm = ChatGroq(groq_api_key=groq_api_key, model_name=GROQ_MODEL_NAME)

# The retrieved context sits on its own line so it cannot run into the
# instruction text or the question.
prompt = ChatPromptTemplate.from_template(
    """
Answer the question based on the provided context only.
Please provide the most accurate response based on the question
{context}
Question:{input}
"""
)

document_chain = create_stuff_documents_chain(llm, prompt)
retriever = st.session_state.vectors.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# ----------------- Header Section -----------------
# NOTE(review): this call renders only whitespace as written; presumably a
# custom style block belongs here - confirm.
st.markdown(""" """, unsafe_allow_html=True)
st.markdown(
    "\n\nHugging Face Documentation Assistant\n\n",
    unsafe_allow_html=True,
)
st.markdown(
    "\n\nAn AI chatbot that answers questions from Hugging Face documentation "
    "using RAG & FAISS\n\n",
    unsafe_allow_html=True,
)

# ----------------- Input Section -----------------
st.markdown("### 🧠 Ask me anything about Hugging Face documentation:")
# Kept under its own name so it does not shadow the prompt template above.
user_question = st.text_input(
    "Type your question here...",
    placeholder="e.g. What does the pipeline() function in Transformers do?",
)

# ----------------- Chat Response Section -----------------
if user_question:
    with st.spinner("🔍 Generating answer..."):
        # perf_counter measures wall-clock time. process_time would count CPU
        # time only and ignore the time spent waiting on the remote model.
        started = time.perf_counter()
        response = retrieval_chain.invoke({"input": user_question})
        response_time = round(time.perf_counter() - started, 2)

    # Display answer. Model output is untrusted, so it is rendered as plain
    # Markdown (HTML tags are escaped) rather than with unsafe_allow_html.
    st.markdown("### 🪄 Answer:")
    st.markdown(response["answer"])
    st.markdown(f"⏱️ *Response generated in {response_time} seconds*")

    # Document similarity search section. Everything written inside the
    # ``with`` block is placed inside the expander, a collapsible box that
    # keeps the retrieved chunks hidden until the user opens it.
    with st.expander("📚 View Retrieved Context Documents"):
        for doc in response["context"]:
            # Scraped page text is untrusted as well: no raw HTML rendering.
            st.markdown(doc.page_content)

# ----------------- Footer -----------------
st.markdown("---")
st.markdown(
    "\n\n"
    "Built with ❤️ using Streamlit, LangChain, Groq API & FAISS"
    "\n\n",
    unsafe_allow_html=True,
)