# Captured from a Hugging Face Space page (Space status at capture time: Sleeping).
import hashlib
import os
import tempfile
import uuid

import streamlit as st
from dotenv import load_dotenv

# LangChain + Hugging Face + Groq
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load environment variables (from a .env file, via python-dotenv).
load_dotenv()

# NOTE(review): a Hugging Face token lookup (st.secrets["HF_TOKEN"]) used to be
# wired in here and is currently disabled -- confirm whether it is still needed.

# Streamlit UI: page heading followed by a one-line description of the app.
st.title("Conversational RAG with PDF Uploads")
st.write("Upload PDFs and chat with their content using Hugging Face embeddings + Groq LLM")
def _fingerprint_uploads(uploaded_files):
    """Return a SHA-256 hex digest over the names and bytes of the uploads.

    The digest is used as a cache key so the PDFs are re-indexed only when
    the set of uploaded files changes, not on every Streamlit rerun.
    """
    digest = hashlib.sha256()
    for uploaded_file in uploaded_files:
        digest.update(uploaded_file.name.encode("utf-8"))
        digest.update(b"\0")  # separator so name/content boundaries stay unambiguous
        digest.update(uploaded_file.getvalue())
        digest.update(b"\0")
    return digest.hexdigest()


def _load_pdf_documents(uploaded_files):
    """Parse every uploaded PDF and return the combined list of documents.

    Each upload is written to its own temporary file (the loader is given a
    path), and that file is removed as soon as it has been parsed. A fixed
    path such as "./temp.pdf" would be overwritten by concurrent sessions
    and would be left behind on disk.
    """
    documents = []
    for uploaded_file in uploaded_files:
        # delete=False so the file can be closed before the loader reopens it.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        try:
            docs = PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)
        for doc in docs:
            # Record the user's file name instead of the throwaway temp path.
            doc.metadata["source"] = uploaded_file.name
        documents.extend(docs)
    return documents


def _build_vectorstore(uploaded_files):
    """Split, embed and index the uploads.

    Returns:
        A Chroma vector store, or None when the PDFs yielded no text chunks
        (for example image-only scans), since there is nothing to index.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    splits = text_splitter.split_documents(_load_pdf_documents(uploaded_files))
    if not splits:
        return None
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")
    # Without an explicit name every build targets the same default
    # collection, so repeated builds in one process can pile up duplicate
    # chunks. A unique name keeps each index isolated.
    return Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )


def _get_vectorstore(uploaded_files):
    """Return this session's index, rebuilding it only if the uploads changed.

    Streamlit re-executes the whole script on every widget interaction;
    caching in ``st.session_state`` avoids re-parsing and re-embedding the
    same PDFs each time the user asks a question.
    """
    fingerprint = _fingerprint_uploads(uploaded_files)
    if "pdf_index" in st.session_state:
        cached = st.session_state.pdf_index
        if cached["fingerprint"] == fingerprint:
            return cached["vectorstore"]
        # The uploads changed: drop the stale collection before replacing it.
        if cached["vectorstore"] is not None:
            cached["vectorstore"].delete_collection()
    vectorstore = _build_vectorstore(uploaded_files)
    st.session_state.pdf_index = {"fingerprint": fingerprint, "vectorstore": vectorstore}
    return vectorstore


def get_session_history(session: str) -> BaseChatMessageHistory:
    """Return the chat history stored for *session*, creating it on first use.

    Histories live in ``st.session_state.store``, which the script flow
    below initialises before this function is called.
    """
    if session not in st.session_state.store:
        st.session_state.store[session] = ChatMessageHistory()
    return st.session_state.store[session]


# Prompt template
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "Use retrieved context to answer concisely:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Groq API key input
api_key = st.text_input("Enter your Groq API key:", type="password")

if api_key:
    # Initialize Groq LLM (use supported model)
    llm = ChatGroq(groq_api_key=api_key, model_name="llama-3.3-70b-versatile")

    # Session management
    session_id = st.text_input("Session ID", value="default_session")
    if "store" not in st.session_state:
        st.session_state.store = {}

    # File uploader
    uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)

    if uploaded_files:
        # Split and embed (cached per session, see _get_vectorstore)
        vectorstore = _get_vectorstore(uploaded_files)

        if vectorstore is None:
            st.warning("No extractable text was found in the uploaded PDFs.")
        else:
            retriever = vectorstore.as_retriever()

            def rag_chain(question, chat_history):
                """Answer *question* from the indexed PDFs.

                Args:
                    question: The user's question text.
                    chat_history: Prior messages of the conversation.

                Returns:
                    The LLM response message (its text is in ``.content``).
                """
                retrieved_docs = retriever.invoke(question)
                context = "\n".join(doc.page_content for doc in retrieved_docs)
                # format_messages keeps the system / history / human roles;
                # format() would flatten the whole prompt into one string.
                messages = qa_prompt.format_messages(
                    chat_history=chat_history,
                    input=question,
                    context=context,
                )
                return llm.invoke(messages)

            # User input
            user_input = st.text_input("Your question:")
            if user_input:
                session_history = get_session_history(session_id)
                response = rag_chain(user_input, session_history.messages)
                # Record the turn only after the model has answered.
                session_history.add_user_message(user_input)
                session_history.add_ai_message(response.content)
                st.write("Assistant:", response.content)
                st.write("Chat History:", session_history.messages)
else:
    st.warning("Please enter your Groq API Key")