Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import time | |
| from dotenv import load_dotenv | |
| import streamlit as st | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain_groq import ChatGroq | |
# --------------------------
# Load environment variables
# --------------------------
# python-dotenv copies settings from a .env file (if one exists) into the
# process environment before anything below reads them.
load_dotenv()

# --------------------------
# Logging configuration
# --------------------------
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
| # -------------------------- | |
| # PDF text extraction | |
| # -------------------------- | |
def get_pdf_text(pdf_docs):
    """Extract the text of every page in the given PDF documents.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``
            (``main`` passes the files returned by ``st.file_uploader``).

    Returns:
        A ``(text, page_count)`` tuple. ``text`` is the extracted text of each
        page that yielded any, in document/page order, with a newline appended
        after every page. ``page_count`` is the total number of pages across
        all documents, including pages that produced no text.
    """
    page_texts = []
    page_count = 0
    for pdf in pdf_docs:
        pages = PdfReader(pdf).pages
        page_count += len(pages)
        for page in pages:
            extracted_text = page.extract_text()
            # Pages without extractable text give a falsy result; skip them so
            # they do not contribute blank lines.
            if extracted_text:
                page_texts.append(extracted_text + "\n")
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts), page_count
| # -------------------------- | |
| # Text chunking | |
| # -------------------------- | |
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    The splitter breaks on newlines, targets chunks of 1000 characters
    (measured with ``len``) and carries 200 characters of overlap between
    neighbouring chunks.

    Args:
        text: The full document text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``;
        ``main`` forwards it to ``get_vectorstore`` as the list of texts.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = CharacterTextSplitter(**splitter_options)
    chunks = splitter.split_text(text)
    return chunks
| # -------------------------- | |
| # FAISS VectorStore creation | |
| # -------------------------- | |
def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in an in-memory FAISS store.

    Args:
        text_chunks: Iterable of text strings to embed and index.

    Returns:
        The FAISS vector store built from the chunks using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Raises:
        ValueError: If ``text_chunks`` is empty. An index cannot be built from
            zero texts, so this fails early with a clear message instead of
            surfacing an opaque error from inside the vector store.
    """
    texts = list(text_chunks)
    if not texts:
        # Checked before loading the embedding model so the failure is cheap.
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(texts=texts, embedding=embeddings)
| # -------------------------- | |
| # Conversation chain | |
| # -------------------------- | |
def get_conversation_chain(vectorstore):
    """Build the retrieval chain that answers questions over ``vectorstore``.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``; ``main`` passes
            the store returned by ``get_vectorstore``.

    Returns:
        A ``ConversationalRetrievalChain`` wired to the Groq-hosted
        ``llama-3.3-70b-versatile`` chat model (temperature 0.5), the store's
        retriever, and a buffer memory kept under the ``chat_history`` key.
    """
    groq_llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
    chat_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    doc_retriever = vectorstore.as_retriever()
    chain = ConversationalRetrievalChain.from_llm(
        llm=groq_llm,
        retriever=doc_retriever,
        memory=chat_memory,
    )
    return chain
| # -------------------------- | |
| # Handle user input | |
| # -------------------------- | |
def handle_userinput(user_question):
    """Answer ``user_question`` with the session's chain and render the reply.

    Shows a warning and returns if no conversation chain has been stored in
    ``st.session_state.conversation`` yet. Otherwise runs the chain, then
    renders the answer followed by the response time and the answer's word
    count. Only the latest answer is shown, not the chat history.

    Args:
        user_question: The question text to send to the chain.
    """
    chain = st.session_state.conversation
    if chain is None:
        st.warning("β Please process the documents first.")
        return

    # NOTE(review): the non-ASCII prefixes in the UI strings below look like
    # mis-decoded emoji; they are kept as-is here - confirm the intended glyphs.

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    started = time.perf_counter()
    try:
        with st.spinner("π€ Thinking..."):
            # invoke() is the supported entry point; calling the chain object
            # directly is deprecated in current LangChain releases.
            response = chain.invoke({'question': user_question})
    except Exception:
        # UI boundary: a failed LLM/retrieval call should not take the page
        # down. Log the traceback and tell the user instead.
        logging.exception("Conversation chain failed to answer the question")
        st.error("Something went wrong while generating the answer. Please try again.")
        return
    elapsed_time = round(time.perf_counter() - started, 2)

    answer = response['answer']
    # Show response only (no chat history)
    st.markdown(f"**π€ Bot:** {answer}")
    st.info(f"β± Response Time: {elapsed_time}s | π Words: {len(answer.split())}")
| # -------------------------- | |
| # Main Streamlit App | |
| # -------------------------- | |
def main():
    """Render the app: page header, sidebar upload/processing, question box.

    The sidebar lets the user upload PDFs and, on request, turns them into a
    conversation chain stored in ``st.session_state.conversation``. The main
    area takes a question and hands it to ``handle_userinput``.
    """
    st.set_page_config(page_title="InfinaDocs Knowledge Sphere", page_icon="π", layout="wide")
    st.title("π InfinaDocs Knowledge Sphere")
    st.markdown("Chat with your documents using **LLaMA 3.3** and **Groq AI**. π")

    # Session state initialization: only seed keys that are not present yet.
    session = st.session_state
    for key, initial in (("conversation", None), ("pages_processed", 0)):
        if key not in session:
            session[key] = initial

    # Sidebar - Upload PDFs
    with st.sidebar:
        st.header("π Upload & Process")
        uploads = st.file_uploader("Upload PDFs", accept_multiple_files=True, type=["pdf"])
        process_clicked = st.button("π Process Documents")
        if process_clicked and not uploads:
            st.warning("Please upload at least one PDF.")
        elif process_clicked:
            with st.spinner("π Reading & Processing..."):
                raw_text, total_pages = get_pdf_text(uploads)
                # Recorded even when no text could be extracted.
                session.pages_processed = total_pages
                if not raw_text.strip():
                    st.error("No valid text found in PDFs.")
                else:
                    chunks = get_text_chunks(raw_text)
                    store = get_vectorstore(chunks)
                    session.conversation = get_conversation_chain(store)
                    st.success(f"β {len(uploads)} file(s) processed | π {total_pages} pages")

    # Main Chat Section
    st.subheader("π¬ Ask a Question")
    question = st.text_input("Type your question here...")
    submitted = st.button("Submit Question")
    if submitted and question.strip():
        handle_userinput(question)
    elif submitted:
        # Blank or whitespace-only input.
        st.warning("Please enter a question before submitting.")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()