import streamlit as st import PyPDF2 from groq import Groq import os # Set up Groq API key os.environ["GROQ_API_KEY"] = "myKey" # Initialize Groq client client = Groq(api_key=os.environ.get("GROQ_API_KEY")) # Function to extract PDF content def extract_pdf_content(pdf_file): pdf_text = "" reader = PyPDF2.PdfReader(pdf_file) for page in reader.pages: pdf_text += page.extract_text() return pdf_text # Function to chunk text def chunk_text(text, chunk_size=1000, overlap=200): chunks = [] start = 0 while start < len(text): end = start + chunk_size chunk = text[start:end] chunks.append(chunk) start += chunk_size - overlap return chunks # Function to find relevant chunks def find_relevant_chunks(chunks, query, num_chunks=3): return chunks[:num_chunks] # Simple retrieval # Chatbot function def chatbot_response(user_query, chunks): relevant_chunks = find_relevant_chunks(chunks, user_query) combined_context = "\n\n".join(relevant_chunks) context = f"PDF Content:\n{combined_context}\n\nUser Query: {user_query}" chat_completion = client.chat.completions.create( messages=[{"role": "user", "content": context}], model="llama-3.3-70b-versatile", ) return chat_completion.choices[0].message.content # Streamlit UI st.title("PDF Query Chatbot") st.write("Upload a PDF and ask questions based on its content.") # File upload pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"]) if pdf_file: with st.spinner("Extracting content..."): pdf_content = extract_pdf_content(pdf_file) chunks = chunk_text(pdf_content) st.success("PDF content loaded successfully!") user_query = st.text_input("Ask a question about the PDF:") if user_query: with st.spinner("Fetching response..."): response = chatbot_response(user_query, chunks) st.write(f"**Chatbot Response:** {response}")