Spaces:
Sleeping
Sleeping
| import os | |
| import PyPDF2 | |
| import faiss | |
| import streamlit as st | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from groq import Groq | |
# Initialize Groq API client.
# SECURITY: the API key must come from the environment, never be hard-coded
# in source control. (The literal key previously committed here is exposed
# and should be revoked/rotated on the Groq console.)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path to a PDF file.

    Returns:
        A single string with the text of all pages joined together.
        Pages with no extractable text (e.g. scanned/image-only pages)
        contribute nothing instead of crashing.
    """
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # PyPDF2's extract_text() can return None for image-only
            # pages; the original `text += page.extract_text()` would
            # then raise TypeError. Coalesce None to "".
            text += page.extract_text() or ""
    return text
# Function to create chunks and embeddings using LangChain
def process_text_with_langchain(text):
    """Split *text* into overlapping chunks and index them with FAISS.

    Args:
        text: The full document text to index.

    Returns:
        A ``(vectorstore, chunks)`` tuple: the LangChain FAISS store built
        from HuggingFace embeddings, and the list of raw text chunks.
    """
    # 500-character windows with a 50-character overlap between neighbors.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_text(text)

    # Embed every chunk and build the similarity index in one step.
    embedder = HuggingFaceEmbeddings()
    store = FAISS.from_texts(pieces, embedder)
    return store, pieces
# Function to query FAISS index
def query_faiss_index(query, vectorstore, k=3):
    """Return the text of the *k* indexed chunks most similar to *query*.

    Args:
        query: The user's question / search string.
        vectorstore: Any object exposing ``similarity_search(query, k=...)``
            returning documents with a ``page_content`` attribute
            (e.g. a LangChain FAISS store).
        k: Number of top matches to return (default 3, the previous
            hard-coded value — backward compatible).

    Returns:
        A list of the matching chunks' ``page_content`` strings.
    """
    docs = vectorstore.similarity_search(query, k=k)
    return [doc.page_content for doc in docs]
# Function to interact with Groq LLM
def ask_groq(query):
    """Send *query* as a single user message to the Groq LLM and return the reply text.

    Args:
        query: The prompt to send (here: context + question assembled by the caller).

    Returns:
        The content string of the model's first completion choice.
    """
    messages = [{"role": "user", "content": query}]
    response = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
        stream=False,
    )
    return response.choices[0].message.content
# Streamlit app
# Flow: upload a PDF -> extract text -> chunk + embed into FAISS ->
# retrieve top chunks for each question -> ask the Groq LLM with that context.
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Persist the upload to a local file so PyPDF2 can open it by path.
    # NOTE(review): Streamlit re-runs this script on every widget interaction,
    # so the PDF is re-extracted and re-embedded after each question —
    # consider st.cache_resource / st.session_state; confirm before changing.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.read())
    st.info("Processing the PDF...")
    text = extract_text_from_pdf("uploaded_file.pdf")
    vectorstore, chunks = process_text_with_langchain(text)
    st.success("PDF processed and indexed successfully!")

    # Question loop lives inside the upload branch: `vectorstore` only
    # exists once a PDF has been processed.
    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        # Join retrieved chunks into one context blob for the prompt.
        context = "\n".join(relevant_chunks)
        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)