# NOTE: "Spaces: Sleeping" — Hugging Face Spaces status banner captured
# when this file was scraped from the Space page; not part of the program.
import os
import re

import numpy as np
import streamlit as st
import torch
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
| # Environment setup | |
| HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") | |
| if not HUGGINGFACEHUB_API_TOKEN: | |
| raise ValueError("HuggingFace API Token is missing.") | |
# Sentence-transformer used to embed both the document chunks and the
# incoming query into the same vector space.
_EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=_EMBED_MODEL)
# --- Document ingestion -------------------------------------------------
# NOTE(review): PyPDFLoader is handed a Google Drive download URL here —
# confirm the installed langchain version fetches remote URLs; some
# versions require a local file path instead.
pdf_url = "https://drive.google.com/uc?id=1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0"
loader = PyPDFLoader(pdf_url)
documents = loader.load()

# Chunk the pages so each piece fits in the LLM context window; the
# overlap reduces the chance of cutting an answer across a chunk edge.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
# --- Retrieval index and generator model --------------------------------
# Dense FAISS index over the chunks; queried by similarity search below.
db = FAISS.from_documents(texts, embeddings)

# Hub-hosted LLM (example model, replace as needed). temperature=0 makes
# generation deterministic; max_length bounds the output sequence.
llm = HuggingFaceHub(
    repo_id="bigscience/bloom",
    model_kwargs={"temperature": 0, "max_length": 512},
)
# --- Prompt and QA chain ------------------------------------------------
# The prompt instructs the model to answer strictly from the retrieved
# context and to admit when the answer is not present.
prompt_template = """
Use the following pieces of context to answer the question at the end.
If the question cannot be answered based on the context, say "I don't know."
Context:
{context}
Question:
{question}
Answer:
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# "stuff" chain: every retrieved document is inserted into {context}.
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
# --- Streamlit frontend -------------------------------------------------
st.title("RAG-based Document Q&A")
# FIX: the app has no upload widget — the document is loaded from a
# fixed URL above — so don't tell the user to upload one.
st.write("Ask questions about the loaded document.")

query = st.text_input("Enter your question:")
if query:
    # Retrieve the 4 chunks most similar to the question.
    docs = db.similarity_search(query, k=4)
    # BUG FIX: a "stuff" QA chain builds {context} from `input_documents`
    # itself; passing a pre-joined "context" string raises a missing-key
    # error at runtime. Hand it the Document objects directly.
    answer = qa_chain.run(input_documents=docs, question=query)
    st.write("**Answer:**", answer)