First_Aid_Kit / app.py
NHZ's picture
Update app.py
f2ab7e6 verified
raw
history blame
2.2 kB
import os
import re
import torch
import numpy as np
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import streamlit as st
# --- Environment setup ---------------------------------------------------
# The HuggingFaceHub LLM wrapper authenticates via this environment variable;
# fail fast at startup rather than erroring on the first model call.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not HUGGINGFACEHUB_API_TOKEN:
    raise ValueError("HuggingFace API Token is missing.")
# --- RAG pipeline setup (runs once at module import) ---------------------
# Sentence-transformer embedding model used both to index the PDF chunks and
# to embed incoming queries for similarity search.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Load PDF document from Google Drive.
# NOTE(review): passing a remote URL to PyPDFLoader relies on the installed
# langchain version supporting URL download — confirm; older versions accept
# only local file paths.
pdf_url = "https://drive.google.com/uc?id=1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0"
loader = PyPDFLoader(pdf_url)
documents = loader.load()
# Split text into ~1000-character chunks with 200-character overlap so that
# context spanning a chunk boundary is not lost at retrieval time.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
# Build the in-memory FAISS vector index over the embedded chunks.
db = FAISS.from_documents(texts, embeddings)
# Initialize HuggingFace LLM (example model, replace as needed).
# NOTE(review): some HF inference endpoints reject temperature=0 — verify the
# chosen repo accepts it, or use a small positive value.
llm = HuggingFaceHub(repo_id="bigscience/bloom", model_kwargs={"temperature": 0, "max_length": 512})
# --- Prompt and QA chain --------------------------------------------------
# Custom "stuff" prompt: the retrieved chunks are injected into {context} and
# the user's query into {question}. The explicit "I don't know" instruction
# discourages the LLM from answering outside the retrieved context.
prompt_template = """
Use the following pieces of context to answer the question at the end.
If the question cannot be answered based on the context, say "I don't know."
Context:
{context}
Question:
{question}
Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# Load QA chain. chain_type="stuff" concatenates all retrieved documents into
# the prompt's {context} variable ("context" is the StuffDocumentsChain
# default document_variable_name, matching the template above).
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
# --- Streamlit frontend ---------------------------------------------------
st.title("RAG-based Document Q&A")
# Fixed wrong caption: there is no upload widget — the PDF is loaded at
# startup from the Google Drive URL above.
st.write("Ask questions about the loaded document.")
query = st.text_input("Enter your question:")
if query:
    # Retrieve the 4 chunks most similar to the query from the FAISS index.
    docs = db.similarity_search(query, k=4)
    # BUG FIX: load_qa_chain(..., chain_type="stuff") returns a
    # StuffDocumentsChain, which requires the retrieved Document objects
    # under the "input_documents" key and renders the prompt's {context}
    # itself. The original pre-joined a "context" string and passed it
    # directly, which raises "Missing some input keys: {'input_documents'}".
    answer = qa_chain.run(input_documents=docs, question=query)
    st.write("**Answer:**", answer)