# document_gpt / app.py — created by meesamraza (commit c608c63)
import streamlit as st
from dotenv import load_dotenv
import os
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from uuid import uuid4
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
# Load configuration from the local .env file into the process environment.
load_dotenv()

# Pull the three required credentials out of the environment.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Abort rendering early if any credential is missing.
if not (pinecone_api_key and google_api_key and huggingfacehub_api_token):
    st.error("API keys not found. Please set PINECONE_API_KEY, GOOGLE_API_KEY, and HUGGINGFACEHUB_API_TOKEN in your .env file.")
    st.stop()

# Connect to Pinecone and open the index backing the RAG store.
pc = Pinecone(api_key=pinecone_api_key, environment="us-east1-gcp")  # Replace with your environment if needed
index_name = "online-rag"
index = pc.Index(index_name)

# The Google embeddings client reads its key from the environment.
os.environ['GOOGLE_API_KEY'] = google_api_key
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

# Wrap the Pinecone index as a LangChain vector store.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Hosted LLaMA chat model used to answer questions over retrieved context.
llm = HuggingFaceHub(repo_id="meta-llama/Llama-2-30b-chat-hf", huggingfacehub_api_token=huggingfacehub_api_token)
# --- Streamlit UI -----------------------------------------------------------
st.title("LLaMA 30B RAG Chatbot")

# Document upload: the file's text is embedded and stored in Pinecone so the
# chatbot can retrieve it on later queries.
uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
if uploaded_file is not None:
    file_details = {
        "filename": uploaded_file.name,
        "filetype": uploaded_file.type,
        "filesize": uploaded_file.size,
    }
    st.write(file_details)

    # BUG FIX: the raw bytes were previously decoded unconditionally as UTF-8,
    # which raises UnicodeDecodeError for PDFs (binary data, yet accepted by
    # the uploader above) and for non-UTF-8 text files, crashing the app.
    raw_bytes = uploaded_file.read()
    try:
        file_content = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        st.error(
            f"'{uploaded_file.name}' is not valid UTF-8 text. "
            "PDF text extraction is not supported; please upload a plain-text file."
        )
        st.stop()

    # Wrap the text as a LangChain Document and index it under a fresh UUID.
    document = Document(page_content=file_content, metadata={"source": uploaded_file.name})
    vector_store.add_documents(documents=[document], ids=[str(uuid4())])
    st.write("Document added to Pinecone.")
# Query box: retrieve the closest stored chunks and ask the LLM to answer
# from them via a RetrievalQA chain.
query = st.text_input("Enter your query:")
if query:
    try:
        # Show the top-2 most similar stored chunks for transparency.
        matches = vector_store.similarity_search(query, k=2)
        st.write("Search Results:")
        for match in matches:
            st.write(f"* {match.page_content} [{match.metadata}]")

        # Build a "stuff"-style QA chain over the same vector store and run it.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(),
        )
        answer = qa_chain.run(query)
        st.write("Chatbot Response:")
        st.write(answer)
    except Exception as e:
        st.error(f"An error occurred: {e}")