# document_gpt / app.py — created by meesamraza (commit c608c63)
import streamlit as st
from dotenv import load_dotenv
import os
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from uuid import uuid4
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
# Load configuration from the local .env file into the process environment.
load_dotenv()

# Pull the three required credentials out of the environment.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Abort rendering early if any credential is missing.
if not (pinecone_api_key and google_api_key and huggingfacehub_api_token):
    st.error("API keys not found. Please set PINECONE_API_KEY, GOOGLE_API_KEY, and HUGGINGFACEHUB_API_TOKEN in your .env file.")
    st.stop()

# Connect to Pinecone and open the index backing the RAG store.
pc = Pinecone(api_key=pinecone_api_key, environment="us-east1-gcp")  # Replace with your environment if needed
index_name = "online-rag"
index = pc.Index(index_name)

# The Google embeddings client reads its key from the environment.
os.environ['GOOGLE_API_KEY'] = google_api_key
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

# Wrap the Pinecone index as a LangChain vector store.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Hosted LLaMA chat model used to answer questions over retrieved context.
llm = HuggingFaceHub(repo_id="meta-llama/Llama-2-30b-chat-hf", huggingfacehub_api_token=huggingfacehub_api_token)
# --- Streamlit UI -----------------------------------------------------------
st.title("LLaMA 30B RAG Chatbot")

# Document upload: the file's text is embedded and stored in Pinecone so the
# chatbot can retrieve it on later queries.
uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
if uploaded_file is not None:
    file_details = {
        "filename": uploaded_file.name,
        "filetype": uploaded_file.type,
        "filesize": uploaded_file.size,
    }
    st.write(file_details)

    # BUG FIX: the raw bytes were previously decoded unconditionally as UTF-8,
    # which raises UnicodeDecodeError for PDFs (binary data, yet accepted by
    # the uploader above) and for non-UTF-8 text files, crashing the app.
    raw_bytes = uploaded_file.read()
    try:
        file_content = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        st.error(
            f"'{uploaded_file.name}' is not valid UTF-8 text. "
            "PDF text extraction is not supported; please upload a plain-text file."
        )
        st.stop()

    # Wrap the text as a LangChain Document and index it under a fresh UUID.
    document = Document(page_content=file_content, metadata={"source": uploaded_file.name})
    vector_store.add_documents(documents=[document], ids=[str(uuid4())])
    st.write("Document added to Pinecone.")
# Query box: retrieve the closest stored chunks and ask the LLM to answer
# from them via a RetrievalQA chain.
query = st.text_input("Enter your query:")
if query:
    try:
        # Show the top-2 most similar stored chunks for transparency.
        matches = vector_store.similarity_search(query, k=2)
        st.write("Search Results:")
        for match in matches:
            st.write(f"* {match.page_content} [{match.metadata}]")

        # Build a "stuff"-style QA chain over the same vector store and run it.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(),
        )
        answer = qa_chain.run(query)
        st.write("Chatbot Response:")
        st.write(answer)
    except Exception as e:
        st.error(f"An error occurred: {e}")