"""Streamlit RAG chatbot.

Uploads a text document into a Pinecone index (embedded with Google's
text-embedding-004) and answers user queries via a LangChain RetrievalQA
chain backed by a Hugging Face-hosted LLM.
"""

import os
from uuid import uuid4

import streamlit as st
from dotenv import load_dotenv
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# Load environment variables from a local .env file.
load_dotenv()

# Get API keys from environment variables.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Fail fast with a visible message if any credential is missing.
if not pinecone_api_key or not google_api_key or not huggingfacehub_api_token:
    st.error("API keys not found. Please set PINECONE_API_KEY, GOOGLE_API_KEY, and HUGGINGFACEHUB_API_TOKEN in your .env file.")
    st.stop()

# Initialize Pinecone.
# NOTE(review): v3+ Pinecone clients ignore the `environment` kwarg (the
# region is bound to the index itself) — harmless, but confirm against the
# installed client version.
pc = Pinecone(api_key=pinecone_api_key, environment="us-east1-gcp")  # Replace with your environment if needed
index_name = "online-rag"
index = pc.Index(index_name)

# Initialize embeddings; the Google client reads the key from the environment.
os.environ['GOOGLE_API_KEY'] = google_api_key
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

# Initialize vector store on top of the existing Pinecone index.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Initialize the LLM served through the Hugging Face Inference API.
# NOTE(review): Llama 2 was released in 7B/13B/70B sizes — confirm that
# "meta-llama/Llama-2-30b-chat-hf" is a valid, accessible repo id.
llm = HuggingFaceHub(
    repo_id="meta-llama/Llama-2-30b-chat-hf",
    huggingfacehub_api_token=huggingfacehub_api_token,
)

# Streamlit app
st.title("LLaMA 30B RAG Chatbot")

# Upload document
uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
if uploaded_file is not None:
    file_details = {
        "filename": uploaded_file.name,
        "filetype": uploaded_file.type,
        "filesize": uploaded_file.size,
    }
    st.write(file_details)

    # BUG FIX: the uploader accepts PDFs, but raw PDF bytes are not valid
    # UTF-8 — the original crashed here with UnicodeDecodeError. Report the
    # problem to the user instead of raising.
    try:
        file_content = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error(
            "Could not decode the file as UTF-8 text. PDF files need a text "
            "extractor (e.g. pypdf) before they can be indexed."
        )
        file_content = None

    if file_content is not None:
        document = Document(
            page_content=file_content,
            metadata={"source": uploaded_file.name},
        )

        # BUG FIX: Streamlit re-runs this whole script on every widget
        # interaction, so the original re-inserted the same document under a
        # fresh UUID on each rerun. Track what has already been indexed in
        # session state and insert each upload only once.
        if "indexed_files" not in st.session_state:
            st.session_state["indexed_files"] = set()
        if uploaded_file.name not in st.session_state["indexed_files"]:
            vector_store.add_documents(documents=[document], ids=[str(uuid4())])
            st.session_state["indexed_files"].add(uploaded_file.name)
            st.write("Document added to Pinecone.")

# Query the chatbot
query = st.text_input("Enter your query:")
if query:
    try:
        # Show the raw nearest neighbours alongside the generated answer so
        # the user can see what context the chain retrieved.
        results = vector_store.similarity_search(query, k=2)
        st.write("Search Results:")
        for res in results:
            st.write(f"* {res.page_content} [{res.metadata}]")

        # "stuff" chain type: concatenate all retrieved docs into one prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(),
        )
        answer = qa_chain.run(query)
        st.write("Chatbot Response:")
        st.write(answer)
    except Exception as e:  # UI boundary: surface the error, don't crash the app
        st.error(f"An error occurred: {e}")