File size: 4,665 Bytes
5fe0d59 36a4621 7904d7e 36a4621 7904d7e 93ec41a 7904d7e 36a4621 785f217 36a4621 7904d7e 36a4621 7904d7e 36a4621 7904d7e 36a4621 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
#from utils.credentials import check_credentials, init_clients
import os
import streamlit as st
from langchain.chains import RetrievalQA
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores.neo4j_vector import Neo4jVector
from streamlit.logger import get_logger
from chains import (
load_embedding_model,
load_llm,
)
from pymongo import MongoClient
import certifi
#url = os.getenv("NEO4J_URI")
#username = os.getenv("NEO4J_USERNAME")
#password = os.getenv("NEO4J_PASSWORD")
#url = os.getenv("MONGO_URI")
#username = os.getenv("NEO4J_USERNAME")
#password = os.getenv("NEO4J_PASSWORD")
import os
from pymongo import MongoClient
from openai import OpenAI
#from dotenv import load_dotenv
# Load environment variables
#load_dotenv()
# Initialize clients
#openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
#atlas_uri = os.getenv("ATLAS_URI")
#url = atlas_uri
#client = MongoClient(atlas_uri)
import requests
from pymongo import MongoClient
import certifi
# Connect to MongoDB Atlas
#client = MongoClient(atlas_uri,tls=True,tlsCAFile=certifi.where())
#db = client['sample_mflix']
#collection = db['embedded_movies']
# Runtime configuration taken from the process environment.
# OLLAMA_BASE_URL has no fallback (None when unset); the embedding model and
# LLM names fall back to "SentenceTransformer" and "llama2" respectively.
ollama_base_url = os.environ.get("OLLAMA_BASE_URL")
embedding_model_name = os.environ.get("EMBEDDING_MODEL", "SentenceTransformer")
llm_name = os.environ.get("LLM", "llama2")
#url = os.getenv("NEO4J_URI")
# When OLLAMA_BASE_URL was not supplied through the environment, ask the user
# for it (or for an OpenAI API key) in a small form.
# NOTE(review): the Neo4j / Atlas connection inputs that used to live in this
# form are currently disabled.
if not ollama_base_url:
    st.write("The application requires some information before running.")
    with st.form("connection_form"):
        ollama_base_url = st.text_input("Enter OLLAMA_BASE_URL")
        st.markdown("Only enter the OPENAI_APIKEY to use OpenAI instead of Ollama. Leave blank to use Ollama.")
        openai_apikey = st.text_input("Enter OPENAI_API_KEY", type="password")
        submit_button = st.form_submit_button("Submit")
        if submit_button:
            # At least one backend has to be reachable: Ollama or OpenAI.
            if not ollama_base_url and not openai_apikey:
                st.write("Enter the Ollama URL or OpenAI API Key.")
            # An OpenAI key switches the LLM name and is exported through the
            # process environment.
            if openai_apikey:
                os.environ['OPENAI_API_KEY'] = openai_apikey
                llm_name = "gpt-3.5"
# Module-level logger handed to the helpers imported from `chains`.
logger = get_logger(__name__)

# Load the embedding backend named by `embedding_model_name`; the helper
# returns the embedding object together with a dimension value.
embeddings, dimension = load_embedding_model(
    embedding_model_name,
    logger=logger,
    config={"ollama_base_url": ollama_base_url},
)
class StreamHandler(BaseCallbackHandler):
    """Callback handler that shows LLM output in a container as it streams in.

    The text received so far is kept in ``self.text`` and re-rendered as
    markdown into ``self.container`` on every new token.
    """

    def __init__(self, container, initial_text=""):
        """Remember the target ``container`` and seed the text with ``initial_text``."""
        self.text = initial_text
        self.container = container

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the accumulated text and redraw the container."""
        self.text = self.text + token
        self.container.markdown(self.text)
# Chat model selected by `llm_name`, built once at import time with the same
# Ollama endpoint setting that is passed to the embedding loader.
llm = load_llm(
    llm_name,
    config={"ollama_base_url": ollama_base_url},
    logger=logger,
)
def main():
    """Render the PDF chat page.

    Shows a PDF uploader. Once a file is uploaded, its text is extracted,
    split into overlapping chunks, stored in a Neo4j vector index (replacing
    any previously stored PDF data), and a RetrievalQA chain is built on top
    of it. Questions typed by the user are answered by that chain, with the
    output streamed into the page through ``StreamHandler``.

    The Neo4j connection settings are read from the ``NEO4J_URI``,
    ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` environment variables. If any
    of them is missing, an error is shown and nothing is indexed.
    """
    st.header("📄Chat with your pdf file")

    # upload a your pdf file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # The module-level assignments of url/username/password are commented
    # out, so referencing them here used to raise NameError. Read them from
    # the environment instead, using the variable names of that old setup.
    url = os.getenv("NEO4J_URI")
    username = os.getenv("NEO4J_USERNAME")
    password = os.getenv("NEO4J_PASSWORD")
    if not all([url, username, password]):
        st.error(
            "Set NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD "
            "to store and query the PDF."
        )
        return

    pdf_reader = PdfReader(pdf)
    # `or ""` keeps the join safe should a page yield no text.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # langchain_textspliter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_text(text=text)

    # Store the chunks part in db (vector)
    vectorstore = Neo4jVector.from_texts(
        chunks,
        url=url,
        username=username,
        password=password,
        embedding=embeddings,
        index_name="pdf_bot",
        node_label="PdfBotChunk",
        pre_delete_collection=True,  # Delete existing PDF data
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
    )

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file")
    if query:
        # Stream tokens into an empty placeholder as they arrive.
        stream_handler = StreamHandler(st.empty())
        qa.run(query, callbacks=[stream_handler])


if __name__ == "__main__":
    main()
|