File size: 4,665 Bytes
5fe0d59
36a4621
7904d7e
36a4621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7904d7e
 
93ec41a
 
 
 
 
 
 
 
7904d7e
36a4621
 
 
 
 
 
 
 
 
 
 
 
785f217
36a4621
 
 
7904d7e
36a4621
 
 
 
 
 
 
 
7904d7e
 
36a4621
 
 
 
 
 
 
7904d7e
36a4621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#from utils.credentials import check_credentials, init_clients

import os
import streamlit as st
from langchain.chains import RetrievalQA
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores.neo4j_vector import Neo4jVector
from streamlit.logger import get_logger
from chains import (
    load_embedding_model,
    load_llm,
)
from pymongo import MongoClient
import certifi

#url = os.getenv("NEO4J_URI")
#username = os.getenv("NEO4J_USERNAME")
#password = os.getenv("NEO4J_PASSWORD")

#url = os.getenv("MONGO_URI")
#username = os.getenv("NEO4J_USERNAME")
#password = os.getenv("NEO4J_PASSWORD")

import os
from pymongo import MongoClient
from openai import OpenAI
#from dotenv import load_dotenv

# Load environment variables
#load_dotenv()

# Initialize clients
#openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
#atlas_uri = os.getenv("ATLAS_URI")
#url = atlas_uri
#client = MongoClient(atlas_uri)


import requests
from pymongo import MongoClient
import certifi

# Connect to MongoDB Atlas
#client = MongoClient(atlas_uri,tls=True,tlsCAFile=certifi.where())
#db = client['sample_mflix']
#collection = db['embedded_movies']


# Runtime configuration, taken from the environment with defaults for the
# embedding model and the LLM name.
ollama_base_url = os.environ.get("OLLAMA_BASE_URL")
embedding_model_name = os.environ.get("EMBEDDING_MODEL", "SentenceTransformer")
llm_name = os.environ.get("LLM", "llama2")

# When OLLAMA_BASE_URL is not configured, ask for it (or for an OpenAI API
# key) through a form.
# NOTE: the database connection fields (URL / username / password) that this
# form used to collect are currently disabled.
if not ollama_base_url:
    st.write("The application requires some information before running.")
    with st.form("connection_form"):
        ollama_base_url = st.text_input("Enter OLLAMA_BASE_URL")
        st.markdown("Only enter the OPENAI_APIKEY to use OpenAI instead of Ollama. Leave blank to use Ollama.")
        openai_apikey = st.text_input("Enter OPENAI_API_KEY", type="password")
        submit_button = st.form_submit_button("Submit")

    if submit_button:
        if openai_apikey:
            # An OpenAI key switches the LLM name and exports the key through
            # the process environment.
            llm_name = "gpt-3.5"
            os.environ["OPENAI_API_KEY"] = openai_apikey
        elif not ollama_base_url:
            # Neither an Ollama URL nor an OpenAI key was supplied.
            st.write("Enter the Ollama URL or OpenAI API Key.")

#os.environ["NEO4J_URL"] = url
#os.environ["ATLAS_URI"] = url

# Streamlit logger for this module; handed to the model-loading helpers.
logger = get_logger(__name__)

# Load the embedding model named by `embedding_model_name`. The helper's
# result is unpacked into the embeddings object and a `dimension` value.
embeddings, dimension = load_embedding_model(
    embedding_model_name,
    logger=logger,
    config={"ollama_base_url": ollama_base_url},
)


class StreamHandler(BaseCallbackHandler):
    """Callback handler that renders LLM output as it is generated.

    Each new token is appended to the accumulated text, and the whole text
    is re-rendered as markdown into the given container.
    """

    def __init__(self, container, initial_text: str = "") -> None:
        """Store the output container and the starting text.

        Args:
            container: Object exposing a ``markdown(text)`` method
                (``main`` passes ``st.empty()``).
            initial_text: Text already present before any token arrives.
        """
        self.text = initial_text
        self.container = container

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the running text and redraw the container."""
        updated = self.text + token
        self.text = updated
        self.container.markdown(updated)

# Build the LLM at module level from the configured name and Ollama base URL.
llm = load_llm(
    llm_name,
    config={"ollama_base_url": ollama_base_url},
    logger=logger,
)


def main():
    """Render the PDF chat page.

    Flow: upload a PDF, extract its text, split the text into overlapping
    chunks, store the chunks in a Neo4j vector index, then answer the user's
    question with a retrieval QA chain whose output is streamed to the page.

    The Neo4j connection settings are read from the environment
    (``NEO4J_URI`` or ``NEO4J_URL``, ``NEO4J_USERNAME``, ``NEO4J_PASSWORD``).
    Without this lookup the names passed to ``Neo4jVector.from_texts`` are
    undefined and the upload fails with ``NameError``.
    """
    st.header("📄Chat with your pdf file")

    # Upload your PDF file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # Resolve the vector-store credentials before doing any expensive work.
    url = os.getenv("NEO4J_URI") or os.getenv("NEO4J_URL")
    username = os.getenv("NEO4J_USERNAME")
    password = os.getenv("NEO4J_PASSWORD")
    if not all([url, username, password]):
        st.error(
            "Set NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD "
            "to store the PDF chunks."
        )
        return

    pdf_reader = PdfReader(pdf)

    # A page without extractable text contributes an empty string rather
    # than breaking the concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Split the text into overlapping chunks for embedding
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Nothing to index (for example an image-only PDF).
        st.warning("No text could be extracted from this PDF.")
        return

    # Store the chunks in the vector database
    vectorstore = Neo4jVector.from_texts(
        chunks,
        url=url,
        username=username,
        password=password,
        embedding=embeddings,
        index_name="pdf_bot",
        node_label="PdfBotChunk",
        pre_delete_collection=True,  # Delete existing PDF data
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
    )

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file")
    if query:
        # Stream tokens into an empty placeholder as they are generated.
        stream_handler = StreamHandler(st.empty())
        qa.run(query, callbacks=[stream_handler])


# Render the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()