maclenn77 committed on
Commit
ada0a19
·
unverified ·
1 Parent(s): 9163cdf

refactor: Separate app in modules (#9)

Browse files

* refactor: Separate app in modules

* enhancement: Create a sidebar

Files changed (5) hide show
  1. app.py +20 -45
  2. src/chroma_client.py +35 -0
  3. src/gui_messages.py +10 -0
  4. src/openai_client.py +13 -0
  5. src/settings.py +10 -0
app.py CHANGED
@@ -1,45 +1,34 @@
1
  """ A simple example of Streamlit. """
2
- from datetime import datetime as Date
3
  import textwrap
4
  import os
5
  import tiktoken
6
- import chromadb
7
- from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
8
  import fitz
9
  import streamlit as st
10
  import openai
11
  from dotenv import load_dotenv
12
- from openai import OpenAI
 
 
13
 
14
  load_dotenv()
15
 
16
- chroma_client = chromadb.PersistentClient(path="tmp/chroma")
17
- chroma_client.heartbeat()
18
-
19
-
20
- def api_message(api_key):
21
- """Inform if the api key is set."""
22
- if api_key is None:
23
- return st.warning("Add your OpenAI API key")
24
-
25
- return st.success("Your API key is setup ")
26
-
27
 
28
  def set_api_key():
29
  """Set the OpenAI API key."""
30
  openai.api_key = st.session_state.api_key
31
- st.session_state.api_message = api_message(openai.api_key)
32
 
33
 
34
  openai.api_key = os.getenv("OPENAI_API_KEY")
35
 
36
  if "api_message" not in st.session_state:
37
- st.session_state.api_message = api_message(openai.api_key)
38
 
39
- if os.getenv("OPENAI_API_KEY") is None:
40
- message = st.session_state.api_message
 
41
  openai.api_key = st.text_input(
42
- "Enter your OpenAI API key",
43
  value="",
44
  type="password",
45
  key="api_key",
@@ -47,23 +36,13 @@ if os.getenv("OPENAI_API_KEY") is None:
47
  on_change=set_api_key,
48
  label_visibility="collapsed",
49
  )
50
- st.write("You can find your API key at https://beta.openai.com/account/api-keys")
51
- client = OpenAI(api_key=openai.api_key)
52
- embedding_function = OpenAIEmbeddingFunction(
53
- api_key=openai.api_key, model_name="text-embedding-ada-002"
54
- )
55
- collection = chroma_client.get_or_create_collection(
56
- name="pdf-explainer", embedding_function=embedding_function
57
- )
58
- else:
59
- client = OpenAI()
60
- embedding_function = OpenAIEmbeddingFunction(
61
- api_key=openai.api_key, model_name="text-embedding-ada-002"
62
- )
63
- collection = chroma_client.get_or_create_collection(
64
- name="pdf-explainer", embedding_function=embedding_function
65
  )
66
 
 
 
 
67
 
68
  # Query ChromaDb
69
  query = st.text_input(
@@ -90,14 +69,10 @@ pdf = st.file_uploader("Upload a file", type="pdf")
90
 
91
  if pdf is not None:
92
  with fitz.open(stream=pdf.read(), filetype="pdf") as doc: # open document
93
- text = chr(12).join([page.get_text() for page in doc])
94
- st.write(text[0:200])
95
- if st.button("Add to collection"):
96
- collection.add(
97
- documents=[text],
98
- metadatas=[{"source": pdf.name}],
99
- ids=[pdf.name + str(Date.now())],
100
- )
101
  if st.button("Save chunks"):
102
  with st.spinner("Saving chunks..."):
103
  chunks = textwrap.wrap(text, 3000)
@@ -105,7 +80,7 @@ if pdf is not None:
105
  encoding = tiktoken.get_encoding("cl100k_base")
106
  num_tokens = len(encoding.encode(chunk))
107
  response = (
108
- client.embeddings.create(
109
  input=chunk, model="text-embedding-ada-002"
110
  )
111
  .data[0]
@@ -125,6 +100,6 @@ if st.button("Chroma data collection"):
125
 
126
  if st.button("Delete Chroma Collection"):
127
  try:
128
- chroma_client.delete_collection(collection.name)
129
  except AttributeError:
130
  st.error("Collection erased.")
 
1
  """ A simple example of Streamlit. """
 
2
  import textwrap
3
  import os
4
  import tiktoken
 
 
5
  import fitz
6
  import streamlit as st
7
  import openai
8
  from dotenv import load_dotenv
9
+ from src.chroma_client import ChromaDB
10
+ import src.gui_messages as gm
11
+ from src import settings
12
 
13
  load_dotenv()
14
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def set_api_key():
17
  """Set the OpenAI API key."""
18
  openai.api_key = st.session_state.api_key
19
+ st.session_state.api_message = gm.api_message(openai.api_key)
20
 
21
 
22
  openai.api_key = os.getenv("OPENAI_API_KEY")
23
 
24
  if "api_message" not in st.session_state:
25
+ st.session_state.api_message = gm.api_message(openai.api_key)
26
 
27
+ # Sidebar
28
+ with st.sidebar:
29
+ st.write("## OpenAI API key")
30
  openai.api_key = st.text_input(
31
+ "Enter OpenAI API key",
32
  value="",
33
  type="password",
34
  key="api_key",
 
36
  on_change=set_api_key,
37
  label_visibility="collapsed",
38
  )
39
+ st.write(
40
+ "You can find your API key at https://platform.openai.com/account/api-keys"
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
 
43
+ # Build settings
44
+ chroma_db = ChromaDB(openai.api_key)
45
+ openai_client, collection = settings.build(chroma_db)
46
 
47
  # Query ChromaDb
48
  query = st.text_input(
 
69
 
70
  if pdf is not None:
71
  with fitz.open(stream=pdf.read(), filetype="pdf") as doc: # open document
72
+ with st.spinner("Extracting text..."):
73
+ text = chr(12).join([page.get_text() for page in doc])
74
+ st.subheader("Text preview")
75
+ st.write(text[0:300] + "...")
 
 
 
 
76
  if st.button("Save chunks"):
77
  with st.spinner("Saving chunks..."):
78
  chunks = textwrap.wrap(text, 3000)
 
80
  encoding = tiktoken.get_encoding("cl100k_base")
81
  num_tokens = len(encoding.encode(chunk))
82
  response = (
83
+ openai_client.embeddings.create(
84
  input=chunk, model="text-embedding-ada-002"
85
  )
86
  .data[0]
 
100
 
101
  if st.button("Delete Chroma Collection"):
102
  try:
103
+ chroma_db.client.delete_collection(collection.name)
104
  except AttributeError:
105
  st.error("Collection erased.")
src/chroma_client.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """A client for ChromaDB."""
2
+ import chromadb
3
+ from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
4
+ import streamlit as st
5
+
6
+
7
+ class ChromaDB:
8
+ """A class for creating a client for ChromaDB."""
9
+
10
+ def __init__(self, api_key, path="tmp/chroma"):
11
+ """Initialize the client."""
12
+ self.client = chromadb.PersistentClient(path=path)
13
+ self.api_key = api_key
14
+ self.client.heartbeat()
15
+
16
+ def get_collection(self, name):
17
+ """Get a Chroma collection."""
18
+ try:
19
+ collection = self.client.get_collection(name=name)
20
+ return collection
21
+ except AttributeError:
22
+ return st.error("An error ocurred while getting the collection.")
23
+
24
+ def create_collection(self, name):
25
+ """Create a Chroma collection."""
26
+ try:
27
+ embedding_function = OpenAIEmbeddingFunction(
28
+ api_key=self.api_key, model_name="text-embedding-ada-002"
29
+ )
30
+ collection = self.client.get_or_create_collection(
31
+ name=name, embedding_function=embedding_function
32
+ )
33
+ return collection
34
+ except AttributeError:
35
+ return st.error("An error ocurred while creating the collection.")
src/gui_messages.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit GUI messages."""
2
+ import streamlit as st
3
+
4
+
5
+ def api_message(api_key):
6
+ """Inform if the api key is set."""
7
+ if api_key is None:
8
+ return st.warning("Add your OpenAI API key")
9
+
10
+ return st.success("Your API key is setup ")
src/openai_client.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAI client creator."""
2
+ import os
3
+ from openai import OpenAI
4
+
5
+
6
+ def create_client(api_key=None):
7
+ """Create an OpenAI client."""
8
+ if os.getenv("OPENAI_API_KEY"):
9
+ api_key = os.getenv("OPENAI_API_KEY")
10
+
11
+ client = OpenAI(api_key=api_key)
12
+
13
+ return client
src/settings.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Build settings for the app."""
2
+ from src.openai_client import create_client
3
+
4
+
5
+ def build(chroma_db):
6
+ """Build the app."""
7
+ openai_client = create_client(chroma_db.api_key)
8
+ collection = chroma_db.create_collection("pdf-explainer")
9
+
10
+ return openai_client, collection