aniruddhdoki committed on
Commit
5516850
·
1 Parent(s): 8b03e78

created a new class to interface with chromadb, huggingface dataset repo to persist database

Browse files
Files changed (4) hide show
  1. .streamlit/secrets.toml +3 -1
  2. README.md +1 -0
  3. app.py +7 -10
  4. utils/ingest.py +8 -0
.streamlit/secrets.toml CHANGED
@@ -1,4 +1,6 @@
1
  LANGCHAIN_TRACING_V2=true
2
  LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
3
  LANGCHAIN_API_KEY="ls__3382b1f40a7f4eefa6959cb2b03dd687"
4
- LANGCHAIN_PROJECT="ConsultAI v1"
 
 
 
1
  LANGCHAIN_TRACING_V2=true
2
  LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
3
  LANGCHAIN_API_KEY="ls__3382b1f40a7f4eefa6959cb2b03dd687"
4
+ LANGCHAIN_PROJECT="ConsultAI v1"
5
+ OPENAI_API_KEY = "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"
6
+ APIFY_CLIENT_KEY = "apify_api_GhFIqZgUf2BGqO46OdBcQOyk2rekQt0ns3Wv"
README.md CHANGED
@@ -14,3 +14,4 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
14
  # TODO
15
  1. Add caching for links so as to not process the same link multiple times
16
  2. Figure out how to disable user input while it's processing so that it doesn't get interrupted (the entire script reruns on every interaction, so it'll even rerun in the middle of execution)
 
 
14
  # TODO
15
  1. Add caching for links so as to not process the same link multiple times
16
  2. Figure out how to disable user input while it's processing so that it doesn't get interrupted (the entire script reruns on every interaction, so it'll even rerun in the middle of execution)
17
+ 3. List all of the files and links in the database on each run
app.py CHANGED
@@ -9,24 +9,21 @@ from bs4 import BeautifulSoup
9
  from apify_client import ApifyClient
10
  from pprint import pprint
11
  from utils.split import split
 
12
 
13
  ## NOTE: STREAMLIT RUNS THE ENTIRE SCRIPT FROM TOP TO BOTTOM ON EVERY USER INTERACTION
14
 
15
  ## streamlit ui
16
- st.title("ConsultAI")
17
  st.subheader("STILL IN DEVELOPMENT. DO NOT USE 'UPLOAD FILES' FEATURE IN SIDEBAR YET. IF NO DATA CAN BE SEEN IN DATABASE CONTACT ME.")
18
  openai_api_key = st.text_input("Enter your OpenAI API key here:")
19
 
20
- openai.api_key = openai_api_key
21
- if not openai_api_key:
22
- st.write("Please enter your OpenAI API key above")
23
- st.stop()
24
 
25
- # create vectorstore globally (persist across devices?)
26
- @st.cache_resource
27
- def initialize_db():
28
- client = chromadb.Client()
29
- return client
30
  db = initialize_db()
31
 
32
  # scrape links
 
9
  from apify_client import ApifyClient
10
  from pprint import pprint
11
  from utils.split import split
12
+ from utils.db import initialize_db
13
 
14
  ## NOTE: STREAMLIT RUNS THE ENTIRE SCRIPT FROM TOP TO BOTTOM ON EVERY USER INTERACTION
15
 
16
  ## streamlit ui
17
+ st.title("Atticus")
18
  st.subheader("STILL IN DEVELOPMENT. DO NOT USE 'UPLOAD FILES' FEATURE IN SIDEBAR YET. IF NO DATA CAN BE SEEN IN DATABASE CONTACT ME.")
19
  openai_api_key = st.text_input("Enter your OpenAI API key here:")
20
 
21
+ # openai.api_key = openai_api_key
22
+ # if not openai_api_key:
23
+ # st.write("Please enter your OpenAI API key above")
24
+ # st.stop()
25
 
26
+ # create vectorstore globally (persists data across runs)
 
 
 
 
27
  db = initialize_db()
28
 
29
  # scrape links
utils/ingest.py CHANGED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ class Ingest():
4
+ files = [] # list of UploadedFile objects, subclass of BytesIO
5
+ db = None # chromadb.Client object
6
+
7
+
8
+