Spaces:
Sleeping
Sleeping
Commit ·
5516850
1
Parent(s): 8b03e78
Created a new class to interface with ChromaDB; uses a Hugging Face dataset repo to persist the database
Browse files- .streamlit/secrets.toml +3 -1
- README.md +1 -0
- app.py +7 -10
- utils/ingest.py +8 -0
.streamlit/secrets.toml
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
LANGCHAIN_TRACING_V2=true
|
| 2 |
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
|
| 3 |
LANGCHAIN_API_KEY="ls__3382b1f40a7f4eefa6959cb2b03dd687"
|
| 4 |
-
LANGCHAIN_PROJECT="ConsultAI v1"
|
|
|
|
|
|
|
|
|
| 1 |
LANGCHAIN_TRACING_V2=true
|
| 2 |
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
|
| 3 |
LANGCHAIN_API_KEY="ls__3382b1f40a7f4eefa6959cb2b03dd687"
|
| 4 |
+
LANGCHAIN_PROJECT="ConsultAI v1"
|
| 5 |
+
OPENAI_API_KEY = "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"
|
| 6 |
+
APIFY_CLIENT_KEY = "apify_api_GhFIqZgUf2BGqO46OdBcQOyk2rekQt0ns3Wv"
|
README.md
CHANGED
|
@@ -14,3 +14,4 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 14 |
# TODO
|
| 15 |
1. Add caching for links so as to not process the same link multiple times
|
| 16 |
2. Figure out how to disable user input while it's processing so that it doesn't get interrupted (the entire script reruns on every interaction, so it'll even rerun in the middle of execution)
|
|
|
|
|
|
| 14 |
# TODO
|
| 15 |
1. Add caching for links so as to not process the same link multiple times
|
| 16 |
2. Figure out how to disable user input while it's processing so that it doesn't get interrupted (the entire script reruns on every interaction, so it'll even rerun in the middle of execution)
|
| 17 |
+
3. list out all of the files and links in the database each run
|
app.py
CHANGED
|
@@ -9,24 +9,21 @@ from bs4 import BeautifulSoup
|
|
| 9 |
from apify_client import ApifyClient
|
| 10 |
from pprint import pprint
|
| 11 |
from utils.split import split
|
|
|
|
| 12 |
|
| 13 |
## NOTE: STREAMLIT RUNS THE ENTIRE SCRIPT FROM TOP TO BOTTOM ON EVERY USER INTERACTION
|
| 14 |
|
| 15 |
## streamlit ui
|
| 16 |
-
st.title("
|
| 17 |
st.subheader("STILL IN DEVELOPMENT. DO NOT USE 'UPLOAD FILES' FEATURE IN SIDEBAR YET. IF NO DATA CAN BE SEEN IN DATABASE CONTACT ME.")
|
| 18 |
openai_api_key = st.text_input("Enter your OpenAI API key here:")
|
| 19 |
|
| 20 |
-
openai.api_key = openai_api_key
|
| 21 |
-
if not openai_api_key:
|
| 22 |
-
|
| 23 |
-
|
| 24 |
|
| 25 |
-
# create vectorstore globally (
|
| 26 |
-
@st.cache_resource
|
| 27 |
-
def initialize_db():
|
| 28 |
-
client = chromadb.Client()
|
| 29 |
-
return client
|
| 30 |
db = initialize_db()
|
| 31 |
|
| 32 |
# scrape links
|
|
|
|
| 9 |
from apify_client import ApifyClient
|
| 10 |
from pprint import pprint
|
| 11 |
from utils.split import split
|
| 12 |
+
from utils.db import initialize_db
|
| 13 |
|
| 14 |
## NOTE: STREAMLIT RUNS THE ENTIRE SCRIPT FROM TOP TO BOTTOM ON EVERY USER INTERACTION
|
| 15 |
|
| 16 |
## streamlit ui
|
| 17 |
+
st.title("Atticus")
|
| 18 |
st.subheader("STILL IN DEVELOPMENT. DO NOT USE 'UPLOAD FILES' FEATURE IN SIDEBAR YET. IF NO DATA CAN BE SEEN IN DATABASE CONTACT ME.")
|
| 19 |
openai_api_key = st.text_input("Enter your OpenAI API key here:")
|
| 20 |
|
| 21 |
+
# openai.api_key = openai_api_key
|
| 22 |
+
# if not openai_api_key:
|
| 23 |
+
# st.write("Please enter your OpenAI API key above")
|
| 24 |
+
# st.stop()
|
| 25 |
|
| 26 |
+
# create vectorstore globally (persists data across runs)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
db = initialize_db()
|
| 28 |
|
| 29 |
# scrape links
|
utils/ingest.py
CHANGED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
class Ingest():
|
| 4 |
+
files = [] # list of UploadedFile objects, subclass of BytesIO
|
| 5 |
+
db = None # chromadb.Client object
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|