Spaces:
Sleeping
Sleeping
vishanth10 commited on
Commit ·
8959c46
1
Parent(s): 61f09d1
New UI and resolved bugs
Browse files
.txt
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import streamlit as st
|
| 2 |
+
# from carbon import Carbon
|
| 3 |
+
# import requests
|
| 4 |
+
# import json
|
| 5 |
+
|
| 6 |
+
# # Carbon API Key
|
| 7 |
+
# CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
|
| 8 |
+
# CUSTOMER_ID = "Candid"
|
| 9 |
+
|
| 10 |
+
# def get_google_drive_oauth(carbon):
|
| 11 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 12 |
+
# service="GOOGLE_DRIVE",
|
| 13 |
+
# scope="https://www.googleapis.com/auth/drive.readonly",
|
| 14 |
+
# connecting_new_account=True,
|
| 15 |
+
# )
|
| 16 |
+
# return get_oauth_url_response.oauth_url
|
| 17 |
+
|
| 18 |
+
# def get_dropbox_oauth(carbon):
|
| 19 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 20 |
+
# service="DROPBOX",
|
| 21 |
+
# connecting_new_account=True,
|
| 22 |
+
# )
|
| 23 |
+
# return get_oauth_url_response.oauth_url
|
| 24 |
+
|
| 25 |
+
# def get_notion_oauth(carbon):
|
| 26 |
+
# get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 27 |
+
# service="NOTION",
|
| 28 |
+
# connecting_new_account=True,
|
| 29 |
+
# )
|
| 30 |
+
# return get_oauth_url_response.oauth_url
|
| 31 |
+
|
| 32 |
+
# def sync_github(carbon, username, token):
|
| 33 |
+
# sync_response = carbon.integrations.sync_git_hub(
|
| 34 |
+
# username=username,
|
| 35 |
+
# token=token,
|
| 36 |
+
# sync_source_items=True
|
| 37 |
+
# )
|
| 38 |
+
# return sync_response
|
| 39 |
+
|
| 40 |
+
# def sync_gitbook(carbon, access_token, organization):
|
| 41 |
+
# sync_response = carbon.integrations.sync_git_book(
|
| 42 |
+
# access_token=access_token,
|
| 43 |
+
# organization=organization,
|
| 44 |
+
# sync_source_items=True
|
| 45 |
+
# )
|
| 46 |
+
# return sync_response
|
| 47 |
+
|
| 48 |
+
# def sync_s3(carbon, access_key, access_key_secret):
|
| 49 |
+
# sync_response = carbon.integrations.sync_s3(
|
| 50 |
+
# access_key=access_key,
|
| 51 |
+
# access_key_secret=access_key_secret,
|
| 52 |
+
# sync_source_items=True
|
| 53 |
+
# )
|
| 54 |
+
# return sync_response
|
| 55 |
+
|
| 56 |
+
# def sync_google_drive(carbon, data_source_id):
|
| 57 |
+
# sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id))
|
| 58 |
+
# return sync_response
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
|
| 62 |
+
# if data_source_id:
|
| 63 |
+
# sync_google_drive(carbon, data_source_id)
|
| 64 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
| 65 |
+
# st.session_state['current_data_source'] = data_source_id # Store the current data source
|
| 66 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
| 67 |
+
# else:
|
| 68 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
| 69 |
+
# data_source_id = st.session_state['current_data_source']
|
| 70 |
+
# sync_google_drive(carbon, data_source_id)
|
| 71 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
| 72 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
| 73 |
+
# else:
|
| 74 |
+
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
| 75 |
+
# pagination={"limit": 100, "offset": 0},
|
| 76 |
+
# order_by="created_at",
|
| 77 |
+
# order_dir="desc",
|
| 78 |
+
# filters={"source": service},
|
| 79 |
+
# )
|
| 80 |
+
# if query_user_data_sources_response.results:
|
| 81 |
+
# data_source_id = query_user_data_sources_response.results[0].id
|
| 82 |
+
# sync_google_drive(carbon, data_source_id)
|
| 83 |
+
# list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
| 84 |
+
# st.session_state['current_data_source'] = data_source_id
|
| 85 |
+
# st.session_state['files'] = list_files_response.items # Store the fetched files
|
| 86 |
+
# else:
|
| 87 |
+
# list_files_response = None
|
| 88 |
+
# return list_files_response.items if list_files_response else None
|
| 89 |
+
|
| 90 |
+
# def list_all_files(carbon, data_source_id):
|
| 91 |
+
# url = "https://api.carbon.ai/user_files_v2"
|
| 92 |
+
# payload = {
|
| 93 |
+
# "pagination": {
|
| 94 |
+
# "limit": 100,
|
| 95 |
+
# "offset": 0
|
| 96 |
+
# },
|
| 97 |
+
# "order_by": "created_at",
|
| 98 |
+
# "order_dir": "desc",
|
| 99 |
+
# "filters": {
|
| 100 |
+
# "organization_user_data_source_id": [data_source_id],
|
| 101 |
+
# "embedding_generators": ["OPENAI"],
|
| 102 |
+
# "include_all_children": True,
|
| 103 |
+
# },
|
| 104 |
+
# "include_raw_file": True,
|
| 105 |
+
# "include_parsed_text_file": True,
|
| 106 |
+
# "include_additional_files": True
|
| 107 |
+
# }
|
| 108 |
+
# headers = {
|
| 109 |
+
# "authorization": f"Bearer {CARBON_API_KEY}",
|
| 110 |
+
# "customer-id": CUSTOMER_ID,
|
| 111 |
+
# "Content-Type": "application/json"
|
| 112 |
+
# }
|
| 113 |
+
|
| 114 |
+
# response = requests.request("POST", url, json=payload, headers=headers)
|
| 115 |
+
# res = json.loads(response.text)
|
| 116 |
+
# file_id= res['results'][0]['id']
|
| 117 |
+
# for i,document in enumerate(res['results']):
|
| 118 |
+
# print(document['name'])
|
| 119 |
+
# print(document['id'])
|
| 120 |
+
|
| 121 |
+
# file_id=res['results'][0]['id']
|
| 122 |
+
# print(file_id)
|
| 123 |
+
# return res['results']
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# def list_user_documents(carbon):
|
| 127 |
+
# query_user_documents_response = carbon.documents.query_documents(
|
| 128 |
+
# pagination={"limit": 100, "offset": 0},
|
| 129 |
+
# order_by="created_at",
|
| 130 |
+
# order_dir="desc"
|
| 131 |
+
# )
|
| 132 |
+
# return query_user_documents_response.documents if query_user_documents_response else None
|
| 133 |
+
|
| 134 |
+
# def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
|
| 135 |
+
# search_response = carbon.embeddings.get_documents(
|
| 136 |
+
# query=query,
|
| 137 |
+
# k=2,
|
| 138 |
+
# tags_v2=tags_v2 if tags_v2 else {},
|
| 139 |
+
# include_tags=True,
|
| 140 |
+
# include_vectors=True,
|
| 141 |
+
# include_raw_file=True,
|
| 142 |
+
# hybrid_search=hybrid_search,
|
| 143 |
+
# hybrid_search_tuning_parameters={
|
| 144 |
+
# "weight_a": 0.5,
|
| 145 |
+
# "weight_b": 0.5,
|
| 146 |
+
# },
|
| 147 |
+
# media_type="TEXT",
|
| 148 |
+
# embedding_model="OPENAI",
|
| 149 |
+
# )
|
| 150 |
+
# return search_response.documents
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# def main():
|
| 154 |
+
# st.title('Data Connector using Carbon SDK')
|
| 155 |
+
|
| 156 |
+
# # Authenticate with Carbon API
|
| 157 |
+
# st.write('### Authenticate with Carbon API')
|
| 158 |
+
# carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
|
| 159 |
+
# token = carbon.auth.get_access_token()
|
| 160 |
+
# carbon = Carbon(access_token=token.access_token) # authenticated object
|
| 161 |
+
|
| 162 |
+
# # Connect to Data Source
|
| 163 |
+
# st.write('## Connect to Data Source')
|
| 164 |
+
# service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
|
| 165 |
+
# if st.button('Get OAuth URL'):
|
| 166 |
+
# with st.spinner('Fetching OAuth URL...'):
|
| 167 |
+
# try:
|
| 168 |
+
# if service == "GOOGLE_DRIVE":
|
| 169 |
+
# oauth_url = get_google_drive_oauth(carbon)
|
| 170 |
+
# elif service == "DROPBOX":
|
| 171 |
+
# oauth_url = get_dropbox_oauth(carbon)
|
| 172 |
+
# elif service == "NOTION":
|
| 173 |
+
# oauth_url = get_notion_oauth(carbon)
|
| 174 |
+
# st.write(f"OAuth URL for {service}: {oauth_url}")
|
| 175 |
+
# st.session_state['current_data_source'] = None # Reset the current data source
|
| 176 |
+
# st.session_state['files'] = None # Clear the previous files
|
| 177 |
+
# st.session_state['oauth_fetched'] = True
|
| 178 |
+
# except Exception as e:
|
| 179 |
+
# st.error(f"An error occurred: {e}")
|
| 180 |
+
|
| 181 |
+
# if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']:
|
| 182 |
+
# st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
|
| 183 |
+
# if st.button('Sync and Fetch Files'):
|
| 184 |
+
# with st.spinner('Syncing and fetching files...'):
|
| 185 |
+
# try:
|
| 186 |
+
# query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
| 187 |
+
# pagination={"limit": 100, "offset": 0},
|
| 188 |
+
# order_by="created_at",
|
| 189 |
+
# order_dir="desc",
|
| 190 |
+
# filters={"source": service},
|
| 191 |
+
# )
|
| 192 |
+
# if query_user_data_sources_response.results:
|
| 193 |
+
# data_source_id = query_user_data_sources_response.results[0].id
|
| 194 |
+
# sync_google_drive(carbon, data_source_id)
|
| 195 |
+
# st.session_state['current_data_source'] = data_source_id
|
| 196 |
+
# st.session_state['oauth_fetched'] = False
|
| 197 |
+
# st.success("Synced successfully! Now you can list the files.")
|
| 198 |
+
# else:
|
| 199 |
+
# st.error("No data sources found. Please ensure the connection was successful.")
|
| 200 |
+
# except Exception as e:
|
| 201 |
+
# st.error(f"An error occurred: {e}")
|
| 202 |
+
|
| 203 |
+
# # List Files in Data Source
|
| 204 |
+
# st.write(f'## List Files in {service}')
|
| 205 |
+
# data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
|
| 206 |
+
# if st.button('List Files'):
|
| 207 |
+
# with st.spinner('Fetching files...'):
|
| 208 |
+
# try:
|
| 209 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
| 210 |
+
# data_source_id = st.session_state['current_data_source']
|
| 211 |
+
# files = list_files(carbon, data_source_id if data_source_id else None, service)
|
| 212 |
+
# if files:
|
| 213 |
+
# st.write(f"Files in {service}:")
|
| 214 |
+
# for item in files:
|
| 215 |
+
# st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
|
| 216 |
+
# else:
|
| 217 |
+
# st.write("No files found.")
|
| 218 |
+
# except Exception as e:
|
| 219 |
+
# st.error(f"An error occurred: {e}")
|
| 220 |
+
|
| 221 |
+
# # List All Files
|
| 222 |
+
# st.write('### List All Files')
|
| 223 |
+
# if st.button('List All Files'):
|
| 224 |
+
# with st.spinner('Fetching all files...'):
|
| 225 |
+
# try:
|
| 226 |
+
# if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
|
| 227 |
+
# data_source_id = st.session_state['current_data_source']
|
| 228 |
+
# all_files = list_all_files(carbon, data_source_id)
|
| 229 |
+
# if all_files:
|
| 230 |
+
# st.write("All files:")
|
| 231 |
+
# for i, document in enumerate(all_files):
|
| 232 |
+
# file_id = document['id']
|
| 233 |
+
# st.write(f"File ID: {document['id']}, File Name: {document['name']}")
|
| 234 |
+
# else:
|
| 235 |
+
# st.write("No files found.")
|
| 236 |
+
# except Exception as e:
|
| 237 |
+
# st.error(f"An error occurred: {e}")
|
| 238 |
+
|
| 239 |
+
# # Search in the Connected Data Source
|
| 240 |
+
# st.write('### Search in the Connected Data Source')
|
| 241 |
+
# query = st.text_input("Enter your query:", value="Type here...")
|
| 242 |
+
# if st.button('Search'):
|
| 243 |
+
# if query:
|
| 244 |
+
# with st.spinner('Searching...'):
|
| 245 |
+
# try:
|
| 246 |
+
# all_files = list_all_files(carbon, data_source_id)
|
| 247 |
+
# url = "https://api.carbon.ai/embeddings"
|
| 248 |
+
# payload = {
|
| 249 |
+
# "query": query,
|
| 250 |
+
# "k": 2,
|
| 251 |
+
# "file_ids": file_id,
|
| 252 |
+
# "include_all_children": True,
|
| 253 |
+
# "tags": {},
|
| 254 |
+
# "include_tags": True,
|
| 255 |
+
# "include_vectors": True,
|
| 256 |
+
# "include_raw_file": True,
|
| 257 |
+
# "hybrid_search": False,
|
| 258 |
+
# "media_type": "TEXT",
|
| 259 |
+
# "embedding_model": "OPENAI"
|
| 260 |
+
# }
|
| 261 |
+
# headers = {
|
| 262 |
+
# "authorization": f"Bearer {CARBON_API_KEY}",
|
| 263 |
+
# "customer-id": CUSTOMER_ID,
|
| 264 |
+
# "Content-Type": "application/json"
|
| 265 |
+
# }
|
| 266 |
+
# response_search = requests.post(url, json=payload, headers=headers)
|
| 267 |
+
# response_search_chunks = json.loads(response_search.text)
|
| 268 |
+
|
| 269 |
+
# st.write("Search results:")
|
| 270 |
+
# for i, doc in enumerate(response_search_chunks['documents']):
|
| 271 |
+
# st.write(f"Document {i+1}:")
|
| 272 |
+
# st.write(f"Content: {doc['content']}")
|
| 273 |
+
# st.write(f"Source: {doc['source']}")
|
| 274 |
+
# st.write(f"Match Percentage: {doc['score'] * 100}%")
|
| 275 |
+
# if 'file_url' in doc:
|
| 276 |
+
# st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
| 277 |
+
# st.write("-------------------------------------------------")
|
| 278 |
+
# except Exception as e:
|
| 279 |
+
# st.error(f"An error occurred: {e}")
|
| 280 |
+
# else:
|
| 281 |
+
# st.write("Please enter a query to search.")
|
| 282 |
+
|
| 283 |
+
# # Display Search History
|
| 284 |
+
# st.write('## Search History')
|
| 285 |
+
# if 'search_history' not in st.session_state:
|
| 286 |
+
# st.session_state['search_history'] = []
|
| 287 |
+
|
| 288 |
+
# if query and st.button('Add to Search History'):
|
| 289 |
+
# st.session_state['search_history'].append(query)
|
| 290 |
+
|
| 291 |
+
# if st.session_state['search_history']:
|
| 292 |
+
# st.write("Past Searches:")
|
| 293 |
+
# for past_query in st.session_state['search_history']:
|
| 294 |
+
# st.write(past_query)
|
| 295 |
+
|
| 296 |
+
# # Call the main function
|
| 297 |
+
# if __name__ == '__main__':
|
| 298 |
+
# main()
|
app.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from carbon import Carbon
|
|
|
|
| 3 |
import requests
|
| 4 |
import json
|
| 5 |
|
| 6 |
-
#
|
| 7 |
CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
|
| 8 |
-
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def get_google_drive_oauth(carbon):
|
| 11 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 12 |
service="GOOGLE_DRIVE",
|
|
@@ -15,6 +20,7 @@ def get_google_drive_oauth(carbon):
|
|
| 15 |
)
|
| 16 |
return get_oauth_url_response.oauth_url
|
| 17 |
|
|
|
|
| 18 |
def get_dropbox_oauth(carbon):
|
| 19 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 20 |
service="DROPBOX",
|
|
@@ -22,6 +28,7 @@ def get_dropbox_oauth(carbon):
|
|
| 22 |
)
|
| 23 |
return get_oauth_url_response.oauth_url
|
| 24 |
|
|
|
|
| 25 |
def get_notion_oauth(carbon):
|
| 26 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 27 |
service="NOTION",
|
|
@@ -29,220 +36,182 @@ def get_notion_oauth(carbon):
|
|
| 29 |
)
|
| 30 |
return get_oauth_url_response.oauth_url
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
-
return
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
| 45 |
)
|
| 46 |
-
return
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
access_key_secret=access_key_secret,
|
| 52 |
-
sync_source_items=True
|
| 53 |
-
)
|
| 54 |
-
return sync_response
|
| 55 |
-
|
| 56 |
-
def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
|
| 57 |
-
if data_source_id:
|
| 58 |
-
list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
| 59 |
-
else:
|
| 60 |
-
query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
|
| 61 |
-
pagination={"limit": 100, "offset": 0},
|
| 62 |
-
order_by="created_at",
|
| 63 |
-
order_dir="desc",
|
| 64 |
-
filters={"source": service},
|
| 65 |
-
)
|
| 66 |
-
if query_user_data_sources_response.results:
|
| 67 |
-
data_source_id = query_user_data_sources_response.results[0].id
|
| 68 |
-
list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
|
| 69 |
-
else:
|
| 70 |
-
list_files_response = None
|
| 71 |
-
return list_files_response.items if list_files_response else None
|
| 72 |
-
|
| 73 |
-
def list_user_documents(carbon):
|
| 74 |
-
query_user_documents_response = carbon.documents.query_documents(
|
| 75 |
pagination={"limit": 100, "offset": 0},
|
| 76 |
order_by="created_at",
|
| 77 |
-
order_dir="desc"
|
| 78 |
)
|
| 79 |
-
return
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
"weight_a": 0.5,
|
| 92 |
-
"weight_b": 0.5
|
| 93 |
},
|
| 94 |
-
media_type
|
| 95 |
-
embedding_model
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
"query": query,
|
| 203 |
-
"k": 2,
|
| 204 |
-
"file_ids": [], # Modify to include relevant file IDs if needed
|
| 205 |
-
"include_all_children": True,
|
| 206 |
-
"tags": {},
|
| 207 |
-
"include_tags": True,
|
| 208 |
-
"include_vectors": True,
|
| 209 |
-
"include_raw_file": True,
|
| 210 |
-
"hybrid_search": False,
|
| 211 |
-
"media_type": "TEXT",
|
| 212 |
-
"embedding_model": "OPENAI"
|
| 213 |
-
}
|
| 214 |
-
headers = {
|
| 215 |
-
"authorization": f"Bearer {CARBON_API_KEY}",
|
| 216 |
-
"customer-id": customer_id,
|
| 217 |
-
"Content-Type": "application/json"
|
| 218 |
-
}
|
| 219 |
-
response_search = requests.post(url, json=payload, headers=headers)
|
| 220 |
-
response_search_chunks = json.loads(response_search.text)
|
| 221 |
-
|
| 222 |
-
st.write("Search results:")
|
| 223 |
-
for i, doc in enumerate(response_search_chunks['documents']):
|
| 224 |
-
st.write(f"Document {i+1}:")
|
| 225 |
-
st.write(f"Content: {doc['content']}")
|
| 226 |
-
st.write(f"Source: {doc['source']}")
|
| 227 |
-
st.write(f"Match Percentage: {doc['score'] * 100}%")
|
| 228 |
-
if 'file_url' in doc:
|
| 229 |
-
st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
| 230 |
-
st.write("-------------------------------------------------")
|
| 231 |
-
except Exception as e:
|
| 232 |
-
st.error(f"An error occurred: {e}")
|
| 233 |
-
else:
|
| 234 |
-
st.write("Please enter a query to search.")
|
| 235 |
-
|
| 236 |
-
# Display Search History
|
| 237 |
-
st.write('## Search History')
|
| 238 |
-
if 'search_history' not in st.session_state:
|
| 239 |
-
st.session_state['search_history'] = []
|
| 240 |
-
|
| 241 |
-
if query and st.button('Add to Search History'):
|
| 242 |
-
st.session_state['search_history'].append(query)
|
| 243 |
-
|
| 244 |
-
if st.session_state['search_history']:
|
| 245 |
-
st.write("Past Searches:")
|
| 246 |
-
|
| 247 |
-
if __name__ == '__main__':
|
| 248 |
-
main()
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from carbon import Carbon
|
| 3 |
+
from decimal import Decimal
|
| 4 |
import requests
|
| 5 |
import json
|
| 6 |
|
| 7 |
+
import os

# Constants
# Credentials are read from the environment (on Hugging Face Spaces, configure
# them as Space secrets) so that the API key is never committed to the repo.
# NOTE(review): the key that used to be hard-coded here is part of the public
# commit history and should be rotated.
CARBON_API_KEY = os.environ.get("CARBON_API_KEY", "")
CUSTOMER_ID = os.environ.get("CARBON_CUSTOMER_ID", "Candid")

if not CARBON_API_KEY:
    # Fail early with a readable message instead of an authentication error
    # on the first API call.
    st.error("CARBON_API_KEY is not set. Add it as an environment variable or Space secret.")
    st.stop()

# Initialize Carbon SDK
carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
|
| 13 |
+
|
| 14 |
+
# Authenticate and get OAuth URL for Google Drive
|
| 15 |
def get_google_drive_oauth(carbon):
|
| 16 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 17 |
service="GOOGLE_DRIVE",
|
|
|
|
| 20 |
)
|
| 21 |
return get_oauth_url_response.oauth_url
|
| 22 |
|
| 23 |
+
# Authenticate and get OAuth URL for Dropbox
|
| 24 |
def get_dropbox_oauth(carbon):
|
| 25 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 26 |
service="DROPBOX",
|
|
|
|
| 28 |
)
|
| 29 |
return get_oauth_url_response.oauth_url
|
| 30 |
|
| 31 |
+
# Authenticate and get OAuth URL for Notion
|
| 32 |
def get_notion_oauth(carbon):
|
| 33 |
get_oauth_url_response = carbon.integrations.get_oauth_url(
|
| 34 |
service="NOTION",
|
|
|
|
| 36 |
)
|
| 37 |
return get_oauth_url_response.oauth_url
|
| 38 |
|
| 39 |
+
# Get data source ID
|
| 40 |
+
def get_data_source_id(service):
    """Return the ID of the most recently created data source for *service*.

    Queries the module-level ``carbon`` client for the user's data sources,
    filtered by ``service`` and ordered by ``created_at`` descending, and
    returns the ``id`` of the first result.

    Args:
        service: Source name used as the ``source`` filter, e.g.
            ``"GOOGLE_DRIVE"``.

    Raises:
        IndexError: If the query returns no data source for ``service``.
            The exception type matches what indexing an empty result list
            raised before, but now carries an actionable message.
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    results = response.results
    if not results:
        raise IndexError(
            f"No {service} data source found. Please ensure the connection was successful."
        )
    return results[0].id
|
| 48 |
+
|
| 49 |
+
# List files in the data source
|
| 50 |
+
def list_files(data_source_id):
    """Return the items of the data source identified by *data_source_id*.

    Uses the module-level ``carbon`` client. Only a single page is requested
    (limit 250, offset 0) with an empty filter set.
    """
    first_page = {"limit": 250, "offset": 0}
    listing = carbon.integrations.list_data_source_items(
        data_source_id=data_source_id,
        filters={},
        pagination=first_page,
    )
    return listing.items
|
| 57 |
|
| 58 |
+
# List all data sources associated with the user
|
| 59 |
+
def list_user_data_sources():
    """Return the user's data sources, newest first.

    Uses the module-level ``carbon`` client and requests a single page
    (limit 100, offset 0) ordered by ``created_at`` descending.
    """
    query_args = {
        "pagination": {"limit": 100, "offset": 0},
        "order_by": "created_at",
        "order_dir": "desc",
    }
    return carbon.data_sources.query_user_data_sources(**query_args).results
|
| 66 |
+
|
| 67 |
+
# List files uploaded by the user
|
| 68 |
+
def list_uploaded_files(data_source_id, timeout=30):
    """Return the files Carbon has stored for one data source.

    Posts to the ``user_files_v2`` endpoint, requesting a single page
    (limit 100, offset 0) ordered by ``created_at`` descending and filtered
    to ``data_source_id``.

    Args:
        data_source_id: ID used as the ``organization_user_data_source_id``
            filter.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        The ``results`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    url = "https://api.carbon.ai/user_files_v2"
    payload = {
        "pagination": {
            "limit": 100,
            "offset": 0,
        },
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": {
            "organization_user_data_source_id": [data_source_id],
            "embedding_generators": ["OPENAI"],
            "include_all_children": True,
        },
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True,
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Surface HTTP errors directly rather than as a KeyError on 'results'.
    response.raise_for_status()
    return response.json()['results']
|
| 93 |
+
|
| 94 |
+
# Search function
|
| 95 |
+
def search_documents(query, file_ids, k=2, timeout=30):
    """Run an embedding search against Carbon and return the matches.

    Posts to the ``embeddings`` endpoint with hybrid search disabled.

    Args:
        query: Search text.
        file_ids: File IDs sent as the ``file_ids`` field of the request.
        k: Number of results to request (defaults to the previous fixed
            value of 2).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``documents`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    url = "https://api.carbon.ai/embeddings"
    payload = {
        "query": query,
        "k": k,
        "file_ids": file_ids,
        "include_all_children": True,
        "include_tags": True,
        "include_vectors": True,
        "include_raw_file": True,
        "hybrid_search": False,
        "hybrid_search_tuning_parameters": {
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        "media_type": "TEXT",
        "embedding_model": "OPENAI",
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Surface HTTP errors directly rather than as a KeyError on 'documents'.
    # The response is deliberately not printed: it carries vectors and raw
    # file data that should not end up in the server logs.
    response.raise_for_status()
    return response.json()['documents']
|
| 122 |
+
|
| 123 |
+
# Streamlit UI
|
| 124 |
+
st.title("Data Connector using Carbon SDK")

# Authenticate with Carbon API
st.header("Authenticate with Carbon API")

# Connect to Data Source
st.subheader("Connect to Data Source")
# Maps each selectable service to the helper that builds its OAuth URL;
# the dict's insertion order is the order shown in the select box.
_oauth_url_builders = {
    "GOOGLE_DRIVE": get_google_drive_oauth,
    "DROPBOX": get_dropbox_oauth,
    "NOTION": get_notion_oauth,
}
data_source = st.selectbox("Select Data Source for OAuth", list(_oauth_url_builders))
if st.button("Get OAuth URL"):
    oauth_url = _oauth_url_builders[data_source](carbon)
    st.write(f"OAuth URL for {data_source}: {oauth_url}")
    st.markdown(
        f'<a href="{oauth_url}" target="_blank">Authenticate {data_source}</a>',
        unsafe_allow_html=True,
    )
|
| 141 |
+
|
| 142 |
+
# List User Data Sources
|
| 143 |
+
st.subheader("List Data Sources")
list_sources_clicked = st.button("List Data Sources")
if list_sources_clicked:
    # Fetch first so nothing is rendered if the query fails.
    data_sources = list_user_data_sources()
    st.write("Data Sources associated with the user:")
    for source in data_sources:
        summary = (
            f"ID: {source.id}, External ID: {source.data_source_external_id}, Type: {source.data_source_type}, "
            f"Sync Status: {source.sync_status}, Created At: {source.created_at}, Updated At: {source.updated_at}"
        )
        st.write(summary)
|
| 150 |
+
|
| 151 |
+
# List Files in Data Source
|
| 152 |
+
st.subheader(f"List Files in {data_source}")
if st.button("List Files"):
    try:
        data_source_id = get_data_source_id(data_source)
    except IndexError:
        # get_data_source_id takes the first query result; IndexError means
        # no data source of this type has been connected yet.
        st.error("No data sources found. Please ensure the connection was successful.")
    else:
        files = list_files(data_source_id)
        st.write(f"Files in {data_source}:")
        for file in files:
            st.write(file.name)
|
| 159 |
+
|
| 160 |
+
# List Uploaded Files
|
| 161 |
+
st.subheader("Documents Uploaded Result")
# Streamlit re-runs this script from the top on every widget interaction, so
# a plain module-level list would be empty again by the time "Search" is
# clicked. Keeping the list in session state lets the IDs collected by
# "Show Uploaded Files" survive until the search uses them.
if "file_ids" not in st.session_state:
    st.session_state["file_ids"] = []
file_ids = st.session_state["file_ids"]

if st.button("Show Uploaded Files"):
    try:
        data_source_id = get_data_source_id(data_source)
    except IndexError:
        # No data source of the selected type has been connected yet.
        st.error("No data sources found. Please ensure the connection was successful.")
    else:
        uploaded_files = list_uploaded_files(data_source_id)
        # Replace (not extend) the remembered IDs so repeated clicks do not
        # accumulate duplicates; clear() mutates the session-state list.
        file_ids.clear()
        st.write("Uploaded Files:")
        for file in uploaded_files:
            st.write(f"ID: {file['id']}, Organization Supplied User ID: {file['organization_supplied_user_id']}, "
                     f"Organization User Data Source ID: {file['organization_user_data_source_id']}, External URL: {file['external_url']}")
            file_ids.append(file['id'])

# Search Documents
st.subheader("Search Documents")
query = st.text_input("Enter your search query:")
if st.button("Search"):
    if query:
        search_results = search_documents(query, file_ids)
        st.write("Search Results:")
        for result in search_results:
            st.write(f"Source: {result['source']}")
            st.write(f"Title: {result['content']}")
            st.write(f"Source URL: {result['source_url']}")
            st.write(f"Source Type: {result['source_type']}")
            st.write(f"Presigned URL: {result['presigned_url']}")
            st.write(f"Tags: {result['tags']}")
            st.write("-------------------------------------------------")
    else:
        # Previously an empty query made the button appear to do nothing.
        st.warning("Please enter a query to search.")
|
| 188 |
+
|
| 189 |
+
# # Add chat interface using custom HTML/CSS
|
| 190 |
+
# st.subheader("Chat Interface")
|
| 191 |
+
# chat_input = st.text_input("Enter your query:")
|
| 192 |
+
# if st.button("Send"):
|
| 193 |
+
# if chat_input:
|
| 194 |
+
# st.markdown(f'<div class="chat-bubble user">{chat_input}</div>', unsafe_allow_html=True)
|
| 195 |
+
# # Placeholder for bot response (add your processing logic here)
|
| 196 |
+
# bot_response = "This is a bot response."
|
| 197 |
+
# st.markdown(f'<div class="chat-bubble bot">{bot_response}</div>', unsafe_allow_html=True)
|
| 198 |
+
|
| 199 |
+
# Custom CSS for chat bubbles
|
| 200 |
+
# Stylesheet for the chat-bubble classes, injected into the page as raw HTML.
_chat_bubble_css = """
<style>
.chat-bubble {
padding: 10px 15px;
border-radius: 10px;
margin: 5px 0;
max-width: 60%;
}
.user {
background-color: lightblue;
align-self: flex-end;
}
.bot {
background-color: darkgray;
align-self: flex-start;
}
</style>
"""
st.markdown(_chat_bubble_css, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|