Spaces:
Sleeping
Sleeping
import json
import os
from decimal import Decimal

import requests
import streamlit as st
from carbon import Carbon
# Constants
# The API key is a secret, so it is read from the environment (for example a
# Space secret exposed as an environment variable) rather than being committed
# to source control.
# NOTE(review): the key that was previously hard-coded here has been published
# with the source and should be rotated.
CARBON_API_KEY = os.environ.get("CARBON_API_KEY")
CUSTOMER_ID = "Candid"

if not CARBON_API_KEY:
    # Fail with a readable message instead of an opaque SDK/auth error later.
    st.error(
        "CARBON_API_KEY is not set. Define it as an environment variable "
        "(or Space secret) and restart the app."
    )
    st.stop()

# Initialize Carbon SDK
carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
def get_google_drive_oauth(carbon) -> str:
    """Request an OAuth URL for connecting a new Google Drive account.

    Args:
        carbon: Carbon SDK client; only ``carbon.integrations.get_oauth_url``
            is used.

    Returns:
        The ``oauth_url`` attribute of the SDK response.
    """
    get_oauth_url_response = carbon.integrations.get_oauth_url(
        service="GOOGLE_DRIVE",
        # Read-only Drive scope: the app only lists and reads files.
        scope="https://www.googleapis.com/auth/drive.readonly",
        connecting_new_account=True,
    )
    return get_oauth_url_response.oauth_url
def get_dropbox_oauth(carbon) -> str:
    """Request an OAuth URL for connecting a new Dropbox account.

    Args:
        carbon: Carbon SDK client; only ``carbon.integrations.get_oauth_url``
            is used.

    Returns:
        The ``oauth_url`` attribute of the SDK response.
    """
    get_oauth_url_response = carbon.integrations.get_oauth_url(
        service="DROPBOX",
        connecting_new_account=True,
    )
    return get_oauth_url_response.oauth_url
def get_notion_oauth(carbon) -> str:
    """Request an OAuth URL for connecting a new Notion account.

    Args:
        carbon: Carbon SDK client; only ``carbon.integrations.get_oauth_url``
            is used.

    Returns:
        The ``oauth_url`` attribute of the SDK response.
    """
    get_oauth_url_response = carbon.integrations.get_oauth_url(
        service="NOTION",
        connecting_new_account=True,
    )
    return get_oauth_url_response.oauth_url
def get_data_source_id(service):
    """Return the ID of the first data source matching *service*.

    The query is ordered by ``created_at`` descending, so the first result is
    the one the API lists first under that ordering.

    Args:
        service: Source name used as the ``source`` filter
            (the UI passes "GOOGLE_DRIVE", "DROPBOX" or "NOTION").

    Returns:
        The ``id`` attribute of the first returned data source.

    Raises:
        IndexError: If the query returns no data source for *service*
            (same exception type as before, now with an explicit message).
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    results = response.results
    if not results:
        raise IndexError(f"No data source found for service {service!r}")
    return results[0].id
def list_files(data_source_id):
    """List items in a connected data source.

    Only a single page is requested (limit 250, offset 0); sources with more
    items are not fully enumerated by this call.

    Args:
        data_source_id: ID of the data source, as returned by
            ``get_data_source_id``.

    Returns:
        The ``items`` attribute of the SDK response.
    """
    response = carbon.integrations.list_data_source_items(
        data_source_id=data_source_id,
        filters={},
        pagination={"limit": 250, "offset": 0},
    )
    return response.items
def list_user_data_sources():
    """List data sources associated with the configured customer.

    Only a single page is requested (limit 100, offset 0), ordered by
    ``created_at`` descending.

    Returns:
        The ``results`` attribute of the SDK response.
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
    )
    return response.results
def list_uploaded_files(data_source_id):
    """Fetch the first page of files known to Carbon for this customer.

    Args:
        data_source_id: Currently unused. The data-source filter is sent as an
            empty list; organization-level filtering still needs to be
            explored.

    Returns:
        The ``results`` entry of the JSON response (one page, limit 100).

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the JSON response has no ``results`` key.
    """
    url = "https://api.carbon.ai/user_files_v2"
    payload = {
        "pagination": {
            "limit": 100,
            "offset": 0,
        },
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": {
            # data_source_id organization level need to explore
            "organization_user_data_source_id": [],
            "embedding_generators": ["OPENAI"],
            "include_all_children": True,
        },
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True,
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    # A timeout keeps the Streamlit run from hanging forever on a stalled
    # connection; raise_for_status surfaces API errors instead of a confusing
    # KeyError on the error body.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()['results']
def search_documents(query, file_ids):
    """Run an embeddings search against Carbon and return matching documents.

    Args:
        query: Search text.
        file_ids: IDs of the files to restrict the search to. ``None`` or an
            empty sequence sends an empty list, which is what this function
            always sent before.

    Returns:
        The ``documents`` entry of the JSON response (at most ``k`` = 2).

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the JSON response has no ``documents`` key.
    """
    url = "https://api.carbon.ai/embeddings"
    payload = {
        "query": query,
        "k": 2,
        # Previously hard-coded to [], which silently ignored the argument.
        "file_ids": list(file_ids) if file_ids else [],
        "include_all_children": True,
        "include_tags": True,
        "include_vectors": True,
        "include_raw_file": True,
        "hybrid_search": False,
        "hybrid_search_tuning_parameters": {
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        "media_type": "TEXT",
        "embedding_model": "OPENAI",
    }
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    # Timeout and status check: fail fast with a meaningful error rather than
    # hanging or raising KeyError on an error payload.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()['documents']
# Streamlit UI
st.title("Data Connector using Carbon SDK")

# Authenticate with Carbon API
st.header("Authenticate with Carbon API")

# Connect to Data Source
st.subheader("Connect to Data Source")
data_source = st.selectbox("Select Data Source for OAuth", ["GOOGLE_DRIVE", "DROPBOX", "NOTION"])
if st.button("Get OAuth URL"):
    # Map each selectable service to the helper that requests its OAuth URL;
    # the keys mirror the selectbox options above.
    oauth_url_getters = {
        "GOOGLE_DRIVE": get_google_drive_oauth,
        "DROPBOX": get_dropbox_oauth,
        "NOTION": get_notion_oauth,
    }
    oauth_url = oauth_url_getters[data_source](carbon)
    st.write(f"OAuth URL for {data_source}: {oauth_url}")
    st.markdown(f'<a href="{oauth_url}" target="_blank">Authenticate {data_source}</a>', unsafe_allow_html=True)
# List User Data Sources
st.subheader("List Data Sources")
if st.button("List Data Sources"):
    data_sources = list_user_data_sources()
    st.write("Data Sources associated with the user:")
    # One line per data source with its identifying and sync metadata.
    for ds in data_sources:
        st.write(f"ID: {ds.id}, External ID: {ds.data_source_external_id}, Type: {ds.data_source_type}, "
                 f"Sync Status: {ds.sync_status}, Created At: {ds.created_at}, Updated At: {ds.updated_at}")
# List Files in Data Source
st.subheader(f"List Files in {data_source}")
if st.button("List Files"):
    try:
        data_source_id = get_data_source_id(data_source)
    except IndexError:
        # get_data_source_id indexes the first query result, so an account
        # with no matching data source raises IndexError; show a hint instead
        # of a traceback.
        st.warning(f"No {data_source} data source found. Connect one via OAuth first.")
    else:
        files = list_files(data_source_id)
        st.write(f"Files in {data_source}:")
        for file in files:
            st.write(file.name)
# List Uploaded Files
st.subheader("Documents Uploaded Result")

# Streamlit re-runs the whole script on every interaction, so a plain
# module-level list would be reset to [] before the "Search" button handler
# runs. Keep the collected IDs in session state so they survive reruns.
if "file_ids" not in st.session_state:
    st.session_state["file_ids"] = []

if st.button("Show Uploaded Files"):
    try:
        data_source_id = get_data_source_id(data_source)
    except IndexError:
        # No data source of the selected type is connected yet.
        st.warning(f"No {data_source} data source found. Connect one via OAuth first.")
    else:
        uploaded_files = list_uploaded_files(data_source_id)
        st.write("Uploaded Files:")
        for file in uploaded_files:
            st.write(f"ID: {file['id']}, Name: {file['name']}, Organization Supplied User ID: {file['organization_supplied_user_id']}, "
                     f"Organization User Data Source ID: {file['organization_user_data_source_id']}, External URL: {file['external_url']}")
        # Replace (not extend) so repeated clicks do not accumulate duplicates.
        st.session_state["file_ids"] = [file['id'] for file in uploaded_files]

file_ids = st.session_state["file_ids"]

# Search Documents
st.subheader("Search Documents")
query = st.text_input("Enter your search query:")
if st.button("Search"):
    if query:
        search_results = search_documents(query, file_ids)
        st.write("Search Results:")
        for result in search_results:
            st.write(f"Source: {result['source']}")
            # NOTE(review): the label says "Title" but the value shown is the
            # 'content' field; confirm which is intended.
            st.write(f"Title: {result['content']}")
            st.write(f"Source URL: {result['source_url']}")
            st.write(f"Source Type: {result['source_type']}")
            st.write(f"Presigned URL: {result['presigned_url']}")
            st.write(f"Tags: {result['tags']}")
            st.write("-------------------------------------------------")
# Disabled chat interface, kept for reference; the CSS below styles the
# "chat-bubble" elements it would render.
# # Add chat interface using custom HTML/CSS
# st.subheader("Chat Interface")
# chat_input = st.text_input("Enter your query:")
# if st.button("Send"):
#     if chat_input:
#         st.markdown(f'<div class="chat-bubble user">{chat_input}</div>', unsafe_allow_html=True)
#         # Placeholder for bot response (add your processing logic here)
#         bot_response = "This is a bot response."
#         st.markdown(f'<div class="chat-bubble bot">{bot_response}</div>', unsafe_allow_html=True)

# Custom CSS for chat bubbles
st.markdown("""
<style>
.chat-bubble {
padding: 10px 15px;
border-radius: 10px;
margin: 5px 0;
max-width: 60%;
}
.user {
background-color: lightblue;
align-self: flex-end;
}
.bot {
background-color: darkgray;
align-self: flex-start;
}
</style>
""", unsafe_allow_html=True)