Halemo committed on
Commit
2ec294e
·
1 Parent(s): fbadd2a

convert colivara demo to a search-only feature

Browse files
Files changed (1) hide show
  1. app.py +70 -112
app.py CHANGED
@@ -4,146 +4,104 @@ from colivara_py import Colivara
4
  import base64
5
  from PIL import Image
6
  from io import BytesIO
 
 
 
7
 
8
- # Initialize session state variables
9
- if "uploaded_file_names" not in st.session_state:
10
- st.session_state["uploaded_file_names"] = set()
11
  if "search_results" not in st.session_state:
12
  st.session_state["search_results"] = []
13
- if "collections_list" not in st.session_state:
14
- st.session_state["collections_list"] = ""
15
-
16
 
17
- def validate_api_key(api_key):
18
- # check if the api key is valid
19
- try:
20
- client = Colivara(base_url="https://api.colivara.com", api_key=api_key)
21
- client.list_collections()
22
- return True
23
- except Exception:
24
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
  # Ask for API key from user
28
  st.title("Colivara Demo")
29
- st.write("Welcome to the Colivara Demo.")
30
- st.subheader("API Key")
31
- api_key = st.text_input("API Key", type="password")
32
- if not api_key or not validate_api_key(api_key):
33
- st.write("Please enter your API key to proceed.")
34
- st.markdown("If you don't have one, get yours [here](https://colivara.com).")
35
- st.stop()
36
 
37
- client = Colivara(base_url="https://api.colivara.com", api_key=api_key)
38
- st.divider()
39
- st.subheader("Features Overview")
40
- st.write("Each section below is independent, and you can use them separately.")
41
 
 
 
42
 
43
- st.divider()
44
- # Create Collection Section
45
- st.subheader("Create Collection")
46
- collection_name = st.text_input(
47
- "Collection Name",
48
- help="Enter the name of the collection to create",
49
- )
50
- if st.button("Create Collection"):
51
- if not collection_name:
52
- st.error("Please enter a collection name to create the collection.")
53
- else:
54
- with st.spinner("Creating collection..."):
55
- client.create_collection(collection_name)
56
- st.success(f"Collection '{collection_name}' successfully created.")
57
- st.divider()
58
- # List Collections Section
59
- st.subheader("List Collections")
60
- if st.button("List Collections"):
61
- with st.spinner("Fetching collections..."):
62
- collections = client.list_collections()
63
- collections_text = "\n".join([collection.name for collection in collections])
64
- st.session_state["collections_list"] = collections_text
65
 
66
- if st.session_state["collections_list"]:
67
- st.code(st.session_state["collections_list"])
68
- st.divider()
69
- # Upsert Documents Section
70
- st.subheader("Upsert Documents")
71
- uploaded_files = st.file_uploader(
72
- "Upload Documents",
73
- accept_multiple_files=True,
74
- help="Upload your documents here, most extensions are supported (pdf, docx, png, jpg, xlsx, etc.)",
75
  )
76
- selected_collection = st.text_input(
77
- "Collection Name", help="Enter the collection name to store the documents in."
 
 
 
 
 
 
 
 
 
 
 
78
  )
79
 
80
- if st.button("Upsert Files"):
81
- if not uploaded_files or not selected_collection:
82
- st.error(
83
- "Please upload your documents and enter the collection name to proceed."
84
- )
85
- else:
86
- # Filter new files only
87
- new_files = [
88
- file
89
- for file in uploaded_files
90
- if file.name not in st.session_state["uploaded_file_names"]
91
- ]
92
- if not new_files:
93
- st.info("No new files to upsert. Please upload new files to proceed.")
94
- else:
95
- with st.spinner("Upserting Files..."):
96
- progress_bar = st.progress(0)
97
- total_files = len(new_files)
98
- for idx, file in enumerate(new_files):
99
- st.session_state["uploaded_file_names"].add(file.name)
100
- encoded_file = base64.b64encode(file.read()).decode("utf-8")
101
- client.upsert_document(
102
- name=file.name,
103
- document_base64=encoded_file,
104
- collection_name=selected_collection,
105
- wait=True,
106
- )
107
- progress_bar.progress((idx + 1) / total_files)
108
- st.success(
109
- f"{len(new_files)} new files successfully upserted to the collection '{selected_collection}'."
110
- )
111
- st.divider()
112
- # Search and Retrieve Documents Section
113
  st.subheader("Search and Retrieve Documents")
114
 
115
- # Add collection name input for search
116
- search_collection_name = st.text_input(
117
- "Collection Name for Search", help="Enter the collection name to search in."
118
- )
119
-
120
  query = st.text_input(
121
- "Search Query", help="Enter the search query to retrieve documents."
122
  )
123
-
124
  top_k = st.slider(
125
  "Number of Top Documents to Retrieve",
126
  min_value=1,
127
  max_value=10,
128
  value=3,
129
- help="Select the number of top documents to retrieve.",
130
  )
131
 
 
132
  if st.button("Search"):
133
- if not search_collection_name:
134
- st.error("Please enter the collection name for the search.")
135
- elif not query:
136
- st.error("Please enter a search query to retrieve documents.")
137
  else:
138
  with st.spinner("Searching..."):
139
- results = client.search(
140
- query=query, collection_name=search_collection_name, top_k=top_k
141
- )
142
- st.session_state["search_results"] = results.results
 
 
 
143
 
144
- # Display Search Results
145
- if st.session_state["search_results"]:
146
- st.write("Search Results:")
147
  cols = st.columns(2)
148
  for idx, result in enumerate(st.session_state["search_results"]):
149
  img_base64 = result.img_base64
@@ -154,7 +112,7 @@ if st.session_state["search_results"]:
154
  img,
155
  caption=f"Document: {document_name}, Page: {page_number}",
156
  )
157
- st.success("Search completed.")
158
 
159
  st.markdown("----")
160
- st.markdown("Developed by Abdulhaleem from TJM Labs")
 
4
  import base64
5
  from PIL import Image
6
  from io import BytesIO
7
+ import os
8
+
9
+ API_KEY = os.getenv("COLIVARA_API_KEY")
10
 
 
 
 
11
  if "search_results" not in st.session_state:
12
  st.session_state["search_results"] = []
 
 
 
13
 
14
+ client = Colivara(base_url="https://api.colivara.com", api_key=API_KEY)
15
+ # Add badges at the top of the page
16
+ st.markdown(
17
+ """
18
+ <div style="display: flex; gap: 10px; margin-bottom: 20px;">
19
+ <a href="https://discord.gg/DtGRxWuj8y" target="_blank">
20
+ <img src="https://dcbadge.limes.pink/api/server/https://discord.gg/DtGRxWuj8y" alt="Discord">
21
+ </a>
22
+ <a href="https://www.colivara.com" target="_blank">
23
+ <img src="https://img.shields.io/badge/Website-0078D7?style=for-the-badge&logo=internetexplorer&logoColor=white" alt="Website">
24
+ </a>
25
+ <a href="https://docs.colivara.com" target="_blank">
26
+ <img src="https://img.shields.io/badge/Docs-217346?style=for-the-badge&logo=readthedocs&logoColor=white" alt="Docs">
27
+ </a>
28
+ <a href="https://github.com/tjmlabs/ColiVara" target="_blank">
29
+ <img src="https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="GitHub">
30
+ </a>
31
+ </div>
32
+ """,
33
+ unsafe_allow_html=True,
34
+ )
35
 
36
 
37
  # Page title (the API key is now read from the COLIVARA_API_KEY environment variable)
38
  st.title("Colivara Demo")
 
 
 
 
 
 
 
39
 
40
+ st.markdown(
41
+ """
42
+ ### Welcome to the Colivara Search Demo
43
+ This demo allows you to search through a sample NVIDIA quarterly report (Form 10-Q).
44
 
45
+ - Enter a search query in the input box below.
46
+ - Retrieve the most relevant pages from the document.
47
 
48
+ **Example Queries:**
49
+ ```
50
+ 1. What were Nvidia's revenues for the most recent quarter?
51
+ 2. What are the main factors that are driving Nvidia's growth?
52
+ 3. What are the risks and challenges that Nvidia is facing?
53
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ Try it out and see how the AI retrieves the information you need efficiently.
56
+ """
 
 
 
 
 
 
 
57
  )
58
+
59
+ # Display placeholder for document details with a badge for the PDF
60
+ st.markdown(
61
+ """
62
+ #### Document Overview:
63
+ **Title:** NVIDIA Corporation Quarterly Report (Form 10-Q, October 27, 2024)
64
+ **Type:** PDF
65
+ **Pages:** 50+
66
+ <a href="https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/ed2a395c-5e9b-4411-8b4a-a718d192155a.pdf" target="_blank">
67
+ <img src="https://img.shields.io/badge/View%20PDF-EF8D21?style=for-the-badge&logo=adobeacrobatreader&logoColor=white" alt="View PDF">
68
+ </a>
69
+ """,
70
+ unsafe_allow_html=True,
71
  )
72
 
73
+ # Search and retrieve documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  st.subheader("Search and Retrieve Documents")
75
 
76
+ # User inputs for search
 
 
 
 
77
  query = st.text_input(
78
+ "Enter your search query:", help="Type a query to search through the NVIDIA report."
79
  )
 
80
  top_k = st.slider(
81
  "Number of Top Documents to Retrieve",
82
  min_value=1,
83
  max_value=10,
84
  value=3,
85
+ help="Select the number of top pages to retrieve.",
86
  )
87
 
88
+ collection_name = "Demo-Final"
89
  if st.button("Search"):
90
+ if not query:
91
+ st.error("Please enter a search query to retrieve results.")
 
 
92
  else:
93
  with st.spinner("Searching..."):
94
+ try:
95
+ results = client.search(
96
+ query=query, collection_name=collection_name, top_k=top_k
97
+ )
98
+ st.session_state["search_results"] = results.results
99
+ except Exception as e:
100
+ st.error(f"Error during search: {str(e)}")
101
 
102
+ # Display search results
103
+ if "search_results" in st.session_state and st.session_state["search_results"]:
104
+ st.write("### Search Results")
105
  cols = st.columns(2)
106
  for idx, result in enumerate(st.session_state["search_results"]):
107
  img_base64 = result.img_base64
 
112
  img,
113
  caption=f"Document: {document_name}, Page: {page_number}",
114
  )
115
+ st.success("Search completed successfully.")
116
 
117
  st.markdown("----")
118
+ st.markdown("Developed by Abdulhaleem from TJM Labs.")