Spaces:
Build error
Build error
Ilia Tambovtsev committed on
Commit ·
0ed2f77
1
Parent(s): 75078e5
feat: simplify ui
Browse files
- src/rag/__init__.py +8 -1
- src/webapp/app.py +28 -37
src/rag/__init__.py
CHANGED
|
@@ -1 +1,8 @@
|
|
| 1 |
-
from src.rag.storage import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.rag.storage import (
|
| 2 |
+
ChromaSlideStore,
|
| 3 |
+
SearchResult,
|
| 4 |
+
SearchResultPage,
|
| 5 |
+
SearchResultPresentation,
|
| 6 |
+
SlideIndexer,
|
| 7 |
+
create_slides_database,
|
| 8 |
+
)
|
src/webapp/app.py
CHANGED
|
@@ -25,7 +25,7 @@ def format_page_results(result_page: SearchResultPage) -> str:
|
|
| 25 |
f"""\
|
| 26 |
### Page: {result_page.page_num+1}
|
| 27 |
**Best matching chunk:** `{result_page.matched_chunk.chunk_type}`\\
|
| 28 |
-
**Chunk distances:**
|
| 29 |
"""
|
| 30 |
)
|
| 31 |
|
|
@@ -84,7 +84,7 @@ def format_presentation_results(
|
|
| 84 |
# Format header
|
| 85 |
text = f"## {pdf_path.stem}\n"
|
| 86 |
text += f"\n{df_string}\n\n"
|
| 87 |
-
text += f"**
|
| 88 |
|
| 89 |
# Format individual slides
|
| 90 |
for slide in pres_result.slides:
|
|
@@ -112,8 +112,20 @@ class RagInterface:
|
|
| 112 |
self.interface = gr.Blocks()
|
| 113 |
|
| 114 |
# Config
|
|
|
|
| 115 |
self.output_height = 500
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def launch(self, **kwargs):
|
| 119 |
"""Build Gradio interface layout"""
|
|
@@ -123,40 +135,21 @@ class RagInterface:
|
|
| 123 |
|
| 124 |
with gr.Row():
|
| 125 |
# Input components
|
| 126 |
-
with gr.
|
| 127 |
query = gr.Textbox(
|
| 128 |
label="Search Query",
|
| 129 |
placeholder="Enter your search query...",
|
| 130 |
lines=3,
|
| 131 |
elem_id="query",
|
| 132 |
)
|
| 133 |
-
with gr.
|
| 134 |
-
|
| 135 |
-
label="
|
| 136 |
-
scale=1,
|
| 137 |
-
minimum=1,
|
| 138 |
-
maximum=10,
|
| 139 |
-
value=1,
|
| 140 |
-
step=1,
|
| 141 |
-
elem_id="n_pres",
|
| 142 |
-
)
|
| 143 |
-
n_pages = gr.Number(
|
| 144 |
-
label="Number of pages per presentation",
|
| 145 |
scale=1,
|
| 146 |
minimum=1,
|
| 147 |
maximum=5,
|
| 148 |
value=3,
|
| 149 |
step=1,
|
| 150 |
-
elem_id="n_pages",
|
| 151 |
-
)
|
| 152 |
-
max_distance = gr.Number(
|
| 153 |
-
label="Maximum Distance",
|
| 154 |
-
scale=1,
|
| 155 |
-
minimum=0.1,
|
| 156 |
-
maximum=2.0,
|
| 157 |
-
value=2.0,
|
| 158 |
-
step=0.1,
|
| 159 |
-
elem_id="max_distance",
|
| 160 |
)
|
| 161 |
|
| 162 |
search_btn = gr.Button("Search", size="lg", scale=3)
|
|
@@ -166,8 +159,7 @@ class RagInterface:
|
|
| 166 |
|
| 167 |
# Results container
|
| 168 |
result_components = []
|
| 169 |
-
|
| 170 |
-
for i in range(n_results):
|
| 171 |
with gr.Group(visible=True) as g:
|
| 172 |
with gr.Tabs():
|
| 173 |
# Create 3 identical result tabs
|
|
@@ -181,7 +173,6 @@ class RagInterface:
|
|
| 181 |
container=False,
|
| 182 |
visible=False,
|
| 183 |
)
|
| 184 |
-
certainty = gr.Markdown()
|
| 185 |
|
| 186 |
with gr.Tab(f"Details"):
|
| 187 |
# Results text
|
|
@@ -191,24 +182,24 @@ class RagInterface:
|
|
| 191 |
height=self.output_height,
|
| 192 |
visible=False,
|
| 193 |
)
|
| 194 |
-
|
|
|
|
| 195 |
|
| 196 |
def fill_components(inputs):
|
|
|
|
| 197 |
new_results = self.store.search_query_presentations(
|
| 198 |
query=inputs[query],
|
| 199 |
-
n_results=inputs[n_pres],
|
| 200 |
-
max_distance=inputs[max_distance],
|
| 201 |
-
n_slides_per_presentation=inputs[n_pages],
|
| 202 |
)
|
| 203 |
outputs = []
|
| 204 |
-
for i in range(
|
| 205 |
if i < len(new_results):
|
| 206 |
r = new_results[i]
|
| 207 |
text, pdf_path, page = format_presentation_results(r)
|
| 208 |
g = gr.Group(visible=True)
|
| 209 |
-
pdf = PDF(value=str(pdf_path), starting_page=page+1)
|
| 210 |
-
|
| 211 |
-
|
|
|
|
| 212 |
else:
|
| 213 |
g = gr.Group(visible=False)
|
| 214 |
pdf = PDF(visible=False)
|
|
@@ -221,7 +212,7 @@ class RagInterface:
|
|
| 221 |
# Wire up the search function
|
| 222 |
search_btn.click(
|
| 223 |
fn=fill_components,
|
| 224 |
-
inputs={query,
|
| 225 |
outputs=result_components,
|
| 226 |
)
|
| 227 |
|
|
|
|
| 25 |
f"""\
|
| 26 |
### Page: {result_page.page_num+1}
|
| 27 |
**Best matching chunk:** `{result_page.matched_chunk.chunk_type}`\\
|
| 28 |
+
**Chunk distances:**
|
| 29 |
"""
|
| 30 |
)
|
| 31 |
|
|
|
|
| 84 |
# Format header
|
| 85 |
text = f"## {pdf_path.stem}\n"
|
| 86 |
text += f"\n{df_string}\n\n"
|
| 87 |
+
text += f"**Rank Score:** {pres_result.rank_score:.4f}\n"
|
| 88 |
|
| 89 |
# Format individual slides
|
| 90 |
for slide in pres_result.slides:
|
|
|
|
| 112 |
self.interface = gr.Blocks()
|
| 113 |
|
| 114 |
# Config
|
| 115 |
+
self.n_outputs = 7
|
| 116 |
self.output_height = 500
|
| 117 |
|
| 118 |
+
def rate_response(self, score: float):
|
| 119 |
+
best_threshold = 0.45
|
| 120 |
+
ok_threshold = 0.6
|
| 121 |
+
if score < best_threshold:
|
| 122 |
+
return "👍" # "💯"
|
| 123 |
+
if score < ok_threshold:
|
| 124 |
+
return "👌" # "¯\_(ツ)_/¯"
|
| 125 |
+
return "👎"
|
| 126 |
+
|
| 127 |
+
def calculate_params(self, search_depth: int):
|
| 128 |
+
return search_depth * 15
|
| 129 |
|
| 130 |
def launch(self, **kwargs):
|
| 131 |
"""Build Gradio interface layout"""
|
|
|
|
| 135 |
|
| 136 |
with gr.Row():
|
| 137 |
# Input components
|
| 138 |
+
with gr.Row():
|
| 139 |
query = gr.Textbox(
|
| 140 |
label="Search Query",
|
| 141 |
placeholder="Enter your search query...",
|
| 142 |
lines=3,
|
| 143 |
elem_id="query",
|
| 144 |
)
|
| 145 |
+
with gr.Column():
|
| 146 |
+
search_depth = gr.Slider(
|
| 147 |
+
label="Depth of Search",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
scale=1,
|
| 149 |
minimum=1,
|
| 150 |
maximum=5,
|
| 151 |
value=3,
|
| 152 |
step=1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
)
|
| 154 |
|
| 155 |
search_btn = gr.Button("Search", size="lg", scale=3)
|
|
|
|
| 159 |
|
| 160 |
# Results container
|
| 161 |
result_components = []
|
| 162 |
+
for i in range(self.n_outputs):
|
|
|
|
| 163 |
with gr.Group(visible=True) as g:
|
| 164 |
with gr.Tabs():
|
| 165 |
# Create 3 identical result tabs
|
|
|
|
| 173 |
container=False,
|
| 174 |
visible=False,
|
| 175 |
)
|
|
|
|
| 176 |
|
| 177 |
with gr.Tab(f"Details"):
|
| 178 |
# Results text
|
|
|
|
| 182 |
height=self.output_height,
|
| 183 |
visible=False,
|
| 184 |
)
|
| 185 |
+
certainty = gr.Markdown()
|
| 186 |
+
result_components.extend([pdf, certainty, details_text])
|
| 187 |
|
| 188 |
def fill_components(inputs):
|
| 189 |
+
self.calculate_params(search_depth=inputs[search_depth])
|
| 190 |
new_results = self.store.search_query_presentations(
|
| 191 |
query=inputs[query],
|
|
|
|
|
|
|
|
|
|
| 192 |
)
|
| 193 |
outputs = []
|
| 194 |
+
for i in range(self.n_outputs):
|
| 195 |
if i < len(new_results):
|
| 196 |
r = new_results[i]
|
| 197 |
text, pdf_path, page = format_presentation_results(r)
|
| 198 |
g = gr.Group(visible=True)
|
| 199 |
+
pdf = PDF(value=str(pdf_path), starting_page=page + 1, visible=True)
|
| 200 |
+
certainty_symbol = self.rate_response(r.rank_score)
|
| 201 |
+
certainty = gr.Markdown(value=f"# Certainty: {certainty_symbol}", visible=True)
|
| 202 |
+
description = gr.Markdown(value=text, visible=True)
|
| 203 |
else:
|
| 204 |
g = gr.Group(visible=False)
|
| 205 |
pdf = PDF(visible=False)
|
|
|
|
| 212 |
# Wire up the search function
|
| 213 |
search_btn.click(
|
| 214 |
fn=fill_components,
|
| 215 |
+
inputs={query, search_depth},
|
| 216 |
outputs=result_components,
|
| 217 |
)
|
| 218 |
|