Spaces:
Runtime error
Runtime error
MarlonKegel committed on
Commit ·
f291a48
1
Parent(s): 8be5285
adjusted output token settings and cost estimates
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: Post-Neoliberalism
|
| 3 |
emoji: ๐
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Post-Neoliberalism Literature RAG
|
| 3 |
emoji: ๐
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
rag_ui.py
CHANGED
|
@@ -275,7 +275,7 @@ retrieval_col, llm_col = st.columns(2)
|
|
| 275 |
with retrieval_col:
|
| 276 |
st.subheader("Retrieval Settings")
|
| 277 |
selected_labels = st.multiselect(
|
| 278 |
-
"
|
| 279 |
source_labels,
|
| 280 |
default=[]
|
| 281 |
)
|
|
@@ -286,7 +286,7 @@ with retrieval_col:
|
|
| 286 |
chunk_idx_pool = [i for key in selected_keys for i in source_groups[key]]
|
| 287 |
|
| 288 |
context_chunk_count = st.number_input(
|
| 289 |
-
"
|
| 290 |
min_value=3,
|
| 291 |
max_value=30,
|
| 292 |
value=15,
|
|
@@ -297,25 +297,26 @@ with llm_col:
|
|
| 297 |
st.subheader("LLM Settings")
|
| 298 |
selected_model_name = st.selectbox("Choose an OpenAI model:", model_friendly_names, index=0)
|
| 299 |
selected_model = model_label_map[selected_model_name]
|
| 300 |
-
|
| 301 |
# Max output tokens UI -- show as "words"
|
| 302 |
max_output_words = st.number_input(
|
| 303 |
-
"Max
|
| 304 |
min_value=50,
|
| 305 |
max_value=2000,
|
| 306 |
value=800,
|
| 307 |
step=50
|
| 308 |
)
|
| 309 |
-
# We'll later convert words to tokens for API (rough rule: words × 1.5 = tokens)
|
| 310 |
-
|
| 311 |
# Advanced controls:
|
| 312 |
with st.expander("Advanced LLM Controls (Optional)"):
|
|
|
|
|
|
|
| 313 |
temp_value = st.slider(
|
| 314 |
"Model randomness (temperature): Lower = more deterministic outputs (only GPT-4.1 and 4.1-mini)",
|
| 315 |
0.0, 0.5, value=0.3, step=0.05,
|
| 316 |
disabled=selected_model not in TEMPERATURE_MODELS,
|
| 317 |
key="temperature_slider"
|
| 318 |
)
|
|
|
|
|
|
|
| 319 |
reasoning_effort = st.selectbox(
|
| 320 |
"Reasoning effort (only for o3 and o4-mini):",
|
| 321 |
["default", "low", "medium", "high"],
|
|
@@ -323,27 +324,36 @@ with llm_col:
|
|
| 323 |
disabled=selected_model not in REASONING_MODELS,
|
| 324 |
key="reasoning_effort"
|
| 325 |
)
|
| 326 |
-
if selected_model not in TEMPERATURE_MODELS:
|
| 327 |
-
st.caption("Temperature is only used for GPT-4.1 and GPT-4.1-mini.")
|
| 328 |
-
if selected_model not in REASONING_MODELS:
|
| 329 |
-
st.caption("Reasoning effort is only used for o3 and o4-mini.")
|
| 330 |
-
|
| 331 |
user_temperature = float(temp_value)
|
| 332 |
user_reasoning = reasoning_effort if reasoning_effort != "default" else None
|
| 333 |
-
# Convert words to tokens for API call
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
if ask_clicked and question.strip():
|
| 349 |
with st.spinner("Retrieving and generating answer..."):
|
|
|
|
| 275 |
with retrieval_col:
|
| 276 |
st.subheader("Retrieval Settings")
|
| 277 |
selected_labels = st.multiselect(
|
| 278 |
+
"Select sources to search (default is _all_):",
|
| 279 |
source_labels,
|
| 280 |
default=[]
|
| 281 |
)
|
|
|
|
| 286 |
chunk_idx_pool = [i for key in selected_keys for i in source_groups[key]]
|
| 287 |
|
| 288 |
context_chunk_count = st.number_input(
|
| 289 |
+
"Number of chunks passed on to the LLM:",
|
| 290 |
min_value=3,
|
| 291 |
max_value=30,
|
| 292 |
value=15,
|
|
|
|
| 297 |
st.subheader("LLM Settings")
|
| 298 |
selected_model_name = st.selectbox("Choose an OpenAI model:", model_friendly_names, index=0)
|
| 299 |
selected_model = model_label_map[selected_model_name]
|
|
|
|
| 300 |
# Max output tokens UI -- show as "words"
|
| 301 |
max_output_words = st.number_input(
|
| 302 |
+
"Max response length (# of words):",
|
| 303 |
min_value=50,
|
| 304 |
max_value=2000,
|
| 305 |
value=800,
|
| 306 |
step=50
|
| 307 |
)
|
|
|
|
|
|
|
| 308 |
# Advanced controls:
|
| 309 |
with st.expander("Advanced LLM Controls (Optional)"):
|
| 310 |
+
if selected_model not in TEMPERATURE_MODELS:
|
| 311 |
+
st.caption("Temperature is only used for GPT-4.1 and GPT-4.1-mini.")
|
| 312 |
temp_value = st.slider(
|
| 313 |
"Model randomness (temperature): Lower = more deterministic outputs (only GPT-4.1 and 4.1-mini)",
|
| 314 |
0.0, 0.5, value=0.3, step=0.05,
|
| 315 |
disabled=selected_model not in TEMPERATURE_MODELS,
|
| 316 |
key="temperature_slider"
|
| 317 |
)
|
| 318 |
+
if selected_model not in REASONING_MODELS:
|
| 319 |
+
st.caption("Reasoning effort is only used for o3 and o4-mini.")
|
| 320 |
reasoning_effort = st.selectbox(
|
| 321 |
"Reasoning effort (only for o3 and o4-mini):",
|
| 322 |
["default", "low", "medium", "high"],
|
|
|
|
| 324 |
disabled=selected_model not in REASONING_MODELS,
|
| 325 |
key="reasoning_effort"
|
| 326 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
user_temperature = float(temp_value)
|
| 328 |
user_reasoning = reasoning_effort if reasoning_effort != "default" else None
|
| 329 |
+
# Convert words to tokens for API call (model-aware token multiplier)
|
| 330 |
+
if selected_model in REASONING_MODELS:
|
| 331 |
+
if user_reasoning == "low":
|
| 332 |
+
output_token_multiplier = 7
|
| 333 |
+
elif user_reasoning == "medium" or user_reasoning is None:
|
| 334 |
+
output_token_multiplier = 12
|
| 335 |
+
elif user_reasoning == "high":
|
| 336 |
+
output_token_multiplier = 18
|
| 337 |
+
else:
|
| 338 |
+
output_token_multiplier = 12 # default
|
| 339 |
+
else:
|
| 340 |
+
output_token_multiplier = 1.5
|
| 341 |
+
user_max_output_tokens = int(max_output_words * output_token_multiplier)
|
| 342 |
+
|
| 343 |
+
# --- Pricing estimate (dollars only) ---
|
| 344 |
+
chunk_token = 750 # ~500-600 words per chunk ≈ 750 tokens
|
| 345 |
+
input_tok = context_chunk_count * chunk_token + len(question.split()) * 1.3 + 1800
|
| 346 |
+
output_tok = user_max_output_tokens
|
| 347 |
+
rates = MODEL_PRICING[selected_model]
|
| 348 |
+
input_cost = (input_tok / 1_000_000) * rates["input"]
|
| 349 |
+
output_cost = (output_tok / 1_000_000) * rates["output"]
|
| 350 |
+
total_cost = input_cost + output_cost
|
| 351 |
+
|
| 352 |
+
# Show price estimate, turn red if over $1
|
| 353 |
+
if total_cost > 1:
|
| 354 |
+
st.error(f"**API cost estimate for this query:** ${total_cost:.5f}")
|
| 355 |
+
else:
|
| 356 |
+
st.info(f"**API cost estimate for this query:** ${total_cost:.5f}")
|
| 357 |
|
| 358 |
if ask_clicked and question.strip():
|
| 359 |
with st.spinner("Retrieving and generating answer..."):
|