MarlonKegel commited on
Commit
f291a48
·
1 Parent(s): 8be5285

adjusted output token settings and cost estimates

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. rag_ui.py +35 -25
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Post-Neoliberalism LLM
3
  emoji: 📚
4
  colorFrom: blue
5
  colorTo: green
 
1
  ---
2
+ title: Post-Neoliberalism Literature RAG
3
  emoji: 📚
4
  colorFrom: blue
5
  colorTo: green
rag_ui.py CHANGED
@@ -275,7 +275,7 @@ retrieval_col, llm_col = st.columns(2)
275
  with retrieval_col:
276
  st.subheader("Retrieval Settings")
277
  selected_labels = st.multiselect(
278
- "Pick one, multiple, or none (default: search *all* sources):",
279
  source_labels,
280
  default=[]
281
  )
@@ -286,7 +286,7 @@ with retrieval_col:
286
  chunk_idx_pool = [i for key in selected_keys for i in source_groups[key]]
287
 
288
  context_chunk_count = st.number_input(
289
- "How many relevant chunks to include:",
290
  min_value=3,
291
  max_value=30,
292
  value=15,
@@ -297,25 +297,26 @@ with llm_col:
297
  st.subheader("LLM Settings")
298
  selected_model_name = st.selectbox("Choose an OpenAI model:", model_friendly_names, index=0)
299
  selected_model = model_label_map[selected_model_name]
300
-
301
  # Max output tokens UI -- show as "words"
302
  max_output_words = st.number_input(
303
- "Max answer length (in words):",
304
  min_value=50,
305
  max_value=2000,
306
  value=800,
307
  step=50
308
  )
309
- # We'll later convert words to tokens for API (rough rule: words × 1.5 = tokens)
310
-
311
  # Advanced controls:
312
  with st.expander("Advanced LLM Controls (Optional)"):
 
 
313
  temp_value = st.slider(
314
  "Model randomness (temperature): Lower = more deterministic outputs (only GPT-4.1 and 4.1-mini)",
315
  0.0, 0.5, value=0.3, step=0.05,
316
  disabled=selected_model not in TEMPERATURE_MODELS,
317
  key="temperature_slider"
318
  )
 
 
319
  reasoning_effort = st.selectbox(
320
  "Reasoning effort (only for o3 and o4-mini):",
321
  ["default", "low", "medium", "high"],
@@ -323,27 +324,36 @@ with llm_col:
323
  disabled=selected_model not in REASONING_MODELS,
324
  key="reasoning_effort"
325
  )
326
- if selected_model not in TEMPERATURE_MODELS:
327
- st.caption("Temperature is only used for GPT-4.1 and GPT-4.1-mini.")
328
- if selected_model not in REASONING_MODELS:
329
- st.caption("Reasoning effort is only used for o3 and o4-mini.")
330
-
331
  user_temperature = float(temp_value)
332
  user_reasoning = reasoning_effort if reasoning_effort != "default" else None
333
- # Convert words to tokens for API call
334
- user_max_output_tokens = int(max_output_words * 1.5)
335
-
336
- # --- Pricing estimate (dollars only) ---
337
- # Estimation: input tokens = query + all chunks, output tokens = as set
338
- chunk_token = 750 # keeping for estimate, ~500-600 words per chunk = 750 tokens
339
- input_tok = context_chunk_count * chunk_token + len(question.split()) * 1.3 + 1800
340
- output_tok = user_max_output_tokens
341
- rates = MODEL_PRICING[selected_model]
342
- input_cost = (input_tok / 1_000_000) * rates["input"]
343
- output_cost = (output_tok / 1_000_000) * rates["output"]
344
- total_cost = input_cost + output_cost
345
-
346
- st.info(f"**API cost estimate for this query:** ${total_cost:.5f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
  if ask_clicked and question.strip():
349
  with st.spinner("Retrieving and generating answer..."):
 
275
  with retrieval_col:
276
  st.subheader("Retrieval Settings")
277
  selected_labels = st.multiselect(
278
+ "Select sources to search (default is _all_):",
279
  source_labels,
280
  default=[]
281
  )
 
286
  chunk_idx_pool = [i for key in selected_keys for i in source_groups[key]]
287
 
288
  context_chunk_count = st.number_input(
289
+ "Number of chunks passed on to the LLM:",
290
  min_value=3,
291
  max_value=30,
292
  value=15,
 
297
  st.subheader("LLM Settings")
298
  selected_model_name = st.selectbox("Choose an OpenAI model:", model_friendly_names, index=0)
299
  selected_model = model_label_map[selected_model_name]
 
300
  # Max output tokens UI -- show as "words"
301
  max_output_words = st.number_input(
302
+ "Max response length (# of words):",
303
  min_value=50,
304
  max_value=2000,
305
  value=800,
306
  step=50
307
  )
 
 
308
  # Advanced controls:
309
  with st.expander("Advanced LLM Controls (Optional)"):
310
+ if selected_model not in TEMPERATURE_MODELS:
311
+ st.caption("Temperature is only used for GPT-4.1 and GPT-4.1-mini.")
312
  temp_value = st.slider(
313
  "Model randomness (temperature): Lower = more deterministic outputs (only GPT-4.1 and 4.1-mini)",
314
  0.0, 0.5, value=0.3, step=0.05,
315
  disabled=selected_model not in TEMPERATURE_MODELS,
316
  key="temperature_slider"
317
  )
318
+ if selected_model not in REASONING_MODELS:
319
+ st.caption("Reasoning effort is only used for o3 and o4-mini.")
320
  reasoning_effort = st.selectbox(
321
  "Reasoning effort (only for o3 and o4-mini):",
322
  ["default", "low", "medium", "high"],
 
324
  disabled=selected_model not in REASONING_MODELS,
325
  key="reasoning_effort"
326
  )
 
 
 
 
 
327
  user_temperature = float(temp_value)
328
  user_reasoning = reasoning_effort if reasoning_effort != "default" else None
329
+ # Convert words to tokens for API call (model-aware token multiplier)
330
+ if selected_model in REASONING_MODELS:
331
+ if user_reasoning == "low":
332
+ output_token_multiplier = 7
333
+ elif user_reasoning == "medium" or user_reasoning is None:
334
+ output_token_multiplier = 12
335
+ elif user_reasoning == "high":
336
+ output_token_multiplier = 18
337
+ else:
338
+ output_token_multiplier = 12 # default
339
+ else:
340
+ output_token_multiplier = 1.5
341
+ user_max_output_tokens = int(max_output_words * output_token_multiplier)
342
+
343
+ # --- Pricing estimate (dollars only) ---
344
+ chunk_token = 750 # ~500-600 words per chunk ≈ 750 tokens
345
+ input_tok = context_chunk_count * chunk_token + len(question.split()) * 1.3 + 1800
346
+ output_tok = user_max_output_tokens
347
+ rates = MODEL_PRICING[selected_model]
348
+ input_cost = (input_tok / 1_000_000) * rates["input"]
349
+ output_cost = (output_tok / 1_000_000) * rates["output"]
350
+ total_cost = input_cost + output_cost
351
+
352
+ # Show price estimate, turn red if over $1
353
+ if total_cost > 1:
354
+ st.error(f"**API cost estimate for this query:** ${total_cost:.5f}")
355
+ else:
356
+ st.info(f"**API cost estimate for this query:** ${total_cost:.5f}")
357
 
358
  if ask_clicked and question.strip():
359
  with st.spinner("Retrieving and generating answer..."):