File size: 2,972 Bytes
9b2860c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
from dotenv import load_dotenv

# Run python-dotenv's loader before any UI is built.
load_dotenv()

# Title call lives in the entrypoint script, ahead of the navigation setup.
st.title("🤖 LLM Evaluation")

# (script path, navigation title) pairs, in the order they are registered.
_PAGE_SPECS = (
    ("GenerateModelTraces.py", "Generate Traces"),
    ("LLMasjudge.py", "LLM as a Judge"),
    ("evaluationwithexistingdata.py", "Evaluate with existing data"),
    ("experiments.py", "Run Experiment"),
)

# Build one st.Page per spec.
pages = [st.Page(script_path, title=nav_title) for script_path, nav_title in _PAGE_SPECS]

# Register the pages with Streamlit's navigation and run the returned page.
pg = st.navigation(pages)
pg.run()


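# NOTE(review): everything below is commented-out code, apparently an earlier
# single-page version of this UI — TODO confirm. It references `helpers`,
# `phoenix_helpers`, `callback` and `callback2`, none of which are imported or
# defined in the visible part of this file, so it cannot be re-enabled as-is.
# action = st.radio("What would you like to do?", ["Generate Traces for Model", "Evaluate Model"])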
# models = helpers.fetch_models()
# if models:
#     if action == "Generate Traces for Model":
#         st.subheader("Select a Model")
#         if "selected_model" not in st.session_state:
#             st.session_state.selected_model = models[0]
#         st.selectbox(
#             "Choose a model to use:", 
#             models, key = 'selected_model',
#             index=models.index(st.session_state.selected_model) if st.session_state.selected_model in models else 0
#         )

#         if st.session_state.selected_model:
#             st.subheader("Enter a Prompt")
#             if "prompt" not in st.session_state:
#                 st.session_state.prompt = ""
#             st.session_state.prompt = st.text_area("Enter your prompt:", value=st.session_state.prompt)
#             if st.button("Generate Content", on_click=callback):
#                 if st.session_state.prompt:
#                     st.subheader("Model Output")
#                     st.session_state.generated_content = helpers.generate_content(st.session_state.selected_model, st.session_state.prompt)
#                     st.write(st.session_state.generated_content)
#                     st.session_state.spans_df = phoenix_helpers.get_spans_df()
#                     # print(spans_df)
#                     st.dataframe(st.session_state.spans_df)
#                 else:
#                     st.write("Enter something to generate content.")
#     elif action == "Evaluate Model":
#         st.session_state.spans_df = phoenix_helpers.get_spans_df()
#         st.subheader("Evaluate LLM")

#         if "evaluation_result" not in st.session_state:
#             st.session_state.evaluation_result = None
#         if (st.button("Evaluate", on_click=callback2) or st.session_state.eval_btn_clicked):
#             if "eval_model" not in st.session_state:
#                 st.session_state.eval_model = models[0]
#             st.selectbox(
#                 "Choose a model to use for evaluation:", 
#                 models, key = 'eval_model',
#                 index=models.index(st.session_state.eval_model) if st.session_state.eval_model in models else 0,
#             )
#             if st.session_state.eval_model:
#                 st.session_state.evaluation_result = phoenix_helpers.evaluate_model(st.session_state.spans_df, st.session_state.eval_model)
#                 st.write(st.session_state.evaluation_result)