thrinadhn's picture
Create app.py
9b2860c verified
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
st.title("πŸ€– LLM Evaluation")
pages = [
st.Page("GenerateModelTraces.py", title="Generate Traces"),
st.Page("LLMasjudge.py", title="LLM as a Judge"),
st.Page("evaluationwithexistingdata.py", title="Evaluate with existing data"),
st.Page("experiments.py", title="Run Experiment"),
]
pg = st.navigation(pages)
pg.run()
# action = st.radio("What would you like to do?", ["Generate Traces for Model", "Evaluate Model"])
# models = helpers.fetch_models()
# if models:
# if action == "Generate Traces for Model":
# st.subheader("Select a Model")
# if "selected_model" not in st.session_state:
# st.session_state.selected_model = models[0]
# st.selectbox(
# "Choose a model to use:",
# models, key = 'selected_model',
# index=models.index(st.session_state.selected_model) if st.session_state.selected_model in models else 0
# )
# if st.session_state.selected_model:
# st.subheader("Enter a Prompt")
# if "prompt" not in st.session_state:
# st.session_state.prompt = ""
# st.session_state.prompt = st.text_area("Enter your prompt:", value=st.session_state.prompt)
# if st.button("Generate Content", on_click=callback):
# if st.session_state.prompt:
# st.subheader("Model Output")
# st.session_state.generated_content = helpers.generate_content(st.session_state.selected_model, st.session_state.prompt)
# st.write(st.session_state.generated_content)
# st.session_state.spans_df = phoenix_helpers.get_spans_df()
# # print(spans_df)
# st.dataframe(st.session_state.spans_df)
# else:
# st.write("Enter something to generate content.")
# elif action == "Evaluate Model":
# st.session_state.spans_df = phoenix_helpers.get_spans_df()
# st.subheader("Evaluate LLM")
# if "evaluation_result" not in st.session_state:
# st.session_state.evaluation_result = None
# if (st.button("Evaluate", on_click=callback2) or st.session_state.eval_btn_clicked):
# if "eval_model" not in st.session_state:
# st.session_state.eval_model = models[0]
# st.selectbox(
# "Choose a model to use for evaluation:",
# models, key = 'eval_model',
# index=models.index(st.session_state.eval_model) if st.session_state.eval_model in models else 0,
# )
# if st.session_state.eval_model:
# st.session_state.evaluation_result = phoenix_helpers.evaluate_model(st.session_state.spans_df, st.session_state.eval_model)
# st.write(st.session_state.evaluation_result)