File size: 1,934 Bytes
1c67c92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
from dotenv import load_dotenv
import phoenix_helpers
import helpers 
import plotly.express as px


# Streamlit page: pick a model, name a dataset, run the evaluation helper on
# it, and show the raw results plus donut charts of the hallucination and QA
# labels.


def _bucket_labels(series, allowed):
    """Return *series* with every value not in *allowed* replaced by "other"."""
    return series.apply(lambda label: label if label in allowed else "other")


def _label_counts(frame, column):
    """Return a two-column frame (*column*, "count") of label frequencies."""
    counts = frame[column].value_counts().reset_index()
    # Name the columns explicitly so the chart code does not depend on
    # whatever names reset_index() happens to pick.
    counts.columns = [column, "count"]
    return counts


def _render_donut(counts, column, title):
    """Draw a donut chart of the label frequencies held in *counts*."""
    fig = px.pie(counts, values="count", names=column, title=title, hole=0.4)
    st.plotly_chart(fig)


load_dotenv()
models = helpers.fetch_models()

if models:
    st.subheader("Select a Model")

    # Seed (or repair) the stored choice *before* the widget is created.  The
    # widget then takes its value from session state through ``key`` alone;
    # also passing ``index=`` is redundant and can make Streamlit warn that
    # the widget was given a default value and a Session State value at once.
    if st.session_state.get("selected_model") not in models:
        st.session_state.selected_model = models[0]
    selected_model = st.selectbox(
        "Choose a model to use:",
        models,
        key="selected_model",
    )

    # Surrounding whitespace is dropped so a stray space does not turn into a
    # failed dataset lookup.
    dataset_name = st.text_input("Dataset name").strip()

    if dataset_name:
        df = phoenix_helpers.get_dataset(name=dataset_name).as_dataframe()
        eval_res = phoenix_helpers.dataEvalResults(selected_model, df)

        # Collapse any unexpected evaluator output into a single "other"
        # bucket so each chart has a small, fixed set of slices.
        eval_res["hallucination_eval"] = _bucket_labels(
            eval_res["hallucination_eval"], ("hallucinated", "factual")
        )
        eval_res["qa_eval"] = _bucket_labels(
            eval_res["qa_eval"], ("correct", "incorrect")
        )

        response_counts = _label_counts(eval_res, "hallucination_eval")
        response_counts_qa = _label_counts(eval_res, "qa_eval")

        st.dataframe(eval_res)

        _render_donut(
            response_counts,
            "hallucination_eval",
            f"{selected_model} Model Hallucination",
        )
        _render_donut(
            response_counts_qa,
            "qa_eval",
            f"{selected_model} Model QA",
        )
else:
    # Without this branch an empty model list leaves the page blank.
    st.warning("No models are available to evaluate.")