thrinadhn committed on
Commit
1c67c92
·
verified ·
1 Parent(s): b1b866f

Create evaluationwithexistingdata.py

Browse files
Files changed (1) hide show
  1. evaluationwithexistingdata.py +64 -0
evaluationwithexistingdata.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ import phoenix_helpers
4
+ import helpers
5
+ import plotly.express as px
6
+
7
+
8
# Streamlit page: pick a model, name an existing Phoenix dataset, run the
# hallucination / QA evaluations over it, and show the results as a table
# plus one donut chart per evaluation.

HALLUCINATION_LABELS = ("hallucinated", "factual")
QA_LABELS = ("correct", "incorrect")


def _bucket_labels(labels, allowed):
    """Return *labels* with every value outside *allowed* replaced by "other"."""
    return labels.apply(lambda label: label if label in allowed else "other")


def _label_counts(frame, column):
    """Return a two-column frame (*column*, "count") of value frequencies."""
    counts = frame[column].value_counts().reset_index()
    counts.columns = [column, "count"]
    return counts


def _render_donut(counts, column, title):
    """Draw *counts* as a donut chart with one slice per *column* value."""
    fig = px.pie(
        counts,
        values="count",
        names=column,
        title=title,
        hole=0.4,
    )
    st.plotly_chart(fig)


load_dotenv()
models = helpers.fetch_models()

if models:
    st.subheader("Select a Model")
    # Seed the selection on first run, and reset it when the stored value is
    # no longer offered. The widget then takes its value from session state
    # through `key`, so no separate `index=` is needed.
    if st.session_state.get("selected_model") not in models:
        st.session_state.selected_model = models[0]
    st.selectbox(
        "Choose a model to use:",
        models,
        key="selected_model",
    )

datasetName = st.text_input("Dataset name")

if datasetName:
    # Read defensively: when no models were fetched the key may never have
    # been set, and attribute access would raise AttributeError.
    selected_model = st.session_state.get("selected_model")
    if selected_model is None:
        st.warning("No model is selected, so the dataset cannot be evaluated.")
    else:
        dataset = phoenix_helpers.get_dataset(name=datasetName)
        df = dataset.as_dataframe()
        # NOTE(review): dataEvalResults is assumed to return a pandas
        # DataFrame with "hallucination_eval" and "qa_eval" columns — confirm.
        eval_res = phoenix_helpers.dataEvalResults(selected_model, df)

        # Collapse unexpected evaluator outputs into a single "other" bucket
        # so the charts only show the known labels plus a catch-all.
        eval_res["hallucination_eval"] = _bucket_labels(
            eval_res["hallucination_eval"], HALLUCINATION_LABELS
        )
        eval_res["qa_eval"] = _bucket_labels(eval_res["qa_eval"], QA_LABELS)

        response_counts = _label_counts(eval_res, "hallucination_eval")
        response_counts_qa = _label_counts(eval_res, "qa_eval")

        st.dataframe(eval_res)

        _render_donut(
            response_counts,
            "hallucination_eval",
            f"{selected_model} Model Hallucination",
        )
        _render_donut(
            response_counts_qa,
            "qa_eval",
            f"{selected_model} Model QA",
        )
62
+
63
+
64
+