| import pandas as pd |
| import streamlit as st |
| from transformers import pipeline |
| import os |
|
|
| |
@st.cache_resource
def _load_pipeline(model_name):
    """Build the text-classification pipeline for ``model_name`` once and reuse it.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, each rerun would construct both pipelines
    again. ``st.cache_resource`` keeps one shared instance per distinct
    ``model_name`` for the lifetime of the server process.
    """
    return pipeline("text-classification", model=model_name)


# Module-level pipeline handles used by evaluate_explanation().
model1 = _load_pipeline("vectara/hallucination_evaluation_model")
model2 = _load_pipeline("sileod/deberta-v3-base-tasksource-nli")
|
|
| |
# Predefined demo inputs, keyed by example type ('good' / 'bad').
# Both entries ask the same question; they differ in the explanation text
# and in the ground-truth label attached to it.
examples = {
    "good": dict(
        question="What causes rainbows to appear in the sky?",
        explanation=(
            "Rainbows appear when sunlight is refracted, dispersed, and "
            "reflected inside water droplets in the atmosphere, resulting "
            "in a spectrum of light appearing in the sky."
        ),
        ground_truth="Correct",
    ),
    "bad": dict(
        question="What causes rainbows to appear in the sky?",
        explanation=(
            "Rainbows happen because light in the sky gets mixed up and "
            "sometimes shows colors when it's raining or when there is "
            "water around."
        ),
        ground_truth="Incorrect",
    ),
}
|
|
| |
def evaluate_explanation(question, explanation):
    """Run ``explanation`` through both classification pipelines.

    Args:
        question: The question the explanation answers.
        explanation: The explanation text that is scored.

    Returns:
        A ``(model1_output, model2_output)`` tuple holding whatever each
        pipeline returns for ``explanation``.
    """
    # NOTE(review): `question` is accepted but not forwarded to either model;
    # only the explanation text is classified. Confirm whether the models
    # are meant to see the question as well.
    return model1(explanation), model2(explanation)
|
|
| |
def compare_vectors(v1, v2):
    """Return the absolute gap between the first scores of two result lists.

    Args:
        v1: Sequence whose first item is a mapping with a ``'score'`` entry.
        v2: Sequence of the same shape as ``v1``.

    Returns:
        ``abs(v1[0]['score'] - v2[0]['score'])``. Only the first item of each
        sequence is examined; any label it carries is ignored.
    """
    first_score = v1[0]['score']
    second_score = v2[0]['score']
    return abs(first_score - second_score)
|
|
| |
# Page heading; emitted before the password gate, so it is shown whether or
# not the visitor has been verified.
st.title("Dual Model Evaluation of Explanations")
|
|
| |
def check_password():
    """Render the password prompt and record a successful login.

    Draws a password field and a "Submit" button. When the button is clicked,
    the submitted value is compared against the ``PASSWORD`` environment
    variable; on a match ``st.session_state['password_correct']`` is set to
    ``True``. Nothing is returned: callers read that session-state flag.
    """
    import hmac  # local import: only needed for the constant-time comparison

    def password_entered():
        # Read the value from session state rather than from a closure over
        # the enclosing local: the callback runs before the script body on
        # the next rerun, so a captured local can hold a stale value, whereas
        # the keyed widget state is already current at this point.
        entered = st.session_state.get('password_input') or ''
        expected = os.getenv('PASSWORD')
        # Fail closed when PASSWORD is unset or empty, so an empty submission
        # can never unlock the demo. Compare as bytes because compare_digest
        # rejects non-ASCII str arguments.
        if expected and hmac.compare_digest(
            entered.encode('utf-8'), expected.encode('utf-8')
        ):
            st.session_state['password_correct'] = True
            # Do not keep the plaintext password in session state.
            st.session_state.pop('password_input', None)
        else:
            st.error("Incorrect Password, please try again.")

    st.text_input("Enter Password:", type="password", key='password_input')
    submit_button = st.button("Submit", on_click=password_entered)

    # Reached only on the rerun triggered by a click that did not verify.
    if submit_button and not st.session_state.get('password_correct', False):
        st.error("Please enter a valid password to access the demo.")
|
|
| |
# Password gate: the demo UI below is built only once the session has been
# verified; otherwise only the password prompt is rendered.
if st.session_state.get("password_correct", False):
    st.sidebar.success("Password Verified. Proceed with the demo.")

    # Choose between a canned example and free-form input.
    input_type = st.radio(
        "Choose input type:",
        ("Use predefined example", "Enter your own"),
    )
    if input_type != "Use predefined example":
        question = st.text_input("Enter your question:", "")
        explanation = st.text_input("Enter your explanation:", "")
        ground_truth = st.text_input("Enter ground truth:", "")
    else:
        example_type = st.radio("Select an example type:", ("good", "bad"))
        selected_example = examples[example_type]
        question = selected_example["question"]
        explanation = selected_example["explanation"]
        ground_truth = selected_example["ground_truth"]

    if st.button("Evaluate Explanation"):
        # All three fields must be non-empty before the models are run.
        # The ground truth is required here but is not passed to the
        # evaluation call.
        if not (question and explanation and ground_truth):
            st.error("Please enter a question, explanation, and ground truth to evaluate.")
        else:
            results1, results2 = evaluate_explanation(question, explanation)
            diff = compare_vectors(results1, results2)
            st.write("### Model 1 Results")
            st.write(results1)
            st.write("### Model 2 Results")
            st.write(results2)
            st.write(f"### Score Difference: {diff}")
else:
    check_password()