# Hugging Face Space — Way2Call LLM evaluation app (Streamlit).
import streamlit as st
from openai import OpenAI
import pandas as pd
import json
import time
import re

# --- Streamlit page configuration -------------------------------------------
st.set_page_config(page_title='Évaluation LLM Way2Call', layout='wide')
st.title('Évaluation de la Performance d’un LLM')

# Sidebar inputs: vLLM endpoint URL, model name, number of evaluations to run.
st.sidebar.header('Configuration de l’Évaluation')
vLLM_endpoint = st.sidebar.text_input('vLLM Endpoint from RunPod')
model_name = st.sidebar.text_input('Nom du Modèle (e.g., gpt-3.5-turbo)')
num_samples = st.sidebar.number_input('Nombre d’Évaluations à Tester', min_value=1, step=1, value=2)

# Build the OpenAI-compatible base URL. Normalize the endpoint so it works
# whether or not the user typed a trailing slash (plain concatenation with
# "v1" produced e.g. "https://hostv1" when the slash was missing).
BASE_URL = vLLM_endpoint.rstrip("/") + "/v1"
# vLLM does not check the key, but the OpenAI client requires a non-empty one.
API_KEY = "SOMEHOW"
client = OpenAI(
    base_url=BASE_URL,
    api_key=API_KEY,
)
############ HELPER FUNCTIONS ###################################
def get_maitrise_value(text):
    """Extract the "Maitrise" value from (possibly truncated) model output.

    The model sometimes stops generating before the closing brace, so
    strict ``json.loads`` would fail; a regex search is used instead and
    works on complete and truncated output alike.

    Parameters
    ----------
    text : str
        Raw generated text expected to contain ``"Maitrise": "<value>"``.

    Returns
    -------
    str
        The extracted value, or the sentinel string ``"No match found"``
        (kept as-is: callers compare this result against the certified
        value, and ``None`` here would spuriously match a failed reference
        extraction).
    """
    # Accept both the unaccented ("Maitrise") and accented ("Maîtrise")
    # spellings — the model is not consistent about which it emits.
    match = re.search(r'"Ma[iî]trise":\s*"(.*?)"', text)
    if match:
        return match.group(1)
    return "No match found"
def extract_maitrise_value(text):
    """Extract the "Maîtrise" value from loosely formatted model output.

    Handles both the quoted JSON-style form (``"Maîtrise": "Acquis"``) and
    the bare form (``Maîtrise: Acquis``), with or without the circumflex
    accent on the key.

    Returns the value with surrounding whitespace stripped, or ``None``
    when no match is found.
    """
    match = re.search(r'"?Ma[iî]trise"?:\s*"?([A-Za-zé\s]+)"?', text)
    if match:
        # The character class includes \s, so trailing spaces/newlines can
        # be captured on unquoted input; strip them so equality checks
        # against the certified value are exact.
        return match.group(1).strip()
    return None
def extract_maitrise_real_value(json_text):
    """Return the value of the "Maitrise" key from a strict JSON string.

    Used on the certified reference output, which is expected to be
    well-formed JSON (unlike the model's generated text). Returns ``None``
    when the text is not valid JSON or the key is absent.
    """
    try:
        parsed = json.loads(json_text)
    except json.JSONDecodeError:
        # Not parseable JSON: no certified value to report.
        return None
    return parsed.get("Maitrise")
# Path to the evaluation dataset (one row per scored item).
dataset_path = "instruction_dataset.csv"
# Each evaluation ("sample") covers 14 scored items in the dataset.
ITEMS_PER_SAMPLE = 14

if st.sidebar.button('Lancer l’Évaluation') and model_name:
    dataset = pd.read_csv(dataset_path)
    # head() may return fewer rows than requested if the dataset is short;
    # all per-row lists below therefore share len(prompts), not n_rows.
    n_rows = num_samples * ITEMS_PER_SAMPLE
    prompts = dataset['Input'].head(n_rows).tolist()
    outputs = dataset['Output'].head(n_rows).tolist()
    items = dataset['Item'].head(n_rows).tolist()
    phone_numbers = dataset['Phone Number'].head(n_rows).tolist()

    results = []
    progress_text = "Operation in progress. Please wait..."
    progress_bar = st.progress(0, text=progress_text)
    total_evaluations = len(prompts)

    for idx, (prompt, output, item, phone_number) in enumerate(
            zip(prompts, outputs, items, phone_numbers)):
        # Query the vLLM endpoint through the OpenAI-compatible API.
        response = client.completions.create(
            model=model_name,
            prompt=prompt,
            max_tokens=1024,
            # temperature=0  # uncomment for deterministic grading
        )
        generated_text = response.choices[0].text
        # Grade extracted from the (possibly truncated) model output.
        maitrise_generated_value = get_maitrise_value(generated_text)
        # Certified grade from the reference output (strict JSON).
        maitrise_value = extract_maitrise_real_value(output)
        status = "OK" if maitrise_generated_value == maitrise_value else "KO"
        results.append({
            "Item": item,
            "Note LLM": maitrise_generated_value,
            "Note Certifiée": maitrise_value,
            "Status": status,
            "Phone Number": phone_number,
        })
        # Brief pause so the progress bar visibly advances.
        time.sleep(0.01)
        progress_bar.progress((idx + 1) / total_evaluations, text=progress_text)

    results_df = pd.DataFrame(results)
    time.sleep(1)
    progress_bar.empty()

    # Global performance: share of evaluations whose LLM grade matches the
    # certified grade. Use the number of rows actually evaluated — the
    # dataset may hold fewer than num_samples * ITEMS_PER_SAMPLE rows, and
    # dividing by the requested count would understate performance. Guard
    # against an empty dataset to avoid a ZeroDivisionError.
    total_items = len(results_df)
    num_ok = results_df['Status'].value_counts().get('OK', 0) if total_items else 0
    performance_globale = (num_ok / total_items) * 100 if total_items else 0.0

    st.write('### Performance Globale')
    st.write(f'Performance Globale: {performance_globale:.2f}%')

    # Per-item performance: % of "OK" statuses within each item.
    if total_items:
        performance_par_item = results_df.groupby('Item').apply(
            lambda x: (x['Status'].value_counts().get('OK', 0) / len(x)) * 100
        ).reset_index(name='Performance')
        st.write('### Performance Par Item en %')
        st.dataframe(performance_par_item)

    st.write('### Résultats des Évaluations')
    st.dataframe(results_df)

# Prompt the user when the required model name is missing.
if not model_name:
    st.error('Veuillez fournir un modele valide')