Spaces:

woters
/

unlp

Sleeping

App Files Files Community

woters committed on Feb 27, 2024

Commit

1acb2f0

1 Parent(s): 85a2ba5

update

Browse files

Files changed (2) hide show

app.py +115 -25
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import random
 import firebase_admin
 from firebase_admin import credentials
 from firebase_admin import firestore
 CSV_FILE_PATH = "qa_pairs.csv"
@@ -29,8 +31,8 @@ def fetch_questions():
     return questions_list
-def display_answers(question, model1, model2):
-    df = pd.read_csv(CSV_FILE_PATH)
     answers = {
         model1: "No answer available for Model 1",
         model2: "No answer available for Model 2",
@@ -38,7 +40,7 @@ def display_answers(question, model1, model2):
     for model in [model1, model2]:
         filtered_df = df[(df['question'] == question) & (df['model'] == model)]
         if not filtered_df.empty:
-            answers[model] = f"**{model} Answer:**\n{filtered_df['answer'].iloc[0]}"
     return answers[model1], answers[model2]
@@ -65,11 +67,23 @@ def update_symbols1(q,m1,a1,m2,a2):
     )
     votes_ref = db.collection('votes')
     vote_doc = votes_ref.document(m1).get()
     if vote_doc.exists:
-        votes_ref.document(m1).update({'count': firestore.Increment(1)})
     else:
-        votes_ref.document(m1).set({'count': 1})
-    update_total_votes()
     return update_symbols(q, m1, a1, m2, a2)
@@ -83,7 +97,7 @@ def update_symbols2(q, m1, a1, m2, a2):
         output2=a2,
         outcome='tie'
     )
-    update_total_votes()
     return update_symbols(q, m1, a1, m2, a2)
 def update_symbols3(q, m1, a1, m2, a2):
@@ -98,17 +112,29 @@ def update_symbols3(q, m1, a1, m2, a2):
     )
     votes_ref = db.collection('votes')
     vote_doc = votes_ref.document(m2).get()
     if vote_doc.exists:
-        votes_ref.document(m2).update({'count': firestore.Increment(1)})
     else:
-        votes_ref.document(m2).set({'count': 1})
-    update_total_votes()
     return update_symbols(q, m1, a1, m2, a2)
 def update_symbols(q,m1,a1,m2,a2):
     random_question = random.choice(questions)
     random_model1, random_model2 = random.sample(models, 2)
-    answer1, answer2 = display_answers(random_question, random_model1, random_model2)
     m1 = gr.Markdown(f"{random_model1}", visible=False)
     a1 = gr.Markdown(answer1)
     q = gr.Markdown(f"{random_question}")
@@ -145,29 +171,92 @@ def log_vote(model1, model2, question, output1, output2, outcome):
 def fetch_and_format_leaderboard():
     vote_counts_ref = db.collection('votes')
-    # Ensure you're using FieldPath.document_id() correctly
     docs = vote_counts_ref.stream()
     leaderboard = []
     for doc in docs:
         model_name = doc.id
-        vote_count = doc.to_dict().get('count', 0)
-        leaderboard.append(f"{model_name}: {vote_count} votes")
-    # Optional: Sort the leaderboard by vote count in descending order
-    leaderboard.sort(key=lambda x: int(x.split(': ')[1].split(' ')[0]), reverse=True)
-    return "\n".join(leaderboard)
 #questions = list_questions()
-models = list_models()
-random_question = 'Click any button to start!'
-random_model1, random_model2 = '1', '2'
-answer1, answer2 = display_answers(random_question, random_model1, random_model2)
 db = firestore.client()
 questions = []
 questions_ = fetch_questions()
 for question in questions_:
@@ -175,6 +264,7 @@ for question in questions_:
 votes_ref = db.collection('votes')
 def create_app():
     print('-----------------------')
@@ -204,8 +294,8 @@ def create_app():
         #    b5 = gr.Button("Show Leaderboard")
         initial_leaderboard_data = fetch_and_format_leaderboard()
-        leaderboard_display = gr.Textbox(value=initial_leaderboard_data,label="Leaderboard", placeholder="Leaderboard will be displayed here.",
-                                              lines=30, visible=True)
         #b5.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)
         b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])

 import firebase_admin
 from firebase_admin import credentials
 from firebase_admin import firestore
+from trueskill import Rating
+import trueskill
 CSV_FILE_PATH = "qa_pairs.csv"
     return questions_list
+def display_answers(question, model1, model2, df):
+    #df = pd.read_csv(CSV_FILE_PATH)
     answers = {
         model1: "No answer available for Model 1",
         model2: "No answer available for Model 2",
     for model in [model1, model2]:
         filtered_df = df[(df['question'] == question) & (df['model'] == model)]
         if not filtered_df.empty:
+            answers[model] = f"**Answer:**\n{filtered_df['answer'].iloc[0]}"
     return answers[model1], answers[model2]
     )
     votes_ref = db.collection('votes')
     vote_doc = votes_ref.document(m1).get()
+    elo_count_1 = vote_doc.get('elo_rating')
+    elo1 = Rating(elo_count_1)
+    if vote_doc.exists:
+        votes_ref.document(m1).update({'win_count': firestore.Increment(1)})
+    else:
+        votes_ref.document(m1).set({'win_count': 1})
+    vote_doc = votes_ref.document(m2).get()
+    elo_count_2 = vote_doc.get('elo_rating')
+    elo2 = Rating(elo_count_2)
+    elo1, elo2 = trueskill.rate_1vs1(elo1, elo2)
+    votes_ref.document(m2).update({'elo_rating': elo2.mu})
+    votes_ref.document(m1).update({'elo_rating': elo1.mu})
     if vote_doc.exists:
+        votes_ref.document(m2).update({'loss_count': firestore.Increment(1)})
     else:
+        votes_ref.document(m2).set({'loss_count': 1})
     return update_symbols(q, m1, a1, m2, a2)
         output2=a2,
         outcome='tie'
     )
+    #update_total_votes()
     return update_symbols(q, m1, a1, m2, a2)
 def update_symbols3(q, m1, a1, m2, a2):
     )
     votes_ref = db.collection('votes')
     vote_doc = votes_ref.document(m2).get()
+    elo_count_2 = vote_doc.get('elo_rating')
+    elo2 = Rating(elo_count_2)
+    if vote_doc.exists:
+        votes_ref.document(m2).update({'win_count': firestore.Increment(1)})
+    else:
+        votes_ref.document(m2).set({'win_count': 1})
+    vote_doc = votes_ref.document(m1).get()
+    elo_count_1 = vote_doc.get('elo_rating')
+    elo1 = Rating(elo_count_1)
+    elo1, elo2 = trueskill.rate_1vs1(elo2, elo1)
+    votes_ref.document(m2).update({'elo_rating': elo2.mu})
+    votes_ref.document(m1).update({'elo_rating': elo1.mu})
     if vote_doc.exists:
+        votes_ref.document(m1).update({'loss_count': firestore.Increment(1)})
     else:
+        votes_ref.document(m1).set({'loss_count': 1})
+    #update_total_votes()
     return update_symbols(q, m1, a1, m2, a2)
 def update_symbols(q,m1,a1,m2,a2):
     random_question = random.choice(questions)
     random_model1, random_model2 = random.sample(models, 2)
+    answer1, answer2 = display_answers(random_question, random_model1, random_model2, combined_df)
     m1 = gr.Markdown(f"{random_model1}", visible=False)
     a1 = gr.Markdown(answer1)
     q = gr.Markdown(f"{random_question}")
 def fetch_and_format_leaderboard():
     vote_counts_ref = db.collection('votes')
     docs = vote_counts_ref.stream()
     leaderboard = []
     for doc in docs:
+        model_data = doc.to_dict()
         model_name = doc.id
+        win_count = model_data.get('win_count', 0)
+        loss_count = model_data.get('loss_count', 0)
+        total_matches = win_count + loss_count
+        win_rate = (win_count / total_matches) * 100 if total_matches > 0 else 0
+        elo_rating = model_data.get('elo_rating', 0)
+        leaderboard.append({
+            "model": model_name,
+            "win_rate": win_rate,
+            "elo_rating": elo_rating
+        })
+    # Sort the leaderboard by elo_rating in descending order
+    leaderboard.sort(key=lambda x: x['win_rate'], reverse=True)
+    leaderboard_df = pd.DataFrame(leaderboard)
+    leaderboard_df['Rank'] = leaderboard_df['win_rate'].rank(method='max', ascending=False).astype(int)
+    # Reorder columns to match your requirement
+    leaderboard_df = leaderboard_df[['Rank', 'model', 'win_rate', 'elo_rating'
+                                     ]]
+    # Format the DataFrame as a string for display; you might adjust this part based on how Gradio expects the data
+    # For Gradio, you might directly return the DataFrame instead of converting it to a string
+    return leaderboard_df
 #questions = list_questions()
 db = firestore.client()
+def fetch_questions_c(collection):
+    questions_ref = db.collection(collection)
+    docs = questions_ref.stream()
+    questions_list = []
+    for doc in docs:
+        question = doc.to_dict()
+        questions_list.append(question)
+    return questions_list
+codekobzar = fetch_questions_c('codekobzar')
+gpt = fetch_questions_c('gpt-4')
+llama  = fetch_questions_c('llama-2-70b-chat')
+sherlocknorag  = fetch_questions_c('sherlock-no-rag')
+sherlockrag  = fetch_questions_c('sherlock-rag')
+ukrainenow  = fetch_questions_c('ukrainenow')
+df1 = pd.DataFrame(codekobzar)
+df2 = pd.DataFrame(gpt)
+df3 = pd.DataFrame(llama)
+df4 = pd.DataFrame(sherlocknorag)
+df5 = pd.DataFrame(sherlockrag)
+df6 = pd.DataFrame(ukrainenow)
+df1['model'] = 'codekobzar'
+df2['model'] = 'gpt-4'
+df3['model'] = 'llama-2-70b-chat'
+df4['model'] = 'sherlock-no-rag'
+df5['model'] = 'sherlock-rag'
+df6['model'] = 'ukrainenow'
+combined_df = pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
+combined_df.drop('input',axis=1,inplace=True)
+combined_df.rename(columns={'instruction': 'question', 'output': 'answer'}, inplace=True)
+models = ['codekobzar','gpt-4','llama-2-70b-chat','sherlock-no-rag','sherlock-rag','ukrainenow']#list_models()
+votes_ref = db.collection('votes')
+for model in models:
+    vote_doc = votes_ref.document(model).get()
+    votes_ref.document(model).set({'win_count': 0})
+    votes_ref.document(model).set({'loss_count': 0})
+    votes_ref.document(model).set({'elo_rating': 25})
+random_question = 'Click any button to start!'
+random_model1, random_model2 = '1', '2'
+answer1, answer2 = display_answers(random_question, random_model1, random_model2,combined_df)
 questions = []
 questions_ = fetch_questions()
 for question in questions_:
 votes_ref = db.collection('votes')
 def create_app():
     print('-----------------------')
         #    b5 = gr.Button("Show Leaderboard")
         initial_leaderboard_data = fetch_and_format_leaderboard()
+        #leaderboard_display = gr.Textbox(value=initial_leaderboard_data,label="Leaderboard", placeholder="Leaderboard will be displayed here.",lines=30, visible=True)
+        leaderboard_display = gr.Dataframe(value=initial_leaderboard_data, label="Leaderboard")
         #b5.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)
         b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 firebase-admin
-pandas

 firebase-admin
+pandas
+trueskill