Spaces:

eventdata-utd
/

ConfliBERT-QA

Sleeping

App Files Files Community

salsarra committed on Nov 10, 2024

Commit

12ecea8

verified ·

1 Parent(s): cf16b5d

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -48

app.py CHANGED Viewed

@@ -22,15 +22,15 @@ bert_qa_model_v1 = TFAutoModelForQuestionAnswering.from_pretrained(bert_model_na
 bert_qa_tokenizer_v1 = AutoTokenizer.from_pretrained(bert_model_name_v1)
 # Load Spanish models and tokenizers
-confli_model_spanish = 'salsarra/ConfliBERT-Spanish-Beto-Cased-NewsQA'
-confli_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(confli_model_spanish)
-confli_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_model_spanish)
-beto_model_spanish = 'salsarra/Beto-Spanish-Cased-NewsQA'
-beto_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(beto_model_spanish)
-beto_tokenizer_spanish = AutoTokenizer.from_pretrained(beto_model_spanish)
-# Load the newly added models for Spanish (Beto and ConfliBERT SQAC)
 confli_sqac_model_spanish = 'salsarra/ConfliBERT-Spanish-Beto-Cased-SQAC'
 confli_sqac_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(confli_sqac_model_spanish)
 confli_sqac_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_sqac_model_spanish)
@@ -56,7 +56,7 @@ def handle_error_message(e, default_limit=512):
     return f"<span style='color: red; font-weight: bold;'>Error: {error_message}</span>"
-# Define question_answering_v1 for ConfliBERT English
 def question_answering_v1(context, question):
     try:
         inputs = qa_tokenizer_v1(question, context, return_tensors='tf', truncation=True)
@@ -66,11 +66,11 @@ def question_answering_v1(context, question):
         answer = qa_tokenizer_v1.convert_tokens_to_string(
             qa_tokenizer_v1.convert_ids_to_tokens(inputs['input_ids'].numpy()[0][answer_start:answer_end])
         )
-        return f"<span style='color: green; font-weight: bold;'>{answer}</span>"
     except Exception as e:
         return handle_error_message(e)
-# Define bert_question_answering_v1 for BERT English
 def bert_question_answering_v1(context, question):
     try:
         inputs = bert_qa_tokenizer_v1(question, context, return_tensors='tf', truncation=True)
@@ -83,6 +83,83 @@ def bert_question_answering_v1(context, question):
         return f"<span style='font-weight: bold;'>{answer}</span>"
     except Exception as e:
         return handle_error_message(e)
 # Main comparison function with language selection
 def compare_question_answering(language, context, question):
     if language == "English":
@@ -94,19 +171,13 @@ def compare_question_answering(language, context, question):
             <h2 style='color: #2e8b57; font-weight: bold;'>Answers:</h2>
         </div><br>
         <div>
-            <strong>ConfliBERT-cont-cased-SQuAD-v1:</strong><br>{confli_answer_v1}</div><br>
         <div>
-            <strong>BERT-base-cased-SQuAD-v1:</strong><br>{bert_answer_v1}
         </div><br>
         <div>
-            <strong>ChatGPT:</strong><br>{chatgpt_answer}
         </div><br>
-        <div>
-            <strong>Model Information:</strong><br>
-            ConfliBERT-cont-cased-SQuAD-v1: <a href='https://huggingface.co/salsarra/ConfliBERT-QA' target='_blank'>salsarra/ConfliBERT-QA</a><br>
-            BERT-base-cased-SQuAD-v1: <a href='https://huggingface.co/salsarra/BERT-base-cased-SQuAD-v1' target='_blank'>salsarra/BERT-base-cased-SQuAD-v1</a><br>
-            ChatGPT (GPT-3.5 Turbo): <a href='https://platform.openai.com/docs/models/gpt-3-5' target='_blank'>OpenAI API</a><br>
-        </div>
         """
     elif language == "Spanish":
         confli_answer_spanish = question_answering_spanish(context, question)
@@ -119,30 +190,22 @@ def compare_question_answering(language, context, question):
             <h2 style='color: #2e8b57; font-weight: bold;'>Answers:</h2>
         </div><br>
         <div>
-            <strong>ConfliBERT-Spanish-Beto-Cased-NewsQA:</strong><br>{confli_answer_spanish}</div><br>
         <div>
-            <strong>Beto-Spanish-Cased-NewsQA:</strong><br>{beto_answer_spanish}
         </div><br>
         <div>
-            <strong>ConfliBERT-Spanish-Beto-Cased-SQAC:</strong><br>{confli_sqac_answer_spanish}
         </div><br>
         <div>
-            <strong>Beto-Spanish-Cased-SQAC:</strong><br>{beto_sqac_answer_spanish}
         </div><br>
         <div>
-            <strong>ChatGPT:</strong><br>{chatgpt_answer_spanish}
         </div><br>
-        <div>
-            <strong>Model Information:</strong><br>
-            ConfliBERT-Spanish-Beto-Cased-NewsQA: <a href='https://huggingface.co/salsarra/ConfliBERT-Spanish-Beto-Cased-NewsQA' target='_blank'>salsarra/ConfliBERT-Spanish-Beto-Cased-NewsQA</a><br>
-            Beto-Spanish-Cased-NewsQA: <a href='https://huggingface.co/salsarra/Beto-Spanish-Cased-NewsQA' target='_blank'>salsarra/Beto-Spanish-Cased-NewsQA</a><br>
-            ConfliBERT-Spanish-Beto-Cased-SQAC: <a href='https://huggingface.co/salsarra/ConfliBERT-Spanish-Beto-Cased-SQAC' target='_blank'>salsarra/ConfliBERT-Spanish-Beto-Cased-SQAC</a><br>
-            Beto-Spanish-Cased-SQAC: <a href='https://huggingface.co/salsarra/Beto-Spanish-Cased-SQAC' target='_blank'>salsarra/Beto-Spanish-Cased-SQAC</a><br>
-            ChatGPT (GPT-3.5 Turbo): <a href='https://platform.openai.com/docs/models/gpt-3-5' target='_blank'>OpenAI API</a><br>
-        </div>
         """
-# Setting up Gradio Blocks interface with footer
 with gr.Blocks(css="""
     body {
         background-color: #f0f8ff;
@@ -162,19 +225,6 @@ with gr.Blocks(css="""
         text-align: center;
         font-size: 1.5em;
     }
-    .gradio-container {
-        max-width: 100%;
-        margin: 10px auto;
-        padding: 10px;
-        background-color: #ffffff;
-        border-radius: 10px;
-        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-    }
-    .button-row {
-        display: flex;
-        justify-content: center;
-        gap: 10px;
-    }
 """) as demo:
     gr.Markdown("# [ConfliBERT-QA](https://eventdata.utdallas.edu/conflibert/)", elem_id="title")
@@ -185,7 +235,7 @@ with gr.Blocks(css="""
     question = gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question")
     output = gr.HTML(label="Output")
-    with gr.Row(elem_id="button-row"):
         clear_btn = gr.Button("Clear")
         submit_btn = gr.Button("Submit")

 bert_qa_tokenizer_v1 = AutoTokenizer.from_pretrained(bert_model_name_v1)
 # Load Spanish models and tokenizers
+confli_model_spanish_name = 'salsarra/ConfliBERT-Spanish-Beto-Cased-NewsQA'
+confli_model_spanish = TFAutoModelForQuestionAnswering.from_pretrained(confli_model_spanish_name)
+confli_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_model_spanish_name)
+beto_model_spanish_name = 'salsarra/Beto-Spanish-Cased-NewsQA'
+beto_model_spanish = TFAutoModelForQuestionAnswering.from_pretrained(beto_model_spanish_name)
+beto_tokenizer_spanish = AutoTokenizer.from_pretrained(beto_model_spanish_name)
+# Load the additional Spanish models
 confli_sqac_model_spanish = 'salsarra/ConfliBERT-Spanish-Beto-Cased-SQAC'
 confli_sqac_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(confli_sqac_model_spanish)
 confli_sqac_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_sqac_model_spanish)
     return f"<span style='color: red; font-weight: bold;'>Error: {error_message}</span>"
+# Define question_answering_v1 for ConfliBERT English with truncation=True
 def question_answering_v1(context, question):
     try:
         inputs = qa_tokenizer_v1(question, context, return_tensors='tf', truncation=True)
         answer = qa_tokenizer_v1.convert_tokens_to_string(
             qa_tokenizer_v1.convert_ids_to_tokens(inputs['input_ids'].numpy()[0][answer_start:answer_end])
         )
+        return f"<span style='font-weight: bold;'>{answer}</span>"
     except Exception as e:
         return handle_error_message(e)
+# Define bert_question_answering_v1 for BERT English with truncation=True
 def bert_question_answering_v1(context, question):
     try:
         inputs = bert_qa_tokenizer_v1(question, context, return_tensors='tf', truncation=True)
         return f"<span style='font-weight: bold;'>{answer}</span>"
     except Exception as e:
         return handle_error_message(e)
+# Define question_answering_spanish for ConfliBERT-Spanish-Beto-Cased-NewsQA
+def question_answering_spanish(context, question):
+    """Answer `question` from `context` with the ConfliBERT Spanish NewsQA model.
+
+    Returns an HTML snippet: the decoded answer span wrapped in a bold
+    <span>, or the output of handle_error_message() if any step raises.
+    """
+    try:
+        encoded = confli_tokenizer_spanish(question, context, return_tensors='tf', truncation=True)
+        prediction = confli_model_spanish(encoded)
+        # Highest-scoring start/end token positions for the first (only) row.
+        start_idx = tf.argmax(prediction.start_logits, axis=1).numpy()[0]
+        stop_idx = tf.argmax(prediction.end_logits, axis=1).numpy()[0] + 1  # +1: slice end is exclusive
+        token_ids = encoded['input_ids'].numpy()[0]
+        span_tokens = confli_tokenizer_spanish.convert_ids_to_tokens(token_ids[start_idx:stop_idx])
+        answer = confli_tokenizer_spanish.convert_tokens_to_string(span_tokens)
+        return f"<span style='font-weight: bold;'>{answer}</span>"
+    except Exception as err:
+        return handle_error_message(err)
+# Define beto_question_answering_spanish for Beto-Spanish-Cased-NewsQA
+def beto_question_answering_spanish(context, question):
+    """Answer `question` from `context` with the Beto Spanish NewsQA model.
+
+    Returns an HTML snippet: the decoded answer span wrapped in a bold
+    <span>, or the output of handle_error_message() if any step raises.
+    """
+    try:
+        encoded = beto_tokenizer_spanish(question, context, return_tensors='tf', truncation=True)
+        prediction = beto_model_spanish(encoded)
+        # Highest-scoring start/end token positions for the first (only) row.
+        start_idx = tf.argmax(prediction.start_logits, axis=1).numpy()[0]
+        stop_idx = tf.argmax(prediction.end_logits, axis=1).numpy()[0] + 1  # +1: slice end is exclusive
+        token_ids = encoded['input_ids'].numpy()[0]
+        span_tokens = beto_tokenizer_spanish.convert_ids_to_tokens(token_ids[start_idx:stop_idx])
+        answer = beto_tokenizer_spanish.convert_tokens_to_string(span_tokens)
+        return f"<span style='font-weight: bold;'>{answer}</span>"
+    except Exception as err:
+        return handle_error_message(err)
+# Define confli_sqac_question_answering_spanish for ConfliBERT-Spanish-Beto-Cased-SQAC
+def confli_sqac_question_answering_spanish(context, question):
+    """Answer `question` from `context` with the ConfliBERT Spanish SQAC model.
+
+    Args:
+        context: Passage the answer span is extracted from.
+        question: Question to answer.
+
+    Returns:
+        HTML string with the decoded answer span in bold; if any step raises,
+        the HTML produced by handle_error_message() instead.
+    """
+    # Handle failures the same way the NewsQA helpers do, so an error in this
+    # model comes back as inline error HTML instead of propagating to the caller.
+    try:
+        # Call the tokenizer directly (as the sibling helpers do) rather than
+        # going through the legacy encode_plus entry point.
+        inputs = confli_sqac_tokenizer_spanish(question, context, return_tensors='tf', truncation=True)
+        outputs = confli_sqac_model_spanish_qa(inputs)
+        answer_start = tf.argmax(outputs.start_logits, axis=1).numpy()[0]
+        answer_end = tf.argmax(outputs.end_logits, axis=1).numpy()[0] + 1  # +1: slice end is exclusive
+        answer = confli_sqac_tokenizer_spanish.convert_tokens_to_string(
+            confli_sqac_tokenizer_spanish.convert_ids_to_tokens(inputs['input_ids'].numpy()[0][answer_start:answer_end])
+        )
+        return f"<span style='font-weight: bold;'>{answer}</span>"
+    except Exception as e:
+        return handle_error_message(e)
+# Define beto_sqac_question_answering_spanish for Beto-Spanish-Cased-SQAC
+def beto_sqac_question_answering_spanish(context, question):
+    """Answer `question` from `context` with the Beto Spanish SQAC model.
+
+    Args:
+        context: Passage the answer span is extracted from.
+        question: Question to answer.
+
+    Returns:
+        HTML string with the decoded answer span in bold; if any step raises,
+        the HTML produced by handle_error_message() instead.
+    """
+    # Handle failures the same way the NewsQA helpers do, so an error in this
+    # model comes back as inline error HTML instead of propagating to the caller.
+    try:
+        # Call the tokenizer directly (as the sibling helpers do) rather than
+        # going through the legacy encode_plus entry point.
+        inputs = beto_sqac_tokenizer_spanish(question, context, return_tensors='tf', truncation=True)
+        outputs = beto_sqac_model_spanish_qa(inputs)
+        answer_start = tf.argmax(outputs.start_logits, axis=1).numpy()[0]
+        answer_end = tf.argmax(outputs.end_logits, axis=1).numpy()[0] + 1  # +1: slice end is exclusive
+        answer = beto_sqac_tokenizer_spanish.convert_tokens_to_string(
+            beto_sqac_tokenizer_spanish.convert_ids_to_tokens(inputs['input_ids'].numpy()[0][answer_start:answer_end])
+        )
+        return f"<span style='font-weight: bold;'>{answer}</span>"
+    except Exception as e:
+        return handle_error_message(e)
+# Define a function to get ChatGPT's answer in English
+def chatgpt_question_answering(context, question):
+    """Ask gpt-3.5-turbo to answer `question` given `context` (English prompt).
+
+    Args:
+        context: Passage supplied to the model as context.
+        question: Question to answer.
+
+    Returns:
+        The first choice's message content with surrounding whitespace
+        stripped; if the request or the response lookup raises, the HTML
+        produced by handle_error_message() instead.
+    """
+    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
+    # The remote call is the step most likely to fail (network, auth, quota);
+    # report it inline like the local-model helpers instead of propagating.
+    try:
+        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
+        # confirm the openai version pinned for this Space.
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=150
+        )
+        return response['choices'][0]['message']['content'].strip()
+    except Exception as e:
+        return handle_error_message(e)
+# Define a function to get ChatGPT's answer in Spanish
+def chatgpt_question_answering_spanish(context, question):
+    """Ask gpt-3.5-turbo to answer `question` given `context` (Spanish prompt).
+
+    Args:
+        context: Passage supplied to the model as context.
+        question: Question to answer.
+
+    Returns:
+        The first choice's message content with surrounding whitespace
+        stripped; if the request or the response lookup raises, the HTML
+        produced by handle_error_message() instead.
+    """
+    prompt = f"Contexto: {context}\nPregunta: {question}\nRespuesta:"
+    # The remote call is the step most likely to fail (network, auth, quota);
+    # report it inline like the local-model helpers instead of propagating.
+    try:
+        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
+        # confirm the openai version pinned for this Space.
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that responds in Spanish."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=150
+        )
+        return response['choices'][0]['message']['content'].strip()
+    except Exception as e:
+        return handle_error_message(e)
 # Main comparison function with language selection
 def compare_question_answering(language, context, question):
     if language == "English":
             <h2 style='color: #2e8b57; font-weight: bold;'>Answers:</h2>
         </div><br>
         <div>
+            <strong style='color: green; font-weight: bold;'>ConfliBERT-cont-cased-SQuAD-v1:</strong><br><span style='font-weight: bold;'>{confli_answer_v1}</span></div><br>
         <div>
+            <strong style='color: orange; font-weight: bold;'>BERT-base-cased-SQuAD-v1:</strong><br><span style='font-weight: bold;'>{bert_answer_v1}</span>
         </div><br>
         <div>
+            <strong style='color: #74AA9C; font-weight: bold;'>ChatGPT:</strong><br><span style='font-weight: bold;'>{chatgpt_answer}</span>
         </div><br>
         """
     elif language == "Spanish":
         confli_answer_spanish = question_answering_spanish(context, question)
             <h2 style='color: #2e8b57; font-weight: bold;'>Answers:</h2>
         </div><br>
         <div>
+            <strong style='color: green; font-weight: bold;'>ConfliBERT-Spanish-Beto-Cased-NewsQA:</strong><br><span style='font-weight: bold;'>{confli_answer_spanish}</span></div><br>
         <div>
+            <strong style='color: orange; font-weight: bold;'>Beto-Spanish-Cased-NewsQA:</strong><br><span style='font-weight: bold;'>{beto_answer_spanish}</span>
         </div><br>
         <div>
+            <strong style='color: green; font-weight: bold;'>ConfliBERT-Spanish-Beto-Cased-SQAC:</strong><br><span style='font-weight: bold;'>{confli_sqac_answer_spanish}</span>
         </div><br>
         <div>
+            <strong style='color: orange; font-weight: bold;'>Beto-Spanish-Cased-SQAC:</strong><br><span style='font-weight: bold;'>{beto_sqac_answer_spanish}</span>
         </div><br>
         <div>
+            <strong style='color: #74AA9C; font-weight: bold;'>ChatGPT:</strong><br><span style='font-weight: bold;'>{chatgpt_answer_spanish}</span>
         </div><br>
         """
+# Gradio interface setup
 with gr.Blocks(css="""
     body {
         background-color: #f0f8ff;
         text-align: center;
         font-size: 1.5em;
     }
 """) as demo:
     gr.Markdown("# [ConfliBERT-QA](https://eventdata.utdallas.edu/conflibert/)", elem_id="title")
     question = gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question")
     output = gr.HTML(label="Output")
+    with gr.Row():
         clear_btn = gr.Button("Clear")
         submit_btn = gr.Button("Submit")