Spaces:

harishkumarkotte
/

QandA_ChatBot_with_AccuracyChecking

Sleeping

App Files Files Community

harishkumarkotte committed on Nov 5, 2024

Commit

089f363

verified ·

1 Parent(s): 7f9cd5f

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -38

app.py CHANGED Viewed

@@ -1,16 +1,8 @@
 import gradio as gr
 from transformers import pipeline , AutoTokenizer ,AutoModelForQuestionAnswering
-import string , re ,nltk
 from collections import Counter
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
-lemmatizer = WordNetLemmatizer()
-nltk.download('stopwords')
-nltk.download('wordnet')
-stop_words = set(stopwords.words('english'))
 # Path to your custom-trained model
 model_path = "model/customTrained_Distilbert_Squad"
@@ -42,38 +34,41 @@ def normalize_answer(s):
 def exact_match_score(prediction, ground_truth):
     return normalize_answer(prediction) == normalize_answer(ground_truth)
-'''
-Return the F1 score, precision, and recall of the candidate answer given the reference answer
-'''
-def f1_score_with_precision_recall(reference, candidate):
-    # Split the strings into sets of words
-    reference = lemmatizer.lemmatize(normalize_answer(str(reference)))
-    candidate = lemmatizer.lemmatize(normalize_answer(str(candidate)))
-    words_reference = set(reference.split())
-    words_candidate = set(candidate.split())
-    # Calculate true positives, false positives, and false negatives
-    tp = len(words_reference.intersection(words_candidate))
-    fp = len(words_reference - words_candidate)
-    fn = len(words_candidate - words_reference)
-    # Calculate precision and recall
-    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
-    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
-    # Calculate F1 score
-    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
-    return {'f1': f1_score, 'precision': precision, 'recall': recall}
-'''
-Return the F1 score of the candidate answer given the reference answer
-'''
-def f1_score(reference, candidate):
-    f1_stats = f1_score_with_precision_recall(reference, candidate)
-    return f1_stats['f1']
-def result(context,question,goldAnswer=""):
   # Perform question-answering
   predicted_result = qa_pipeline({
       'question': question,
@@ -88,11 +83,11 @@ def result(context,question,goldAnswer=""):
   predicted_answer = predicted_result['answer']
   # Compute Exact Match and F1 Score
   em_score = exact_match_score(predicted_answer, ground_truth)
-  f1 = f1_score(predicted_answer, predicted_answer)
   return(f"'Answer': {predicted_result['answer']}"),(f"'Machine Answer': {predicted_result['answer']}"+"   Vs  'Human Answer':"+ground_truth), (f"Exact Match: {em_score}"), (f"F1 Score: {f1}")
 demo = gr.Interface(
-    fn=result,
     inputs=["text", "text","text"],
     outputs=["text","text","text","text"],

 import gradio as gr
 from transformers import pipeline , AutoTokenizer ,AutoModelForQuestionAnswering
+import string
+import re
 from collections import Counter
 # Path to your custom-trained model
 model_path = "model/customTrained_Distilbert_Squad"
 def exact_match_score(prediction, ground_truth):
     return normalize_answer(prediction) == normalize_answer(ground_truth)
+def f1_score(prediction, ground_truth):
+    pred_tokens = normalize_answer(prediction).split()
+    truth_tokens = normalize_answer(ground_truth).split()
+    common_tokens = Counter(pred_tokens) & Counter(truth_tokens)
+    num_common = sum(common_tokens.values())
+    if num_common == 0:
+        return 0.0
+    precision = num_common / len(pred_tokens)
+    recall = num_common / len(truth_tokens)
+    f1 = 2 * (precision * recall) / (precision + recall)
+    return f1
+def EM_ScoreF1(context,question,goldAnswer=""):
+  # Perform question-answering
+  predicted_result = qa_pipeline({
+      'question': question,
+      'context': context
+  })
+  # Ground truth (the correct answer)
+  if goldAnswer=="":
+    ground_truth = "Answer Unavailable"
+  else:
+    ground_truth = goldAnswer
+  # Get the predicted answer
+  predicted_answer = predicted_result['answer']
+  # Compute Exact Match and F1 Score
+  em_score = exact_match_score(predicted_answer, ground_truth)
+  f1 = f1_score(predicted_answer, ground_truth)
+  return(f"Machine Answer: {predicted_result['answer']}"+" Vs 'Human Answer':"+ground_truth), (f"Exact Match: {em_score}"), (f"F1 Score: {f1}")
+def EM_ScoreF1(context,question,goldAnswer=""):
   # Perform question-answering
   predicted_result = qa_pipeline({
       'question': question,
   predicted_answer = predicted_result['answer']
   # Compute Exact Match and F1 Score
   em_score = exact_match_score(predicted_answer, ground_truth)
+  f1 = f1_score(predicted_answer, ground_truth)
   return(f"'Answer': {predicted_result['answer']}"),(f"'Machine Answer': {predicted_result['answer']}"+"   Vs  'Human Answer':"+ground_truth), (f"Exact Match: {em_score}"), (f"F1 Score: {f1}")
 demo = gr.Interface(
+    fn=EM_ScoreF1,
     inputs=["text", "text","text"],
     outputs=["text","text","text","text"],