Spaces:

VGG11
/

armenian_chatbot_bert_multilingual

Runtime error

App Files Community

Mary12 committed on Aug 10, 2023

Commit

1844fcc

1 Parent(s): 8dd5245

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -74

app.py CHANGED Viewed

@@ -4,26 +4,14 @@ from pypdf import PdfReader
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
-# def remove_references(text):
-#     text = re.sub(r'\[\d+\]', '', text) ##[ref]
-#     text = re.sub(r'\[https?://[^\[\]]+\s[^\[\]]+\]', '', text) ##hyperlink with text
-#     text = re.sub(r'\[https?://[^\[\]]+\]', '', text) ##just the hyperlink
-#     # text = html.unescape(text)
-#     text = re.sub(r'\s+', ' ', text).strip() ##clear out the white spaces
-#     return text
 def remove_references(text):
-    patterns = [
-        r'\[\d+\]',
-        r'\[https?://[^\[\]]+\s[^\[\]]+\]',
-        r'\[https?://[^\[\]]+\]'
-    ]
-    combined_pattern = '|'.join(patterns)
-    text = re.sub(combined_pattern, '', text)
-    text = re.sub(r'\s+', ' ', text).strip()
     return text
 def extract_text_from_pdf(file_path):
     text = ""
@@ -32,73 +20,43 @@ def extract_text_from_pdf(file_path):
         text += page.extract_text() + "\n"
     return text
-def get_answer(pipe, question, context):
-    result = pipe(question=question, context=context)
-    answered = result['answer']
-    return remove_references(answered)
-def handle_missing_input(context, question):
-    if not context and not question:
-        return "Որպեսզի ..."
-    if not context:
-        return "Ես չեմ կարողանամ ..."
-    if not question:
-        return "Ես չեմ կարողանամ ..."
-    return None
-def clean_and_capitalize(text):
-    text = text.replace('(', '', 1)
-    text = text.replace(',', '', len(text)-1)
-    return text.capitalize()
 def qa_result(context, question, file):
     model_name = "timpal0l/mdeberta-v3-base-squad2"
     pipe = model(model_name)
-    error_message = handle_missing_input(context, question)
-    if error_message:
-        return error_message
     if file is not None:
         context = extract_text_from_pdf(file.name)
-    text = get_answer(pipe, question, context)
-    return clean_and_capitalize(text)
-# def model(model_name):
-#   tokenizer = AutoTokenizer.from_pretrained(model_name)
-#   model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
-#   model_pipeline = pipeline(
-#     "question-answering",
-#     model = model,
-#     tokenizer = tokenizer
-#     )
-#   return model_pipeline
-# def qa_result(context, question, file):
-#     model_name = "timpal0l/mdeberta-v3-base-squad2"
-#     pipe = model(model_name)
-#     if file is not None:
-#         context = extract_text_from_pdf(file.name)
-#         result = pipe(question=question, context=context)
-#         answered = result['answer']
-#         text = remove_references(answered)
-#     else:
-#         if len(context) == 0 and len(question) == 0:
-#             text = "Որպեսզի ես կարողանամ քեզ օգնել, ինձ պիտի տրամադրես համապատասխան տեքստն ու հարցերը։"
-#         elif len(context) == 0:
-#             text = "Ես չեմ կարողանամ քեզ օգնել եթե ինձ չտրամադրես տեքստը"
-#         elif len(question) == 0:
-#             text = "Ես չեմ կարողանամ քեզ օգնել եթե ինձ չտաս հարցդ"
-#         else:
-#             result = pipe(question=question, context=context)
-#             answered = result['answer']
-#             text = remove_references(answered)
-#     text = text.replace('(', '', 1)
-#     text = text.replace(',', '', len(text)-1)
-#     return text.capitalize()
 theme = gr.themes.Soft().set(
     body_background_fill='*background_fill_secondary',

 from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
 def remove_references(text):
+    text = re.sub(r'\[\d+\]', '', text) ##[ref]
+    text = re.sub(r'\[https?://[^\[\]]+\s[^\[\]]+\]', '', text) ##hyperlink with text
+    text = re.sub(r'\[https?://[^\[\]]+\]', '', text) ##just the hyperlink
+    # text = html.unescape(text)
+    text = re.sub(r'\s+', ' ', text).strip() ##clear out the white spaces
     return text
 def extract_text_from_pdf(file_path):
     text = ""
         text += page.extract_text() + "\n"
     return text
+def model(model_name):
+  tokenizer = AutoTokenizer.from_pretrained(model_name)
+  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
+  model_pipeline = pipeline(
+    "question-answering",
+    model = model,
+    tokenizer = tokenizer
+    )
+  return model_pipeline
 def qa_result(context, question, file):
     model_name = "timpal0l/mdeberta-v3-base-squad2"
     pipe = model(model_name)
     if file is not None:
         context = extract_text_from_pdf(file.name)
+        result = pipe(question=question, context=context)
+        answered = result['answer']
+        text = remove_references(answered)
+    else:
+        if len(context) == 0 and len(question) == 0:
+            text = "Որպեսզի ես կարողանամ քեզ օգնել, ինձ պիտի տրամադրես համապատասխան տեքստն ու հարցերը։"
+        elif len(context) == 0:
+            text = "Ես չեմ կարողանամ քեզ օգնել եթե ինձ չտրամադրես տեքստը"
+        elif len(question) == 0:
+            text = "Ես չեմ կարողանամ քեզ օգնել եթե ինձ չտաս հարցդ"
+        else:
+            result = pipe(question=question, context=context)
+            answered = result['answer']
+            text = remove_references(answered)
+    text = text.replace('(', '', 1)
+    text = text.replace(',', '', len(text)-1)
+    return text.capitalize()
 theme = gr.themes.Soft().set(
     body_background_fill='*background_fill_secondary',