Spaces:

arssite
/

Examine_Docs

Build error

App Files Community

arssite committed on Oct 8, 2024

Commit

b5be996

verified ·

1 Parent(s): 12b2ca3

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -62

app.py CHANGED Viewed

@@ -1,62 +1,61 @@
-import torch
-import gradio as gr
-from transformers import pipeline
-import fitz  # PyMuPDF for PDF handling
-from docx import Document  # For handling .docx files
-model_path = ("../Models/models--deepset--roberta-base-squad2/snapshots"
-              "/cbf50ba81465d4d8676b8bab348e31835147541b")
-question_answer = pipeline("question-answering",
-                           model="deepset/roberta-base-squad2")
-def read_file_content(file_obj):
-    try:
-        # Determine the file extension
-        file_extension = file_obj.name.split('.')[-1].lower()
-        if file_extension == 'txt':
-            # Reading text files
-            with open(file_obj.name, 'r', encoding='utf-8') as file:
-                context = file.read()
-        elif file_extension == 'pdf':
-            # Reading PDF files using PyMuPDF (fitz)
-            with fitz.open(file_obj.name) as pdf:
-                context = ""
-                for page_num in range(pdf.page_count):
-                    page = pdf.load_page(page_num)
-                    context += page.get_text()
-        elif file_extension == 'docx':
-            # Reading Word documents using python-docx
-            doc = Document(file_obj.name)
-            context = "\n".join([para.text for para in doc.paragraphs])
-        else:
-            return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
-        return context
-    except Exception as e:
-        return f"An error occurred: {e}"
-def get_answer(file, question):
-    context = read_file_content(file)
-    if "An error occurred" in context or "Unsupported" in context:
-        return context  # Return error message directly if present
-    answer = question_answer(question=question, context=context)
-    return answer["answer"]
-demo = gr.Interface(
-    fn=get_answer,
-    inputs=[
-        gr.File(label="Upload your file"),
-        gr.Textbox(label="Input your question", lines=1)
-    ],
-    outputs=[gr.Textbox(label="Answer text", lines=1)],
-    title="Document Q & A",
-    description="THIS APPLICATION WILL BE USED TO ANSWER QUESTIONS BASED ON CONTEXT PROVIDED."
-)
-demo.launch()

+import torch
+import gradio as gr
+from transformers import pipeline
+from PyPDF2 import PdfReader  # Alternative for PDF handling
+from docx import Document  # For handling .docx files
+model_path = ("../Models/models--deepset--roberta-base-squad2/snapshots"
+              "/cbf50ba81465d4d8676b8bab348e31835147541b")
+question_answer = pipeline("question-answering",
+                           model="deepset/roberta-base-squad2")
+def read_file_content(file_obj):
+    try:
+        # Determine the file extension
+        file_extension = file_obj.name.split('.')[-1].lower()
+        if file_extension == 'txt':
+            # Reading text files
+            with open(file_obj.name, 'r', encoding='utf-8') as file:
+                context = file.read()
+        elif file_extension == 'pdf':
+            # Reading PDF files using PyPDF2
+            reader = PdfReader(file_obj.name)
+            context = ""
+            for page in reader.pages:
+                context += page.extract_text()
+        elif file_extension == 'docx':
+            # Reading Word documents using python-docx
+            doc = Document(file_obj.name)
+            context = "\n".join([para.text for para in doc.paragraphs])
+        else:
+            return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
+        return context
+    except Exception as e:
+        return f"An error occurred: {e}"
+def get_answer(file, question):
+    context = read_file_content(file)
+    if "An error occurred" in context or "Unsupported" in context:
+        return context  # Return error message directly if present
+    answer = question_answer(question=question, context=context)
+    return answer["answer"]
+demo = gr.Interface(
+    fn=get_answer,
+    inputs=[
+        gr.File(label="Upload your file"),
+        gr.Textbox(label="Input your question", lines=1)
+    ],
+    outputs=[gr.Textbox(label="Answer text", lines=1)],
+    title="Explore Documents",
+    description="THIS APPLICATION WILL BE USED TO ANSWER QUESTIONS BASED ON CONTEXT PROVIDED."
+)
+demo.launch()