Spaces:

Shankarm08
/

pdfreader

Sleeping

Shankarm08 committed on Oct 5, 2024

Commit

ceb87d2

verified ·

1 Parent(s): 0e6176c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from transformers import BertTokenizer, BertModel
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 app = FastAPI()
@@ -25,28 +26,44 @@ async def classify_text(request: TextClassificationRequest):
         return_tensors='pt'
     )
-    # Create a dictionary to store the output
-    output = {}
     # Use the pre-trained BERT model to extract features from the input text
     outputs = model(**inputs)
     # Extract the features
     features = outputs.last_hidden_state[:, 0, :]
-    # Store the output
-    output["features"] = features.tolist()
     return output
 # Create a Gradio interface
 interface = gr.Interface(
-    fn=classify_text,
-    inputs="pdf",
-    outputs="text",
     title="PDF Text Classification",
-    description="Upload a PDF file to classify its text"
 )
-# Launch the interface
-interface.launch()

 from transformers import BertTokenizer, BertModel
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+import pdfplumber
 app = FastAPI()
         return_tensors='pt'
     )
     # Use the pre-trained BERT model to extract features from the input text
     outputs = model(**inputs)
     # Extract the features
     features = outputs.last_hidden_state[:, 0, :]
+    # Return the features as a list
+    return {"features": features.tolist()}
+# Define a function to extract text from a PDF
+def extract_text_from_pdf(pdf_file):
+    with pdfplumber.open(pdf_file) as pdf:
+        text = ""
+        for page in pdf.pages:
+            text += page.extract_text()
+    return text
+# Gradio handler: extract the text of an uploaded PDF and classify it
+def classify_pdf(pdf_file):
+    # Extract text from the uploaded PDF
+    extracted_text = extract_text_from_pdf(pdf_file)
+    # Create the request for FastAPI
+    request = TextClassificationRequest(text=extracted_text)
+    # Simulate calling the FastAPI endpoint
+    output = classify_text(request)
     return output
 # Build the Gradio UI around classify_pdf
 interface = gr.Interface(
+    fn=classify_pdf,
+    inputs="file",  # Gradio "file" input component; a PDF upload is expected
+    outputs="json",  # The value returned by classify_pdf is rendered as JSON
     title="PDF Text Classification",
+    description="Upload a PDF file to classify its text using BERT"
 )
+# Start the Gradio app (this call sits at module level)
+interface.launch()