Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,17 +2,22 @@ from fastapi import FastAPI, HTTPException
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import base64
|
| 4 |
import pdfplumber
|
| 5 |
-
from transformers import pipeline
|
| 6 |
import torch
|
| 7 |
from typing import List, Dict
|
| 8 |
|
| 9 |
-
|
| 10 |
# Initialize FastAPI app
|
| 11 |
app = FastAPI()
|
| 12 |
|
| 13 |
-
# Load the pre-trained
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Function to extract text from PDF file
|
| 18 |
def extract_text_from_pdf(pdf_data: bytes) -> str:
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import base64
|
| 4 |
import pdfplumber
|
| 5 |
+
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, pipeline
|
| 6 |
import torch
|
| 7 |
from typing import List, Dict
|
| 8 |
|
|
|
|
| 9 |
# Initialize FastAPI app
|
| 10 |
app = FastAPI()
|
| 11 |
|
| 12 |
+
# Load the pre-trained model and tokenizer for classification
|
| 13 |
+
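# NOTE: "distilbert-base-uncased" is the base DistilBERT checkpoint, so its sequence-classification head is newly initialized (untrained); fine-tune the model for your task before relying on its predictions. It is used as-is for now.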
|
| 14 |
+
model_name = "distilbert-base-uncased"
|
| 15 |
+
|
| 16 |
+
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
|
| 17 |
+
model = DistilBertForSequenceClassification.from_pretrained(model_name)
|
| 18 |
|
| 19 |
+
# Use Hugging Face's pipeline for text classification
|
| 20 |
+
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
| 21 |
|
| 22 |
# Function to extract text from PDF file
|
| 23 |
def extract_text_from_pdf(pdf_data: bytes) -> str:
|