viswanani committed on
Commit
2d98bd2
·
verified ·
1 Parent(s): 4994596

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -2,17 +2,22 @@ from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import base64
4
  import pdfplumber
5
- from transformers import pipeline
6
  import torch
7
  from typing import List, Dict
8
 
9
-
10
  # Initialize FastAPI app
11
  app = FastAPI()
12
 
13
- # Load the pre-trained BERT model for contract clause classification
14
- classifier = pipeline("text-classification", model="distilbert-base-uncased")
 
 
 
 
15
 
 
 
16
 
17
  # Function to extract text from PDF file
18
  def extract_text_from_pdf(pdf_data: bytes) -> str:
 
2
  from pydantic import BaseModel
3
  import base64
4
  import pdfplumber
5
+ from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, pipeline
6
  import torch
7
  from typing import List, Dict
8
 
 
9
  # Initialize FastAPI app
10
  app = FastAPI()
11
 
12
+ # Load the pre-trained model and tokenizer for classification
13
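+ # NOTE: the base DistilBERT checkpoint ships without a trained classification head (it is newly initialized on load), so predictions are not meaningful until the model is fine-tuned for this task; it is used as-is for now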
14
+ model_name = "distilbert-base-uncased"
15
+
16
+ tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
17
+ model = DistilBertForSequenceClassification.from_pretrained(model_name)
18
 
19
+ # Use Hugging Face's pipeline for text classification
20
+ classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
21
 
22
  # Function to extract text from PDF file
23
  def extract_text_from_pdf(pdf_data: bytes) -> str: