# gopichandra's picture
# Update app.py
# 1c8a581 verified
from fastapi import FastAPI, File, Form, UploadFile
from pydantic import BaseModel
import docx
import fitz # PyMuPDF for PDF extraction
from transformers import AutoTokenizer, AutoModel
import torch
import io
# ASGI application object; the route decorator further down registers on it.
app = FastAPI()

# Hugging Face checkpoint used for semantic textual similarity. The tokenizer
# and the encoder are both loaded once, when this module is imported.
_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModel.from_pretrained(_MODEL_NAME)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path: io.BytesIO):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: The PDF to read. Despite the name, the endpoint in this
            file passes an in-memory ``io.BytesIO``; raw ``bytes`` /
            ``bytearray`` are accepted as well. Any other value (e.g. a
            filesystem path) is forwarded to ``fitz.open`` unchanged.

    Returns:
        The text of all pages, in page order, joined without a separator.
        An empty string if no page yields any text.
    """
    if isinstance(pdf_path, io.BytesIO):
        # getvalue() returns the whole buffer regardless of the current
        # read position.
        pdf_path = pdf_path.getvalue()

    if isinstance(pdf_path, (bytes, bytearray)):
        # In-memory data must be passed via ``stream``; PyMuPDF rejects a
        # non-path object given as the positional ``filename`` argument.
        # ``filetype`` is needed because there is no file name to infer
        # the format from.
        doc = fitz.open(stream=pdf_path, filetype="pdf")
    else:
        doc = fitz.open(pdf_path)

    # The context manager closes the document even if extraction fails.
    with doc:
        return "".join(page.get_text() for page in doc)
# Function to extract text from DOCX
def extract_text_from_docx(docx_path: io.BytesIO):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        docx_path: The document to open, handed straight to
            ``docx.Document``. The endpoint in this file passes an
            in-memory ``io.BytesIO``.

    Returns:
        Every paragraph's text followed by a newline, concatenated in
        document order. Only the document's ``paragraphs`` collection is
        read. An empty string if there are no paragraphs.
    """
    document = docx.Document(docx_path)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
# Function to calculate semantic similarity score
def get_similarity_score(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are tokenized as one padded batch, encoded with the
    module-level ``model``, and reduced to one vector each by averaging
    the token embeddings of the real (non-padding) tokens.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two pooled embeddings as a Python
        float.

    Note:
        ``truncation=True`` cuts each text at the tokenizer's maximum
        length, so only the leading part of a long text contributes to
        the score.
    """
    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # The shorter text is padded up to the longer one. Weight by the
    # attention mask so padding positions do not dilute its mean embedding.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    sentence_embeddings = summed / token_counts

    similarity_score = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0], sentence_embeddings[1], dim=0
    )
    return similarity_score.item()
# FastAPI endpoint to process the resume and calculate similarity with LIC profile
@app.post("/score_resume/")
async def score_resume(file: UploadFile = File(...), lic_profile: str = Form(...)):
    """Score an uploaded resume against an LIC profile text.

    Args:
        file: The uploaded resume. The format is chosen from the file
            name's extension (``.pdf`` or ``.docx``, case-insensitive).
        lic_profile: The profile text to compare the resume with.

    Returns:
        ``{"similarity_score": <float>}`` on success, or
        ``{"error": <message>}`` when the file type is unsupported or the
        profile text is empty or whitespace-only.
    """
    # The upload may carry no file name; treat that as "no extension"
    # instead of failing on None.
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        extract_text = extract_text_from_pdf
    elif filename.endswith('.docx'):
        extract_text = extract_text_from_docx
    else:
        return {"error": "Invalid file type. Please upload a PDF or DOCX file."}

    # Validate the cheap input before reading and parsing the upload.
    if not lic_profile or not lic_profile.strip():
        return {"error": "LIC profile text is required."}

    file_content = await file.read()
    resume_text = extract_text(io.BytesIO(file_content))

    # Calculate the similarity score between resume and LIC profile
    score = get_similarity_score(resume_text, lic_profile)
    return {"similarity_score": score}