Spaces:
Runtime error
Runtime error
File size: 2,155 Bytes
1c8a581 0fb32c3 55671b0 1c8a581 55671b0 1c8a581 55671b0 0fb32c3 1c8a581 0fb32c3 55671b0 0fb32c3 1c8a581 0fb32c3 55671b0 0fb32c3 55671b0 0fb32c3 1c8a581 0fb32c3 1c8a581 0fb32c3 1c8a581 55671b0 1c8a581 55671b0 0fb32c3 1c8a581 0fb32c3 1c8a581 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
from fastapi import FastAPI, File, Form, UploadFile
from pydantic import BaseModel
import docx
import fitz # PyMuPDF for PDF extraction
from transformers import AutoTokenizer, AutoModel
import torch
import io
# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Checkpoint used for semantic textual similarity. The tokenizer and the
# encoder are loaded from the same identifier so their vocabularies match.
_EMBEDDING_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(_EMBEDDING_CHECKPOINT)
model = AutoModel.from_pretrained(_EMBEDDING_CHECKPOINT)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path: io.BytesIO):
    """Return the concatenated text of every page of an in-memory PDF.

    Args:
        pdf_path: Binary stream holding the PDF bytes. Despite the name this
            is not a filesystem path; the endpoint in this module passes an
            ``io.BytesIO`` wrapping the uploaded file.

    Returns:
        The text of all pages, in page order, joined without a separator.
    """
    # The first positional argument of fitz.open() is a *filename*; in-memory
    # data has to be supplied through ``stream`` together with ``filetype``.
    # The context manager closes the document once the text has been read.
    with fitz.open(stream=pdf_path.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Function to extract text from DOCX
def extract_text_from_docx(docx_path: io.BytesIO):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        docx_path: Binary stream holding the DOCX bytes (the endpoint in this
            module passes an ``io.BytesIO`` wrapping the uploaded file).

    Returns:
        The text of each entry in ``Document.paragraphs``, each followed by
        a newline character.
    """
    document = docx.Document(docx_path)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
# Function to calculate semantic similarity score
def get_similarity_score(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are tokenized as one padded batch, encoded with the
    module-level ``model``, and reduced to one vector each by mean pooling
    over the token embeddings.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The cosine similarity of the two pooled embeddings as a Python float.

    Note:
        ``truncation=True`` means any tokens beyond the tokenizer's maximum
        length are dropped, so only the beginning of a long text contributes
        to the score.
    """
    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # padding=True pads the shorter text up to the longer one. A plain
    # mean(dim=1) would average those padding positions into the shorter
    # text's embedding, so weight every position by the attention mask.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp so a sequence with no attended tokens cannot divide by zero.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    sentence_embeddings = summed / token_counts

    similarity_score = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0], sentence_embeddings[1], dim=0
    )
    return similarity_score.item()
# FastAPI endpoint to process the resume and calculate similarity with LIC profile
@app.post("/score_resume/")
async def score_resume(file: UploadFile = File(...), lic_profile: str = Form(...)):
    """Score an uploaded resume against an LIC profile description.

    Args:
        file: Uploaded resume; must have a ``.pdf`` or ``.docx`` extension
            (matched case-insensitively).
        lic_profile: Profile text the resume is compared with.

    Returns:
        ``{"similarity_score": <float>}`` on success, or ``{"error": <str>}``
        when the file type is unsupported, the profile text is blank, or no
        text could be extracted from the file.
    """
    # The upload's filename can be missing; treat that as an unsupported
    # type instead of failing on ``None.endswith``. Lower-casing lets
    # ".PDF" / ".DOCX" through as well.
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        extract_text = extract_text_from_pdf
    elif filename.endswith('.docx'):
        extract_text = extract_text_from_docx
    else:
        return {"error": "Invalid file type. Please upload a PDF or DOCX file."}

    # Reject blank / whitespace-only profiles before doing any parsing work.
    if not lic_profile or not lic_profile.strip():
        return {"error": "LIC profile text is required."}

    file_content = await file.read()
    resume_text = extract_text(io.BytesIO(file_content))

    # A file with no extractable text would yield a meaningless score.
    if not resume_text.strip():
        return {"error": "No text could be extracted from the uploaded file."}

    # Calculate the similarity score between resume and LIC profile
    score = get_similarity_score(resume_text, lic_profile)
    return {"similarity_score": score}
|