Spaces:

gopichandra
/

LIC_PROFILE_MATCHER

Runtime error

File size: 2,155 Bytes

1c8a581
 
0fb32c3
 
55671b0
 
1c8a581
55671b0
1c8a581
55671b0
0fb32c3
 
 
 
 
1c8a581
0fb32c3
 
 
 
55671b0
 
0fb32c3
1c8a581
0fb32c3
 
 
 
55671b0
 
0fb32c3
 
 
55671b0
0fb32c3
 
 
 
 
1c8a581
 
 
 
 
0fb32c3
1c8a581
0fb32c3
1c8a581
55671b0
1c8a581
55671b0
0fb32c3
1c8a581
0fb32c3
 
 
 
1c8a581

from fastapi import FastAPI, File, Form, UploadFile
from pydantic import BaseModel
import docx
import fitz  # PyMuPDF for PDF extraction
from transformers import AutoTokenizer, AutoModel
import torch
import io

# Single checkpoint identifier shared by the tokenizer and the encoder so the
# two can never drift apart.
_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# ASGI application object; the route handlers below register against it.
app = FastAPI()

# Tokenizer and transformer encoder used for semantic textual similarity.
# Both are loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModel.from_pretrained(_MODEL_NAME)

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path: io.BytesIO) -> str:
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: The PDF to read. Despite the name, callers in this file pass
            an in-memory ``io.BytesIO``; raw ``bytes``/``bytearray`` and a
            filesystem path are accepted as well.

    Returns:
        The text of all pages joined in page order.
    """
    if hasattr(pdf_path, "read"):
        # In-memory/file-like input must be handed to PyMuPDF via ``stream=``
        # together with an explicit ``filetype``; passing it positionally
        # makes PyMuPDF treat it as a filename and fail.
        if hasattr(pdf_path, "getvalue"):
            data = pdf_path.getvalue()
        else:
            data = pdf_path.read()
        doc = fitz.open(stream=data, filetype="pdf")
    elif isinstance(pdf_path, (bytes, bytearray)):
        doc = fitz.open(stream=pdf_path, filetype="pdf")
    else:
        # Anything else is assumed to be a filesystem path.
        doc = fitz.open(pdf_path)

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)

# Function to extract text from DOCX
def extract_text_from_docx(docx_path: io.BytesIO):
    """Return the text of a DOCX document, one paragraph per line.

    Each paragraph's text is followed by a newline, so a non-empty document
    always yields a string ending in ``"\\n"``.
    """
    document = docx.Document(docx_path)
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)

# Function to calculate semantic similarity score
def get_similarity_score(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are tokenized as one padded batch (inputs longer than the
    tokenizer's limit are truncated), encoded with the module-level ``model``,
    and reduced to one vector each by mean pooling over real tokens only.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two pooled embeddings as a Python float.
    """
    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        token_embeddings = model(**inputs).last_hidden_state

    # The shorter text is padded up to the longer one's length. Weight every
    # token by the attention mask so padding positions do not leak into the
    # average and skew the shorter text's embedding.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero should a row have no real tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    sentence_embeddings = summed / token_counts

    similarity_score = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0], sentence_embeddings[1], dim=0
    )
    return similarity_score.item()

# FastAPI endpoint to process the resume and calculate similarity with LIC profile
@app.post("/score_resume/")
async def score_resume(file: UploadFile = File(...), lic_profile: str = Form(...)):
    """Score an uploaded resume against the supplied LIC profile text.

    Args:
        file: The uploaded resume; must be named ``*.pdf`` or ``*.docx``
            (matched case-insensitively).
        lic_profile: Profile text the resume is compared against.

    Returns:
        ``{"similarity_score": <float>}`` on success, or ``{"error": <str>}``
        when the file type is unsupported, the profile text is blank, or no
        text could be extracted from the upload.
    """
    # The client may omit the filename, and extensions are not reliably
    # lower-case, so normalise before choosing an extractor.
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        extract_text = extract_text_from_pdf
    elif filename.endswith('.docx'):
        extract_text = extract_text_from_docx
    else:
        return {"error": "Invalid file type. Please upload a PDF or DOCX file."}

    # Validate the cheap input before reading and parsing the upload.
    if not lic_profile or not lic_profile.strip():
        return {"error": "LIC profile text is required."}

    file_content = await file.read()
    resume_text = extract_text(io.BytesIO(file_content))

    # Image-only or empty documents yield no text; a similarity score computed
    # from an empty string would be meaningless, so report it instead.
    if not resume_text.strip():
        return {"error": "No text could be extracted from the uploaded file."}

    # Calculate the similarity score between resume and LIC profile
    score = get_similarity_score(resume_text, lic_profile)

    return {"similarity_score": score}