Spaces:

gopichandra
/

LIC_PROFILE_MATCHER

Runtime error

App Files Files Community

LIC_PROFILE_MATCHER / app.py

gopichandra

Update app.py

1c8a581 verified 6 months ago

raw

history blame contribute delete

2.16 kB

	from fastapi import FastAPI, File, Form, UploadFile
	from pydantic import BaseModel
	import docx
	import fitz # PyMuPDF for PDF extraction
	from transformers import AutoTokenizer, AutoModel
	import torch
	import io

	app = FastAPI()

	# Hugging Face checkpoint used for semantic textual similarity; the
	# tokenizer and the model below are both loaded from this one identifier.
	_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
	tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
	model = AutoModel.from_pretrained(_CHECKPOINT)

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_path: io.BytesIO):
	    """Return the concatenated plain text of every page of a PDF.

	    Args:
	        pdf_path: In-memory buffer holding the PDF data. The parameter name
	            is kept for backward compatibility; it is not a filesystem path.

	    Returns:
	        The text of all pages, joined in page order.
	    """
	    # In-memory data has to be handed to PyMuPDF through ``stream`` together
	    # with an explicit ``filetype``; the first positional argument of
	    # ``fitz.open`` is a file name. The context manager closes the document
	    # once its text has been collected.
	    with fitz.open(stream=pdf_path, filetype="pdf") as doc:
	        return "".join(page.get_text() for page in doc)

	# Function to extract text from DOCX
	def extract_text_from_docx(docx_path: io.BytesIO):
	    """Collect the text of each paragraph in a DOCX document.

	    ``docx_path`` is handed to ``docx.Document`` unchanged. In the returned
	    string every paragraph's text is followed by a newline.
	    """
	    paragraphs = docx.Document(docx_path).paragraphs
	    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)

	# Function to calculate semantic similarity score
	def get_similarity_score(text1, text2):
	    """Return the cosine similarity between the embeddings of two texts.

	    Both texts are tokenized as one padded batch (inputs longer than the
	    tokenizer's limit are truncated), encoded with the module-level
	    ``model``, and mean-pooled into one vector per text.

	    Args:
	        text1: First text to compare.
	        text2: Second text to compare.

	    Returns:
	        The cosine similarity of the two pooled embeddings as a Python float.
	    """
	    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
	    with torch.no_grad():
	        token_embeddings = model(**inputs).last_hidden_state
	    # padding=True pads the shorter text up to the longer one. Weight every
	    # position by the attention mask so padding positions do not take part
	    # in the mean and skew the shorter text's embedding.
	    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
	    token_counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
	    sentence_embeddings = (token_embeddings * mask).sum(dim=1) / token_counts
	    similarity_score = torch.nn.functional.cosine_similarity(
	        sentence_embeddings[0], sentence_embeddings[1], dim=0
	    )
	    return similarity_score.item()

	# FastAPI endpoint to process the resume and calculate similarity with LIC profile
	@app.post("/score_resume/")
	async def score_resume(file: UploadFile = File(...), lic_profile: str = Form(...)):
	    """Score an uploaded resume against the given LIC profile text.

	    Args:
	        file: Uploaded resume; the extension of its file name selects the
	            PDF or DOCX text extractor.
	        lic_profile: Profile text the resume is compared with.

	    Returns:
	        ``{"similarity_score": <float>}`` on success, or ``{"error": <message>}``
	        when the file type is unsupported or the profile text is empty.
	    """
	    file_content = await file.read()
	    # The upload may arrive without a file name, and extensions are often
	    # upper-case (e.g. "CV.PDF"): normalise before matching so neither case
	    # raises or is wrongly rejected.
	    filename = (file.filename or "").lower()
	    if filename.endswith('.pdf'):
	        resume_text = extract_text_from_pdf(io.BytesIO(file_content))
	    elif filename.endswith('.docx'):
	        resume_text = extract_text_from_docx(io.BytesIO(file_content))
	    else:
	        return {"error": "Invalid file type. Please upload a PDF or DOCX file."}
	    if not lic_profile:
	        return {"error": "LIC profile text is required."}
	    # Calculate the similarity score between resume and LIC profile
	    score = get_similarity_score(resume_text, lic_profile)
	    return {"similarity_score": score}