gopichandra committed on
Commit
1c8a581
·
verified ·
1 Parent(s): 96f47bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -21
app.py CHANGED
@@ -1,18 +1,19 @@
1
- from flask import Flask, request, jsonify
 
2
  import docx
3
  import fitz # PyMuPDF for PDF extraction
4
  from transformers import AutoTokenizer, AutoModel
5
  import torch
6
- import os
7
 
8
- app = Flask(__name__)
9
 
10
  # Load the Hugging Face tokenizer and model for semantic textual similarity
11
  tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
12
  model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
13
 
14
  # Function to extract text from PDF
15
- def extract_text_from_pdf(pdf_path):
16
  doc = fitz.open(pdf_path)
17
  text = ""
18
  for page in doc:
@@ -20,7 +21,7 @@ def extract_text_from_pdf(pdf_path):
20
  return text
21
 
22
  # Function to extract text from DOCX
23
- def extract_text_from_docx(docx_path):
24
  doc = docx.Document(docx_path)
25
  text = ""
26
  for para in doc.paragraphs:
@@ -36,28 +37,22 @@ def get_similarity_score(text1, text2):
36
  similarity_score = torch.nn.functional.cosine_similarity(sentence_embeddings[0], sentence_embeddings[1], dim=0)
37
  return similarity_score.item()
38
 
39
- # API endpoint to process the resume and calculate similarity with LIC profile
40
- @app.route('/score_resume', methods=['POST'])
41
- def score_resume():
42
- if 'file' not in request.files:
43
- return jsonify({"error": "No file part"}), 400
44
- file = request.files['file']
45
- lic_profile = request.form.get('lic_profile', '') # LIC profile text to compare against
46
-
47
  if file.filename.endswith('.pdf'):
48
- resume_text = extract_text_from_pdf(file)
49
  elif file.filename.endswith('.docx'):
50
- resume_text = extract_text_from_docx(file)
51
  else:
52
- return jsonify({"error": "Invalid file type. Please upload a PDF or DOCX file."}), 400
53
 
54
  if not lic_profile:
55
- return jsonify({"error": "LIC profile text is required."}), 400
56
 
57
  # Calculate the similarity score between resume and LIC profile
58
  score = get_similarity_score(resume_text, lic_profile)
59
 
60
- return jsonify({"similarity_score": score})
61
-
62
- if __name__ == '__main__':
63
- app.run(debug=True)
 
1
+ from fastapi import FastAPI, File, Form, UploadFile
2
+ from pydantic import BaseModel
3
  import docx
4
  import fitz # PyMuPDF for PDF extraction
5
  from transformers import AutoTokenizer, AutoModel
6
  import torch
7
+ import io
8
 
9
+ app = FastAPI()
10
 
11
  # Load the Hugging Face tokenizer and model for semantic textual similarity
12
  tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
13
  model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
14
 
15
  # Function to extract text from PDF
16
+ def extract_text_from_pdf(pdf_path: io.BytesIO):
17
  doc = fitz.open(pdf_path)
18
  text = ""
19
  for page in doc:
 
21
  return text
22
 
23
  # Function to extract text from DOCX
24
+ def extract_text_from_docx(docx_path: io.BytesIO):
25
  doc = docx.Document(docx_path)
26
  text = ""
27
  for para in doc.paragraphs:
 
37
  similarity_score = torch.nn.functional.cosine_similarity(sentence_embeddings[0], sentence_embeddings[1], dim=0)
38
  return similarity_score.item()
39
 
40
+ # FastAPI endpoint to process the resume and calculate similarity with LIC profile
41
+ @app.post("/score_resume/")
42
+ async def score_resume(file: UploadFile = File(...), lic_profile: str = Form(...)):
43
+ file_content = await file.read()
44
+
 
 
 
45
  if file.filename.endswith('.pdf'):
46
+ resume_text = extract_text_from_pdf(io.BytesIO(file_content))
47
  elif file.filename.endswith('.docx'):
48
+ resume_text = extract_text_from_docx(io.BytesIO(file_content))
49
  else:
50
+ return {"error": "Invalid file type. Please upload a PDF or DOCX file."}
51
 
52
  if not lic_profile:
53
+ return {"error": "LIC profile text is required."}
54
 
55
  # Calculate the similarity score between resume and LIC profile
56
  score = get_similarity_score(resume_text, lic_profile)
57
 
58
+ return {"similarity_score": score}