Update app.py
app.py
CHANGED
@@ -7,7 +7,6 @@ from docx import Document
 # Initialize the inference client from Hugging Face.
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

-
 def extract_text_from_pdf(pdf_file_bytes):
     """Extract text from PDF bytes."""
     try:
@@ -21,7 +20,6 @@ def extract_text_from_pdf(pdf_file_bytes):
     except Exception as e:
         return f"Error reading PDF: {e}"

-
 def extract_text_from_docx(docx_file_bytes):
     """Extract text from DOCX bytes."""
     try:
@@ -31,58 +29,49 @@ def extract_text_from_docx(docx_file_bytes):
     except Exception as e:
         return f"Error reading DOCX: {e}"

-
 def parse_cv(file, job_description):
-    """Analyze the CV
+    """Analyze the CV and return both extracted text and analysis report."""
     if file is None:
-        return "Please upload a CV file."
-
-    # Correctly handle the file object when type="binary"
+        return "Please upload a CV file.", ""
+
     try:
         file_bytes = file
-        file_ext = "pdf"
-        if file_bytes:
-
-
-
-
-
-        else:
-            return "Unsupported file format. Cannot determine type from content"
+        file_ext = "pdf"
+        if file_bytes.startswith(b'%PDF'):
+            file_ext = "pdf"
+        elif file_bytes.startswith(b'PK\x03\x04'):
+            file_ext = "docx"
+        else:
+            return "Unsupported file format.", "Cannot determine file type from content"
     except Exception as e:
-
+        error_msg = f"Error reading file: {e}"
+        return error_msg, error_msg

+    # Extract text
     if file_ext == "pdf":
-
+        extracted_text = extract_text_from_pdf(file_bytes)
     elif file_ext == "docx":
-
-
-
-
-
-        return text  # Return extraction error if any.
-
-    # Print the extracted CV text
-    print("Extracted CV text (before sending to LLM):\n", text)
+        extracted_text = extract_text_from_docx(file_bytes)
+
+    # Check for extraction errors
+    if extracted_text.startswith("Error"):
+        return extracted_text, "Error during text extraction. Please check the file."

+    # Prepare and send to LLM
     prompt = (
-        f"Analyze the
-        f"Provide a summary, an assessment of fit, and a score from 0 to 10.\n\n"
+        f"Analyze the CV against the job description. Provide a summary, assessment, and score 0-10.\n\n"
         f"Job Description:\n{job_description}\n\n"
-        f"Candidate CV:\n{
+        f"Candidate CV:\n{extracted_text}"
     )

     try:
-
-
+        analysis = client.text_generation(prompt, max_new_tokens=512)
+        return extracted_text, analysis
     except Exception as e:
-        return f"Error
-
-    return response
-
+        return extracted_text, f"Analysis Error: {e}"

 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-    """Generate
+    """Generate chatbot response."""
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
@@ -93,11 +82,9 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,

     response = ""
     try:
-        # Stream response tokens from the chat completion endpoint.
-        # Replace 'max_tokens' with 'max_new_tokens'
         for message_chunk in client.chat_completion(
             messages,
-
+            max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
@@ -108,14 +95,12 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     except Exception as e:
         yield f"Error during chat generation: {e}"

-
-# Build the Gradio interface
+# Gradio Interface
 demo = gr.Blocks()
 with demo:
     gr.Markdown("## AI-powered CV Analyzer and Chatbot")

     with gr.Tab("Chatbot"):
-        # Set type="messages" for both the chat interface and the chatbot.
         chat_interface = gr.ChatInterface(
             respond,
             chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
@@ -129,16 +114,18 @@ with demo:
         )

     with gr.Tab("CV Analyzer"):
-        gr.Markdown(
-            "### Upload your CV (PDF or DOCX) and provide the job description to receive a professional analysis and suitability score."
-        )
-        # Use type="binary" for the file component.
+        gr.Markdown("### Upload your CV and provide the job description")
         file_input = gr.File(label="Upload CV", type="binary")
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
-
+        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
+        analysis_output = gr.Textbox(label="Analysis Report", lines=10)
         analyze_button = gr.Button("Analyze CV")

-        analyze_button.click(
+        analyze_button.click(
+            parse_cv,
+            inputs=[file_input, job_desc_input],
+            outputs=[extracted_text, analysis_output]
+        )

 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()
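For reference, the content-based type check added to parse_cv relies on standard file signatures: a PDF begins with the bytes %PDF, and a DOCX file is a ZIP archive, which begins with the ZIP local-file-header signature PK\x03\x04 (this is why the gr.File component is used with type="binary"). A minimal standalone sketch of that check, where the helper name detect_cv_type is illustrative and not part of app.py:

# Sketch only: the same signature checks as the updated parse_cv, factored into a helper.
def detect_cv_type(file_bytes: bytes):
    """Return 'pdf', 'docx', or None when the signature is not recognized."""
    if file_bytes.startswith(b"%PDF"):
        return "pdf"
    if file_bytes.startswith(b"PK\x03\x04"):
        return "docx"
    return None

# Quick check with synthetic headers:
assert detect_cv_type(b"%PDF-1.7 ...") == "pdf"
assert detect_cv_type(b"PK\x03\x04...") == "docx"
assert detect_cv_type(b"plain text") is None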
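The body of the streaming loop in respond falls outside the hunks shown above. As a generic sketch, not app.py's exact code: with huggingface_hub's InferenceClient, each streamed chunk from chat_completion carries a text delta in choices[0].delta.content, which is appended to the running response and yielded so the Gradio chat updates incrementally. The function name stream_reply and its defaults are assumptions for illustration.

# Generic sketch: consume a streamed chat completion and yield the running text,
# as a Gradio ChatInterface generator expects.
from huggingface_hub import InferenceClient

def stream_reply(client: InferenceClient, messages, max_tokens=512, temperature=0.7, top_p=0.95):
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # deltas can be empty between content tokens
            response += token
            yield response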