Spaces:

Nasma
/

ocrgpt

Sleeping

App Files Community

Nasma committed on Jan 30, 2025

Commit

f9ae632

verified ·

1 Parent(s): e5f9bf9

Update main.py

Browse files

Files changed (1) hide show

main.py +65 -70

main.py CHANGED Viewed

@@ -7,16 +7,13 @@ from fastapi.middleware.cors import CORSMiddleware
 from PyPDF2 import PdfReader
 from PIL import Image
 import fitz  # PyMuPDF
-from dotenv import load_dotenv
 import openai
-#from openai import OpenAI
 # Load environment variables
 load_dotenv()
-openai.api_key = os.environ["OPENAI_API_KEY"]
-#client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
 if not openai.api_key:
     raise RuntimeError("Missing OpenAI API key. Please set OPENAI_API_KEY in the environment variables.")
@@ -33,110 +30,108 @@ app.add_middleware(
 def vision(file_content):
-    pdf_document = fitz.open("pdf", file_content)
     base64_images = []
-    vision_data = [ {
-                    "type": "text",
-                    "text": "extract the all text from this images",
-                    }
-                  ]
-    # Convert PDF pages to images
     for page_num in range(len(pdf_document)):
         page = pdf_document.load_page(page_num)
         pix = page.get_pixmap()
-        img_bytes = pix.tobytes("png")
-        img = Image.open(io.BytesIO(img_bytes))
         # Convert the image to base64
         buffered = io.BytesIO()
         img.save(buffered, format="PNG")
         img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
         base64_images.append(img_base64)
-        vision_data.append(
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
-                        }
-                        )
     print("PDF pages converted to images successfully!")
-    # Use GPT-4 to process the images (textual representation)
     try:
         response = openai.ChatCompletion.create(
-            model="gpt-4o-mini",
-            messages=[
-                {
-                    "role": "user",
-                    "content": vision_data,
-                }
-            ],
         )
-        print(response.choices[0]["message"]["content"])
-        return response.choices[0]["message"]["content"]
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
 @app.post("/get_ocr_data/")
 def get_data(input_file: UploadFile = File(...)):
-    #try:
         # Read the uploaded file
         file_content = input_file.file.read()
         file_type = input_file.content_type
-        text = ""
         if file_type == "application/pdf":
             pdf_reader = PdfReader(io.BytesIO(file_content))
             for page in pdf_reader.pages:
-                text += page.extract_text()
-            if len(text.strip()):  # If PDF text extraction is insufficient
-                print("\nvision running..........................\n")
-                text = vision(file_content)
         else:
             raise HTTPException(status_code=400, detail="Unsupported file type")
-        print(text.strip())
-        # Call GPT-4o to process extracted text into structured JSON
-        prompt = f"""This is CV data: {text.strip()}.
-        IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.if you not found data then fill with "none" don't add any extra explaition text
-        need only json
         Example Output:
         ```json
-        [
-           "firstname": "firstname",
-           "lastname": "lastname",
-           "email": "email",
-           "contact_number": "contact number",
-           "home_address": "full home address",
-           "home_town": "home town or city",
-           "total_years_of_experience": "total years of experience",
-           "education": "Institution Name, Country, Degree Name, Graduation Year; Institution Name, Country, Degree Name, Graduation Year",
-           "LinkedIn_link": "LinkedIn link",
-           "experience": "experience",
            "industry": "industry of work",
-           "skills": "skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section), formatted as: Skill 1, Skill 2, Skill 3, Skill 4, Skill 5",
-           "positions": ["Job title 1, Job title 2, Job title 3"]
-        ]
-        ```
-        """
         response = openai.ChatCompletion.create(
             model="gpt-4o",
             messages=[
-                {"role": "system", "content": """You are an assistant processing CV data and formatting it into structured JSON."""
-                },
                 {"role": "user", "content": prompt}
             ]
         )
-        data = (response["choices"][0]["message"]["content"]).replace("json","").replace("```","")
-        print(data)
-        data = json.loads(data)
-        #data = response["choices"][0]["message"]["content"]
-        return {"data": data}
-    #except Exception as e:
-        #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

 from PyPDF2 import PdfReader
 from PIL import Image
 import fitz  # PyMuPDF
 import openai
+import pytesseract
+from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
+openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise RuntimeError("Missing OpenAI API key. Please set OPENAI_API_KEY in the environment variables.")
 def vision(file_content):
+    """Extract text from images inside a PDF using PyMuPDF & OCR."""
+    pdf_document = fitz.open(stream=file_content, filetype="pdf")
     base64_images = []
+    vision_data = [{"type": "text", "text": "Extract all text from these images."}]
     for page_num in range(len(pdf_document)):
         page = pdf_document.load_page(page_num)
         pix = page.get_pixmap()
+        # Convert the image to a PIL image
+        img = Image.open(io.BytesIO(pix.tobytes("png")))
         # Convert the image to base64
         buffered = io.BytesIO()
         img.save(buffered, format="PNG")
         img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
         base64_images.append(img_base64)
+        vision_data.append({
+            "type": "image_url",
+            "image_url": {"url": f"data:image/png;base64,{img_base64}"},
+        })
     print("PDF pages converted to images successfully!")
+    # Send images to GPT-4o for processing
     try:
         response = openai.ChatCompletion.create(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": vision_data}],
         )
+        return response["choices"][0]["message"]["content"]
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error in GPT-4o vision processing: {str(e)}")
 @app.post("/get_ocr_data/")
 def get_data(input_file: UploadFile = File(...)):
+    """Extract structured data from a PDF resume."""
+    try:
         # Read the uploaded file
         file_content = input_file.file.read()
         file_type = input_file.content_type
+        extracted_text = ""
         if file_type == "application/pdf":
             pdf_reader = PdfReader(io.BytesIO(file_content))
             for page in pdf_reader.pages:
+                text = page.extract_text()
+                if text:
+                    extracted_text += text + "\n"
+            if not extracted_text.strip():  # If no text found, use vision processing
+                print("\nVision OCR running...\n")
+                extracted_text = vision(file_content)
         else:
             raise HTTPException(status_code=400, detail="Unsupported file type")
+        print("Extracted Text:\n", extracted_text.strip())
+        # Call GPT-4o to structure extracted text into JSON format
+        prompt = f"""This is CV data: {extracted_text.strip()}.
+        IMPORTANT: The output should be a JSON array! Make sure the JSON is valid.
+        If no data is found, fill missing fields with "none". Do not include extra explanation text.
         Example Output:
         ```json
+        {{
+           "firstname": "First Name",
+           "lastname": "Last Name",
+           "email": "Email Address",
+           "contact_number": "Contact Number",
+           "home_address": "Full Home Address",
+           "home_town": "Home Town or City",
+           "total_years_of_experience": "Total Years of Experience",
+           "education": "Institution Name, Degree Name",
+           "LinkedIn_link": "LinkedIn URL",
+           "experience": "Job Title, Start Date - End Date, Company Name; Job Title, Start Date - End Date, Company Name; Job Title, Start Date - End Date, Company Name",
            "industry": "industry of work",
+           "skills": "Skill 1, Skill 2, Skill 3",
+           "positions": ["Job Title 1", "Job Title 2"],
+           "summary": "Summary of qualifications and experience"
+        }}
+        ```"""
         response = openai.ChatCompletion.create(
             model="gpt-4o",
             messages=[
+                {"role": "system", "content": "You are an assistant that processes CV data into structured JSON."},
                 {"role": "user", "content": prompt}
             ]
         )
+        # Ensure valid JSON output
+        json_response = response["choices"][0]["message"]["content"].replace("json", "").replace("```", "").strip()
+        structured_data = json.loads(json_response)
+        return {"data": structured_data}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")