# Locate pip's ``freeze`` helper: try the ``pip._internal`` location first and
# fall back to ``pip.operations`` when that import is unavailable.
try:
    from pip._internal.operations import freeze
except ImportError:
    from pip.operations import freeze


# Print every entry reported by pip's freeze, one per line, at import time.
pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
| import os |
| from fastapi import FastAPI, HTTPException, File, UploadFile |
| from fastapi.middleware.cors import CORSMiddleware |
| from PyPDF2 import PdfReader |
| import google.generativeai as genai |
| import json |
| import base64 |
| from io import BytesIO |
| from PIL import Image |
| import io |
| import requests |
|
|
| from dotenv import load_dotenv |
| |
# Load variables from a local .env file (if any) before reading the environment.
load_dotenv()

# The Gemini API key is mandatory: a missing "gemini_key" raises KeyError here,
# at import time.
secret = os.environ["gemini_key"]
genai.configure(api_key=secret)

# Two model handles are created up front: one vision-capable, one text-only.
model_vision, model_text = (
    genai.GenerativeModel(model_name)
    for model_name in ("gemini-pro-vision", "gemini-pro")
)
|
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy -- confirm that credentialed cross-origin requests are
# actually required before deploying this as-is.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
|
|
|
|
|
|
def encode_image(image):
    """Serialize an image to a base64-encoded string.

    Args:
        image: An object exposing a ``format`` attribute and a
            ``save(fp, format=...)`` method, e.g. a ``PIL.Image.Image``.

    Returns:
        The image bytes, written in the image's own format, encoded as a
        base64 ``str``.
    """
    # ``format`` is None for images that were not loaded from a file (created
    # in memory or produced by a transform). Saving to a nameless buffer with
    # format=None fails, so fall back to PNG in that case.
    image_format = image.format or "PNG"

    buffered = BytesIO()
    image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
|
|
|
|
def vision(image, *, max_tokens=300, timeout=60):
    """Ask the OpenAI chat-completions API to extract the data from an image.

    Args:
        image: Image object accepted by ``encode_image`` (exposes ``format``
            and ``save``), e.g. a ``PIL.Image.Image``.
        max_tokens: Upper bound on the length of the model's reply. Defaults
            to 300, the value that was previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        KeyError: If the ``open_ai_key`` environment variable is not set.
        requests.RequestException: If the request times out, fails, or the
            API answers with a non-success HTTP status.
    """
    api_key = os.environ["open_ai_key"]
    base64_image = encode_image(image)

    # Label the data URL with the image's real type instead of always claiming
    # JPEG. Format-less images are labelled PNG; unrecognised formats keep the
    # historical "image/jpeg" label.
    mime_by_format = {
        "JPEG": "image/jpeg",
        "PNG": "image/png",
        "GIF": "image/gif",
        "WEBP": "image/webp",
    }
    image_format = (getattr(image, "format", None) or "PNG").upper()
    mime_type = mime_by_format.get(image_format, "image/jpeg")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "extract all data from this image",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    # A timeout keeps a stalled upstream call from hanging the request forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface API failures as an HTTP error rather than an opaque
    # KeyError on the missing 'choices' key.
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]
|
|
|
|
@app.post("/get_ocr_data/")
async def get_data(input_file: UploadFile = File(...)):
    """Extract structured CV fields from an uploaded PDF or image.

    PDFs are read with ``PdfReader``; JPEG/PNG images are sent to ``vision``.
    The resulting text is passed to the Gemini text model, which is asked to
    return selected CV fields as JSON.

    Args:
        input_file: The uploaded file. Its declared content type selects the
            extraction path.

    Returns:
        ``{"data": <parsed JSON from the model>}``.

    Raises:
        HTTPException: 400 for an unsupported content type; 500 for any other
            failure while reading the file, calling a model, or parsing the
            reply.
    """
    # NOTE(review): vision() performs a blocking HTTP call and
    # generate_content() also appears to be synchronous; inside an
    # ``async def`` handler these would block the event loop -- confirm.
    try:
        file_content = await input_file.read()
        file_type = input_file.content_type

        if file_type == "application/pdf":
            pdf_reader = PdfReader(io.BytesIO(file_content))
            # Guard against a page yielding no text (None/empty) so the
            # concatenation cannot fail on it.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        elif file_type in ("image/jpeg", "image/png", "image/jpg"):
            image = Image.open(io.BytesIO(file_content))
            text = vision(image)
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        prompt = (
            f"This is CV data: {text.strip()}\n"
            "I want only:\n"
            "\n"
            "firstname, lastname, contact number, total years of experience, "
            "LinkedIn link, experience, skills\n"
            "\n"
            "in JSON format only"
        )

        response = model_text.generate_content(prompt)
        # The model may wrap its JSON in a Markdown code fence; strip it first.
        raw_json = response.text.replace("```json", "").replace("```", "")
        data = json.loads(raw_json)
        return {"data": data}

    except HTTPException:
        # Deliberate HTTP errors (the 400 above) must reach the client as-is,
        # not be rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing file: {e}"
        ) from e
|
|