bizcards-extractor / biz /gemini.py
Tamanna Alam
Add Gradio app
6548988
import os, json
from typing import Dict
from google import genai
from google.genai import types
# Prompt text sent alongside each card image. It asks the model to reply with
# a single JSON object using the fixed keys listed below (null / [] when a
# field is unknown) and no surrounding text.
# NOTE: this string is runtime data — editing it changes what the model is asked.
SYSTEM_INSTRUCTIONS = """
You read a cropped Japanese business card image and return strict JSON fields.
Return ONLY this JSON:
{
"person_kanji": "...", "person_kana": "...", "person_romaji": "...",
"title": "...", "department": "...", "company": "...",
"email": ["..."], "phone": ["..."],
"website": "...", "address_jp": "...", "notes": "..."
}
Use null / [] for unknowns. Romaji: family-name LAST. No extra text.
"""
def make_client():
    """Build a ``genai.Client`` configured from environment variables.

    When ``GOOGLE_GENAI_USE_VERTEXAI`` is ``1``/``true``/``yes`` (compared
    case-insensitively) a Vertex AI client is created from
    ``GOOGLE_CLOUD_PROJECT`` and ``GOOGLE_CLOUD_LOCATION`` (defaulting to
    ``us-central1``). Otherwise an API-key client is created from
    ``GOOGLE_API_KEY``.

    Returns:
        The constructed ``genai.Client``.

    Raises:
        RuntimeError: If the variable required by the selected mode
            (``GOOGLE_CLOUD_PROJECT`` or ``GOOGLE_API_KEY``) is unset or empty.
    """
    env = os.environ
    vertex_flag = env.get("GOOGLE_GENAI_USE_VERTEXAI", "false").lower()

    # API-key mode: anything other than an explicit opt-in to Vertex AI.
    if vertex_flag not in ("1", "true", "yes"):
        key = env.get("GOOGLE_API_KEY")
        if key:
            return genai.Client(api_key=key)
        raise RuntimeError("Missing GOOGLE_API_KEY.")

    # Vertex AI mode: a project is mandatory, the location has a default.
    gcp_project = env.get("GOOGLE_CLOUD_PROJECT")
    if not gcp_project:
        raise RuntimeError("Set GOOGLE_CLOUD_PROJECT or use GOOGLE_API_KEY.")
    region = env.get("GOOGLE_CLOUD_LOCATION", "us-central1")
    return genai.Client(vertexai=True, project=gcp_project, location=region)
def extract_from_crop(image_bytes: bytes, source_name: str) -> Dict:
    """Extract business-card fields from one cropped card image via Gemini.

    Sends ``SYSTEM_INSTRUCTIONS`` and the image as a single user turn to
    ``gemini-2.5-flash`` with a JSON response MIME type, then parses the reply.

    Args:
        image_bytes: Raw bytes of the cropped card image. They are sent with
            MIME type ``image/png``.
        source_name: Caller-supplied label stored under the ``"source_name"``
            key of the result.

    Returns:
        The parsed JSON object with ``"source_name"`` added. If the reply has
        no text, is not valid JSON, or is valid JSON but not an object, the
        result contains only ``"source_name"``.

    Raises:
        RuntimeError: Propagated from ``make_client()`` when the required
            environment variables are missing. Errors raised by the API call
            itself are not caught here.
    """
    client = make_client()

    # ``text`` must be passed by keyword: current google-genai releases declare
    # it keyword-only, so a positional call raises TypeError.
    prompt_part = types.Part.from_text(text=SYSTEM_INSTRUCTIONS)
    img_part = types.Part.from_bytes(data=image_bytes, mime_type="image/png")

    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[types.Content(role="user", parts=[prompt_part, img_part])],
        config=types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
        ),
    )

    # Best-effort parse: a missing reply (``resp.text`` is None -> TypeError)
    # or malformed JSON (JSONDecodeError is a ValueError) yields an empty record.
    try:
        data = json.loads(resp.text)
    except (TypeError, ValueError):
        data = {}

    # Valid JSON that is not an object (null, list, string, number) cannot
    # carry the "source_name" key, so treat it like an unparseable reply.
    if not isinstance(data, dict):
        data = {}

    data["source_name"] = source_name
    return data