Spaces:

Aramente
/

bored-cv-api

Running

Aramente committed on Apr 14

Commit

bc01feb

1 Parent(s): 7cd7958

fix: robust JSON extraction from Gemini 2.5 Flash — find JSON in response, strip thinking, fix trailing commas

Files changed (2) hide show

app/routers/linkedin.py CHANGED Viewed

@@ -48,7 +48,7 @@ async def debug_parse_pdf(file: UploadFile = File(...)):
     import google.generativeai as genai
     genai.configure(api_key=api_key)
-    model = genai.GenerativeModel("gemini-2.0-flash-lite")
     prompt = f"""Extract structured profile data from this LinkedIn PDF export.
@@ -66,7 +66,12 @@ Return valid JSON with: name, title, email, phone, linkedin, location, summary,
                 response_mime_type="application/json",
             ),
         )
-        data = json.loads(r.text)
         return {
             "ok": True,
             "name": data.get("name"),

     import google.generativeai as genai
     genai.configure(api_key=api_key)
+    model = genai.GenerativeModel("gemini-2.5-flash")
     prompt = f"""Extract structured profile data from this LinkedIn PDF export.
                 response_mime_type="application/json",
             ),
         )
+        import re as re_mod
+        raw_resp = r.text.strip()
+        start = raw_resp.find("{")
+        end = raw_resp.rfind("}") + 1
+        json_str = re_mod.sub(r",\s*([}\]])", r"\1", raw_resp[start:end])
+        data = json.loads(json_str)
         return {
             "ok": True,
             "name": data.get("name"),

app/services/pdf_parser.py CHANGED Viewed

@@ -31,7 +31,7 @@ def parse_linkedin_pdf(pdf_bytes: bytes) -> Profile:
         return _fallback_parse(raw_text)
     genai.configure(api_key=api_key)
-    model = genai.GenerativeModel("gemini-2.0-flash-lite")  # No thinking — reliable JSON, faster
     prompt = f"""Extract structured profile data from this LinkedIn PDF export. The text is messy because LinkedIn PDFs use a two-column layout — sections are interleaved. Use your judgment to reconstruct the correct structure.
@@ -82,10 +82,20 @@ IMPORTANT:
             generation_config=genai.types.GenerationConfig(
                 max_output_tokens=4000,
                 temperature=0.1,
-                response_mime_type="application/json",  # Forces valid JSON output
             ),
         )
-        data = json.loads(response.text)
         def s(val: str | None) -> str:
             """Safe string — convert None/null to empty string."""

         return _fallback_parse(raw_text)
     genai.configure(api_key=api_key)
+    model = genai.GenerativeModel("gemini-2.5-flash")
     prompt = f"""Extract structured profile data from this LinkedIn PDF export. The text is messy because LinkedIn PDFs use a two-column layout — sections are interleaved. Use your judgment to reconstruct the correct structure.
             generation_config=genai.types.GenerationConfig(
                 max_output_tokens=4000,
                 temperature=0.1,
             ),
         )
+        # Extract JSON from response — may have markdown blocks or thinking preamble
+        raw_resp = response.text.strip()
+        # Find the JSON object in the response
+        start = raw_resp.find("{")
+        end = raw_resp.rfind("}") + 1
+        if start == -1 or end == 0:
+            raise ValueError("No JSON found in response")
+        json_str = raw_resp[start:end]
+        # Fix common issues
+        import re
+        json_str = re.sub(r",\s*([}\]])", r"\1", json_str)  # trailing commas
+        data = json.loads(json_str)
         def s(val: str | None) -> str:
             """Safe string — convert None/null to empty string."""