banao-tech committed on
Commit
ba8eb97
·
verified ·
1 Parent(s): bd4a430

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -5
app.py CHANGED
@@ -46,6 +46,46 @@ app.add_middleware(
46
 
47
  client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", ""))
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # ── Request / Response Models ─────────────────────────────────────────────────
50
  class ProblemInput(BaseModel):
51
  content: str
@@ -480,12 +520,15 @@ def analyze(body: ProblemInput):
480
  """Run full 5-agent pipeline. Returns structured JSON."""
481
  if not body.content.strip():
482
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
483
- if len(body.content) < 30:
 
 
 
484
  raise HTTPException(status_code=400, detail="Content too short for meaningful analysis.")
485
 
486
  try:
487
  result = run_pipeline(
488
- content=body.content,
489
  name=body.intern_name or "Intern",
490
  role=body.intern_role or "AI Developer Intern",
491
  goal=body.intern_goal or "",
@@ -505,6 +548,9 @@ def analyze_stream(body: ProblemInput):
505
  if not body.content.strip():
506
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
507
 
 
 
 
508
  def event_stream():
509
  agents = [
510
  ("analyst", AGENT_ANALYST, "Problem Analyst"),
@@ -515,7 +561,7 @@ def analyze_stream(body: ProblemInput):
515
  ]
516
 
517
  context = {
518
- "content": body.content[:8000],
519
  "name": body.intern_name or "Intern",
520
  "role": body.intern_role or "AI Developer Intern",
521
  "goal": body.intern_goal or "",
@@ -528,7 +574,7 @@ def analyze_stream(body: ProblemInput):
528
 
529
  # Build context-aware prompt for this agent
530
  if key == "analyst":
531
- user_msg = f"Intern: {context['name']} | Role: {context['role']} | Goal: {context['goal']}\n\nAnalyze:\n{context['content']}"
532
  elif key == "root_cause":
533
  user_msg = f"Problem:\n{accumulated.get('analyst','')}\n\nOriginal:\n{context['content'][:2000]}"
534
  elif key == "solutions":
@@ -571,9 +617,11 @@ def analyze_pdf(body: ProblemInput):
571
  if not body.content.strip():
572
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
573
 
 
 
574
  try:
575
  analysis = run_pipeline(
576
- content=body.content,
577
  name=body.intern_name or "Intern",
578
  role=body.intern_role or "AI Developer Intern",
579
  goal=body.intern_goal or "",
 
46
 
47
  client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", ""))
48
 
49
+
50
+ # ── PDF Text Extraction ───────────────────────────────────────────────────────
51
+
52
def decode_content(raw: str) -> str:
    """Return the text to analyse for a request's ``content`` value.

    Clients may upload a PDF by sending ``__PDF_BASE64__<base64 data>`` in
    place of plain text. In that case the payload is base64-decoded, parsed
    with pypdf, and the text of every page that yields any is joined with
    blank lines. Any other string is returned unchanged.

    Args:
        raw: The content string exactly as received from the client.

    Returns:
        ``raw`` itself when it is not a PDF payload, otherwise the text
        extracted from the PDF.

    Raises:
        HTTPException: With status 400 when the payload is not valid base64,
            when the PDF cannot be read, or when no text could be extracted
            from it.
    """
    # Local imports keep the block self-contained; `io` in particular must be
    # in scope, otherwise the broad PDF handler below would report the
    # resulting NameError as an unreadable PDF.
    import base64
    import io

    prefix = "__PDF_BASE64__"

    # Callers only reject content that is empty after .strip(), so a payload
    # may arrive with leading whitespace; it must still be recognised as a PDF
    # instead of being forwarded to the pipeline as a raw base64 blob.
    candidate = raw.lstrip()
    if not candidate.startswith(prefix):
        return raw

    # Remove whitespace (e.g. line-wrapped base64) so that strict validation
    # below only rejects genuinely invalid characters.
    b64 = "".join(candidate[len(prefix):].split())
    try:
        # validate=True: without it, non-alphabet characters are silently
        # discarded and garbage input is not reported as invalid.
        pdf_bytes = base64.b64decode(b64, validate=True)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        raise HTTPException(
            status_code=400, detail="Invalid base64 PDF data."
        ) from exc

    # Imported lazily so plain-text requests never depend on pypdf.
    from pypdf import PdfReader

    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        page_texts = (page.extract_text() for page in reader.pages)
        # Skip pages that yield no text (None or empty string).
        extracted = "\n\n".join(
            text.strip() for text in page_texts if text
        ).strip()
    except Exception as exc:
        # Request boundary: any parsing failure is reported to the client as
        # a 400 rather than surfacing as a server error.
        raise HTTPException(
            status_code=400, detail=f"Could not read PDF: {exc}"
        ) from exc

    if not extracted:
        raise HTTPException(
            status_code=400,
            detail="PDF appears to be scanned/image-based — no text found. Please paste the text manually.",
        )
    return extracted
87
+
88
+
89
  # ── Request / Response Models ─────────────────────────────────────────────────
90
  class ProblemInput(BaseModel):
91
  content: str
 
520
  """Run full 5-agent pipeline. Returns structured JSON."""
521
  if not body.content.strip():
522
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
523
+
524
+ content = decode_content(body.content)
525
+
526
+ if len(content) < 30:
527
  raise HTTPException(status_code=400, detail="Content too short for meaningful analysis.")
528
 
529
  try:
530
  result = run_pipeline(
531
+ content=content,
532
  name=body.intern_name or "Intern",
533
  role=body.intern_role or "AI Developer Intern",
534
  goal=body.intern_goal or "",
 
548
  if not body.content.strip():
549
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
550
 
551
+ # Decode PDF if needed before streaming starts
552
+ resolved_content = decode_content(body.content)
553
+
554
  def event_stream():
555
  agents = [
556
  ("analyst", AGENT_ANALYST, "Problem Analyst"),
 
561
  ]
562
 
563
  context = {
564
+ "content": resolved_content[:8000],
565
  "name": body.intern_name or "Intern",
566
  "role": body.intern_role or "AI Developer Intern",
567
  "goal": body.intern_goal or "",
 
574
 
575
  # Build context-aware prompt for this agent
576
  if key == "analyst":
577
+ user_msg = f"Intern: {context['name']} | Role: {context['role']} | Goal: {context['goal']}\n\nAnalyze this content:\n{context['content']}"
578
  elif key == "root_cause":
579
  user_msg = f"Problem:\n{accumulated.get('analyst','')}\n\nOriginal:\n{context['content'][:2000]}"
580
  elif key == "solutions":
 
617
  if not body.content.strip():
618
  raise HTTPException(status_code=400, detail="Content cannot be empty.")
619
 
620
+ content = decode_content(body.content)
621
+
622
  try:
623
  analysis = run_pipeline(
624
+ content=content,
625
  name=body.intern_name or "Intern",
626
  role=body.intern_role or "AI Developer Intern",
627
  goal=body.intern_goal or "",