NeerajCodz committed on
Commit
95422ca
·
1 Parent(s): 004784e

llm analyse is more descriptive

Browse files
Files changed (1) hide show
  1. app.py +36 -39
app.py CHANGED
@@ -484,9 +484,9 @@ async def llm_analyse(payload: LLMAnalysePayload):
484
  """
485
  LLM-based analysis of transactions using Gemini.
486
 
487
- Expects a list of transactions with fields: fraud_score, STATUS, cc_num, merchant, category, amt, gender, state, zip, lat, long, city_pop, job, unix_time, merch_lat, merch_long, is_fraud, age, trans_hour, trans_day, trans_month, trans_weekday, distance
488
 
489
- Converts to CSV, analyzes with Gemini, returns overall fraud_score (0-1), insights, and recommendation.
490
  """
491
  if not GEMINI_API_KEY:
492
  raise HTTPException(
@@ -502,67 +502,64 @@ async def llm_analyse(payload: LLMAnalysePayload):
502
  )
503
 
504
  try:
505
- # Convert to DataFrame and CSV string
506
  df = pd.DataFrame(transactions)
 
 
 
 
 
 
507
  csv_string = df.to_csv(index=False)
508
 
509
- # Craft prompt (Cleaned up JSON instruction and content requests)
510
  prompt = f"""
511
- Analyze the following credit card transaction data (CSV format). Each row includes fraud_score (0-100 from ML model), STATUS, and other transaction details.
512
 
513
  CSV Data:
514
  {csv_string}
515
 
516
  Instructions:
517
- - Compute an overall fraud_score (0-1 scale, where 0.12 means 12% fraud probability) based on patterns in fraud_score, amounts (amt), categories (category), locations, times, and is_fraud labels.
518
- - Provide detailed **insights** (a brief paragraph) summarizing the overall assessment and highlighting key patterns (e.g., high average fraud_score, unusual spending).
519
- - Provide a detailed **recommendation** (a brief paragraph) outlining specific actions based on the risk level.
520
- - Output ONLY valid JSON in this exact format: {{"fraud_score": <float 0-1>, "insights": "<string insights paragraph>", "recommendation": "<string recommendation paragraph>"}}.
521
- - Ensure fraud_score is a float (e.g., 0.12), rounded to 2 decimals if needed.
522
- - **insights** and **recommendation** should be brief paragraphs (minimum 100 chars total for each) without line breaks or any formatting. Do not reveal any file structure or CSV data directly in the output strings.
523
- - No preamble or additional text, ONLY the JSON object.
524
  """
525
 
526
  # Generate with Gemini
527
  model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
528
  response = model.generate_content(prompt)
529
 
530
- # Parse response as JSON with markdown extraction
531
- try:
532
- raw_response = response.text
533
- json_str = extract_json_from_markdown(raw_response)
534
- analysis_json = json.loads(json_str)
535
-
536
- # --- CRITICAL FIX: Update validation to check for 'insights' and 'recommendation' ---
537
- if not isinstance(analysis_json.get('fraud_score'), (int, float)) or \
538
- not isinstance(analysis_json.get('insights'), str) or \
539
- not isinstance(analysis_json.get('recommendation'), str):
540
-
541
- # Re-raise with descriptive error if keys are missing or types are wrong
542
- missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation'] if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
543
-
544
- raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
545
- # --- END CRITICAL FIX ---
546
-
547
- except json.JSONDecodeError as je:
548
- raise HTTPException(
549
- status_code=500,
550
- detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
551
- )
552
- except ValueError as ve:
553
- raise HTTPException(
554
- status_code=500,
555
- detail=f"Invalid LLM response structure: {str(ve)}. Raw response: {raw_response}"
556
- )
557
 
558
  return analysis_json
559
 
 
 
 
 
 
560
  except Exception as e:
561
  raise HTTPException(
562
  status_code=500,
563
  detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
564
  )
565
 
 
566
  # For local development
567
  if __name__ == "__main__":
568
  import uvicorn
 
484
  """
485
  LLM-based analysis of transactions using Gemini.
486
 
487
+ Expects a list of transactions with fields including fraud_score, STATUS, cc_num, merchant, category, amt, gender, state, zip, lat, long, city_pop, job, unix_time, merch_lat, merch_long, is_fraud, age, trans_hour, trans_day, trans_month, trans_weekday, distance.
488
 
489
+ Passes fraud_score as a percentage string (e.g., '94%') for more descriptive LLM analysis.
490
  """
491
  if not GEMINI_API_KEY:
492
  raise HTTPException(
 
502
  )
503
 
504
  try:
505
+ # Convert to DataFrame
506
  df = pd.DataFrame(transactions)
507
+
508
+ # Convert fraud_score to percentage string for LLM
509
+ if 'fraud_score' in df.columns:
510
+ df['fraud_score'] = df['fraud_score'].apply(lambda x: f"{round(x, 2)}%")
511
+
512
+ # Convert DataFrame to CSV string
513
  csv_string = df.to_csv(index=False)
514
 
515
+ # Craft more descriptive prompt
516
  prompt = f"""
517
+ You are a senior fraud analyst. Analyze the following credit card transaction dataset in CSV format. Each transaction includes a fraud_score (as percentage, e.g., '94%'), STATUS, transaction details, merchant, amount, location, time, and other relevant features.
518
 
519
  CSV Data:
520
  {csv_string}
521
 
522
  Instructions:
523
+ 1. Assess the overall risk level of the dataset based on fraud_score percentages, transaction amounts, frequency, location patterns, unusual spending behaviors, and STATUS.
524
+ 2. Provide a comprehensive **overall_fraud_score** (0-1 scale, e.g., 0.12 means 12% fraud probability) summarizing the likelihood of fraudulent activity across all transactions.
525
+ 3. Generate a detailed **insights** paragraph (150-200 words) describing patterns, clusters of high fraud risk, suspicious merchants, geographic anomalies, temporal trends, or any notable behavior.
526
+ 4. Generate a detailed **recommendation** paragraph (100-150 words) outlining specific actionable steps to mitigate fraud risk, including monitoring, alerts, or further investigation.
527
+ 5. Output ONLY valid JSON in the exact format: {{"fraud_score": <float 0-1>, "insights": "<string insights paragraph>", "recommendation": "<string recommendation paragraph>"}}. Do not include any extra text or markdown formatting.
528
+
529
+ Focus on narrative-style, descriptive analysis and make the fraud_score percentages in the CSV the key reference points for your reasoning.
530
  """
531
 
532
  # Generate with Gemini
533
  model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
534
  response = model.generate_content(prompt)
535
 
536
+ # Extract JSON from response
537
+ raw_response = response.text
538
+ json_str = extract_json_from_markdown(raw_response)
539
+ analysis_json = json.loads(json_str)
540
+
541
+ # Validate output
542
+ if not isinstance(analysis_json.get('fraud_score'), (int, float)) or \
543
+ not isinstance(analysis_json.get('insights'), str) or \
544
+ not isinstance(analysis_json.get('recommendation'), str):
545
+ missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation']
546
+ if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
547
+ raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
 
549
  return analysis_json
550
 
551
+ except json.JSONDecodeError as je:
552
+ raise HTTPException(
553
+ status_code=500,
554
+ detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
555
+ )
556
  except Exception as e:
557
  raise HTTPException(
558
  status_code=500,
559
  detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
560
  )
561
 
562
+
563
  # For local development
564
  if __name__ == "__main__":
565
  import uvicorn