Commit
·
95422ca
1
Parent(s):
004784e
Make llm_analyse more descriptive (richer prompt and docstring)
Browse files
app.py
CHANGED
|
@@ -484,9 +484,9 @@ async def llm_analyse(payload: LLMAnalysePayload):
|
|
| 484 |
"""
|
| 485 |
LLM-based analysis of transactions using Gemini.
|
| 486 |
|
| 487 |
-
Expects a list of transactions with fields
|
| 488 |
|
| 489 |
-
|
| 490 |
"""
|
| 491 |
if not GEMINI_API_KEY:
|
| 492 |
raise HTTPException(
|
|
@@ -502,67 +502,64 @@ async def llm_analyse(payload: LLMAnalysePayload):
|
|
| 502 |
)
|
| 503 |
|
| 504 |
try:
|
| 505 |
-
# Convert to DataFrame
|
| 506 |
df = pd.DataFrame(transactions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
csv_string = df.to_csv(index=False)
|
| 508 |
|
| 509 |
-
# Craft
|
| 510 |
prompt = f"""
|
| 511 |
-
Analyze the following credit card transaction
|
| 512 |
|
| 513 |
CSV Data:
|
| 514 |
{csv_string}
|
| 515 |
|
| 516 |
Instructions:
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
-
|
| 524 |
"""
|
| 525 |
|
| 526 |
# Generate with Gemini
|
| 527 |
model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
|
| 528 |
response = model.generate_content(prompt)
|
| 529 |
|
| 530 |
-
#
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation'] if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
|
| 543 |
-
|
| 544 |
-
raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
|
| 545 |
-
# --- END CRITICAL FIX ---
|
| 546 |
-
|
| 547 |
-
except json.JSONDecodeError as je:
|
| 548 |
-
raise HTTPException(
|
| 549 |
-
status_code=500,
|
| 550 |
-
detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
|
| 551 |
-
)
|
| 552 |
-
except ValueError as ve:
|
| 553 |
-
raise HTTPException(
|
| 554 |
-
status_code=500,
|
| 555 |
-
detail=f"Invalid LLM response structure: {str(ve)}. Raw response: {raw_response}"
|
| 556 |
-
)
|
| 557 |
|
| 558 |
return analysis_json
|
| 559 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
except Exception as e:
|
| 561 |
raise HTTPException(
|
| 562 |
status_code=500,
|
| 563 |
detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
|
| 564 |
)
|
| 565 |
|
|
|
|
| 566 |
# For local development
|
| 567 |
if __name__ == "__main__":
|
| 568 |
import uvicorn
|
|
|
|
| 484 |
"""
|
| 485 |
LLM-based analysis of transactions using Gemini.
|
| 486 |
|
| 487 |
+
Expects a list of transactions with fields including fraud_score, STATUS, cc_num, merchant, category, amt, gender, state, zip, lat, long, city_pop, job, unix_time, merch_lat, merch_long, is_fraud, age, trans_hour, trans_day, trans_month, trans_weekday, distance.
|
| 488 |
|
| 489 |
+
Passes fraud_score as a percentage string (e.g., '94%') for more descriptive LLM analysis.
|
| 490 |
"""
|
| 491 |
if not GEMINI_API_KEY:
|
| 492 |
raise HTTPException(
|
|
|
|
| 502 |
)
|
| 503 |
|
| 504 |
try:
|
| 505 |
+
# Convert to DataFrame
|
| 506 |
df = pd.DataFrame(transactions)
|
| 507 |
+
|
| 508 |
+
# Convert fraud_score to percentage string for LLM
|
| 509 |
+
if 'fraud_score' in df.columns:
|
| 510 |
+
df['fraud_score'] = df['fraud_score'].apply(lambda x: f"{round(x, 2)}%")
|
| 511 |
+
|
| 512 |
+
# Convert DataFrame to CSV string
|
| 513 |
csv_string = df.to_csv(index=False)
|
| 514 |
|
| 515 |
+
# Craft more descriptive prompt
|
| 516 |
prompt = f"""
|
| 517 |
+
You are a senior fraud analyst. Analyze the following credit card transaction dataset in CSV format. Each transaction includes a fraud_score (as percentage, e.g., '94%'), STATUS, transaction details, merchant, amount, location, time, and other relevant features.
|
| 518 |
|
| 519 |
CSV Data:
|
| 520 |
{csv_string}
|
| 521 |
|
| 522 |
Instructions:
|
| 523 |
+
1. Assess the overall risk level of the dataset based on fraud_score percentages, transaction amounts, frequency, location patterns, unusual spending behaviors, and STATUS.
|
| 524 |
+
2. Provide a comprehensive **overall_fraud_score** (0-1 scale, e.g., 0.12 means 12% fraud probability) summarizing the likelihood of fraudulent activity across all transactions.
|
| 525 |
+
3. Generate a detailed **insights** paragraph (150-200 words) describing patterns, clusters of high fraud risk, suspicious merchants, geographic anomalies, temporal trends, or any notable behavior.
|
| 526 |
+
4. Generate a detailed **recommendation** paragraph (100-150 words) outlining specific actionable steps to mitigate fraud risk, including monitoring, alerts, or further investigation.
|
| 527 |
+
5. Output ONLY valid JSON in the exact format: {{"fraud_score": <float 0-1>, "insights": "<string insights paragraph>", "recommendation": "<string recommendation paragraph>"}}. Do not include any extra text or markdown formatting.
|
| 528 |
+
|
| 529 |
+
Focus on narrative-style, descriptive analysis and make the fraud_score percentages in the CSV the key reference points for your reasoning.
|
| 530 |
"""
|
| 531 |
|
| 532 |
# Generate with Gemini
|
| 533 |
model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
|
| 534 |
response = model.generate_content(prompt)
|
| 535 |
|
| 536 |
+
# Extract JSON from response
|
| 537 |
+
raw_response = response.text
|
| 538 |
+
json_str = extract_json_from_markdown(raw_response)
|
| 539 |
+
analysis_json = json.loads(json_str)
|
| 540 |
+
|
| 541 |
+
# Validate output
|
| 542 |
+
if not isinstance(analysis_json.get('fraud_score'), (int, float)) or \
|
| 543 |
+
not isinstance(analysis_json.get('insights'), str) or \
|
| 544 |
+
not isinstance(analysis_json.get('recommendation'), str):
|
| 545 |
+
missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation']
|
| 546 |
+
if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
|
| 547 |
+
raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
|
| 549 |
return analysis_json
|
| 550 |
|
| 551 |
+
except json.JSONDecodeError as je:
|
| 552 |
+
raise HTTPException(
|
| 553 |
+
status_code=500,
|
| 554 |
+
detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
|
| 555 |
+
)
|
| 556 |
except Exception as e:
|
| 557 |
raise HTTPException(
|
| 558 |
status_code=500,
|
| 559 |
detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
|
| 560 |
)
|
| 561 |
|
| 562 |
+
|
| 563 |
# For local development
|
| 564 |
if __name__ == "__main__":
|
| 565 |
import uvicorn
|