Spaces:

NeerajCodz
/

creditCardFraudDetection

Running

App Files Community

NeerajCodz committed on Dec 12, 2025

Commit

95422ca

1 Parent(s): 004784e

llm analyse is more descriptive

Browse files

Files changed (1) hide show

app.py +36 -39

app.py CHANGED Viewed

@@ -484,9 +484,9 @@ async def llm_analyse(payload: LLMAnalysePayload):
     """
     LLM-based analysis of transactions using Gemini.
-    Expects a list of transactions with fields: fraud_score, STATUS, cc_num, merchant, category, amt, gender, state, zip, lat, long, city_pop, job, unix_time, merch_lat, merch_long, is_fraud, age, trans_hour, trans_day, trans_month, trans_weekday, distance
-    Converts to CSV, analyzes with Gemini, returns overall fraud_score (0-1), insights, and recommendation.
     """
     if not GEMINI_API_KEY:
         raise HTTPException(
@@ -502,67 +502,64 @@ async def llm_analyse(payload: LLMAnalysePayload):
         )
     try:
-        # Convert to DataFrame and CSV string
         df = pd.DataFrame(transactions)
         csv_string = df.to_csv(index=False)
-        # Craft prompt (Cleaned up JSON instruction and content requests)
         prompt = f"""
-Analyze the following credit card transaction data (CSV format). Each row includes fraud_score (0-100 from ML model), STATUS, and other transaction details.
 CSV Data:
 {csv_string}
 Instructions:
-- Compute an overall fraud_score (0-1 scale, where 0.12 means 12% fraud probability) based on patterns in fraud_score, amounts (amt), categories (category), locations, times, and is_fraud labels.
-- Provide detailed **insights** (a brief paragraph) summarizing the overall assessment and highlighting key patterns (e.g., high average fraud_score, unusual spending).
-- Provide a detailed **recommendation** (a brief paragraph) outlining specific actions based on the risk level.
-- Output ONLY valid JSON in this exact format: {{"fraud_score": <float 0-1>, "insights": "<string insights paragraph>", "recommendation": "<string recommendation paragraph>"}}.
-- Ensure fraud_score is a float (e.g., 0.12), rounded to 2 decimals if needed.
-- **insights** and **recommendation** should be brief paragraphs (minimum 100 chars total for each) without line breaks or any formatting. Do not reveal any file structure or CSV data directly in the output strings.
-- No preamble or additional text, ONLY the JSON object.
 """
         # Generate with Gemini
         model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
         response = model.generate_content(prompt)
-        # Parse response as JSON with markdown extraction
-        try:
-            raw_response = response.text
-            json_str = extract_json_from_markdown(raw_response)
-            analysis_json = json.loads(json_str)
-            # --- CRITICAL FIX: Update validation to check for 'insights' and 'recommendation' ---
-            if not isinstance(analysis_json.get('fraud_score'), (int, float)) or \
-               not isinstance(analysis_json.get('insights'), str) or \
-               not isinstance(analysis_json.get('recommendation'), str):
-                # Re-raise with descriptive error if keys are missing or types are wrong
-                missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation'] if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
-                raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
-            # --- END CRITICAL FIX ---
-        except json.JSONDecodeError as je:
-            raise HTTPException(
-                status_code=500,
-                detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
-            )
-        except ValueError as ve:
-            raise HTTPException(
-                status_code=500,
-                detail=f"Invalid LLM response structure: {str(ve)}. Raw response: {raw_response}"
-            )
         return analysis_json
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
         )
 # For local development
 if __name__ == "__main__":
     import uvicorn

     """
     LLM-based analysis of transactions using Gemini.
+    Expects a list of transactions with fields including fraud_score, STATUS, cc_num, merchant, category, amt, gender, state, zip, lat, long, city_pop, job, unix_time, merch_lat, merch_long, is_fraud, age, trans_hour, trans_day, trans_month, trans_weekday, distance.
+    Passes fraud_score as a percentage string (e.g., '94%') for more descriptive LLM analysis.
     """
     if not GEMINI_API_KEY:
         raise HTTPException(
         )
     try:
+        # Convert to DataFrame
         df = pd.DataFrame(transactions)
+        # Convert fraud_score to percentage string for LLM
+        if 'fraud_score' in df.columns:
+            df['fraud_score'] = df['fraud_score'].apply(lambda x: f"{round(x, 2)}%")
+        # Convert DataFrame to CSV string
         csv_string = df.to_csv(index=False)
+        # Craft more descriptive prompt
         prompt = f"""
+You are a senior fraud analyst. Analyze the following credit card transaction dataset in CSV format. Each transaction includes a fraud_score (as percentage, e.g., '94%'), STATUS, transaction details, merchant, amount, location, time, and other relevant features.
 CSV Data:
 {csv_string}
 Instructions:
+1. Assess the overall risk level of the dataset based on fraud_score percentages, transaction amounts, frequency, location patterns, unusual spending behaviors, and STATUS.
+2. Provide a comprehensive **overall_fraud_score** (0-1 scale, e.g., 0.12 means 12% fraud probability) summarizing the likelihood of fraudulent activity across all transactions.
+3. Generate a detailed **insights** paragraph (150-200 words) describing patterns, clusters of high fraud risk, suspicious merchants, geographic anomalies, temporal trends, or any notable behavior.
+4. Generate a detailed **recommendation** paragraph (100-150 words) outlining specific actionable steps to mitigate fraud risk, including monitoring, alerts, or further investigation.
+5. Output ONLY valid JSON in the exact format: {{"fraud_score": <float 0-1>, "insights": "<string insights paragraph>", "recommendation": "<string recommendation paragraph>"}}. Do not include any extra text or markdown formatting.
+Focus on narrative-style, descriptive analysis and make the fraud_score percentages in the CSV the key reference points for your reasoning.
 """
         # Generate with Gemini
         model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-09-2025')
         response = model.generate_content(prompt)
+        # Extract JSON from response
+        raw_response = response.text
+        json_str = extract_json_from_markdown(raw_response)
+        analysis_json = json.loads(json_str)
+        # Validate output
+        if not isinstance(analysis_json.get('fraud_score'), (int, float)) or \
+           not isinstance(analysis_json.get('insights'), str) or \
+           not isinstance(analysis_json.get('recommendation'), str):
+            missing_keys = [k for k in ['fraud_score', 'insights', 'recommendation']
+                            if k not in analysis_json or not isinstance(analysis_json.get(k), (int, float, str))]
+            raise ValueError(f"Invalid JSON structure from LLM. Missing/Wrong type keys: {missing_keys}")
         return analysis_json
+    except json.JSONDecodeError as je:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to parse LLM response as JSON: {str(je)}. Raw response: {raw_response}"
+        )
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"LLM analysis failed: {type(e).__name__}: {str(e)}"
         )
 # For local development
 if __name__ == "__main__":
     import uvicorn