Spaces:

Ani14
/

Guvi-Hackathon-TeChAI

Sleeping

App Files Files Community

Ani14 committed on Feb 1

Commit

2496d5a

verified ·

1 Parent(s): 76bcc6f

Upload 3 files

Browse files

Files changed (3) hide show

agent.py +206 -39
app.py +96 -43
models.py +13 -4

agent.py CHANGED Viewed

@@ -1,67 +1,234 @@
 import os
-import re
 import requests
-from typing import List, Dict
 from langgraph.graph import StateGraph, END, START
 from langgraph.checkpoint.base import BaseCheckpointSaver
 from openai import OpenAI
 from models import AgentState, Message, ExtractedIntelligence
-CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
 HONEYPOT_API_KEY = os.environ.get("HONEYPOT_API_KEY", "sk_test_123456789")
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
-    api_key=os.environ.get("OPENROUTER_API_KEY")
 )
 def detect_scam(state: AgentState) -> AgentState:
-    text = state["conversationHistory"][-1].text.lower()
-    keywords = ["bank", "upi", "otp", "verify", "urgent", "account"]
-    state["scamDetected"] = any(k in text for k in keywords)
-    state["agentNotes"] += "Scam detection executed. "
     return state
 def extract_intelligence(state: AgentState) -> AgentState:
-    scammer_text = " ".join(m.text for m in state["conversationHistory"] if m.sender == "scammer")
-    data = state["extractedIntelligence"].model_dump()
-    data["bankAccounts"] += re.findall(r"\b\d{8,20}\b", scammer_text)
-    data["upiIds"] += re.findall(r"\b[a-zA-Z0-9._-]+@[a-zA-Z0-9_-]+\b", scammer_text)
-    data["phishingLinks"] += re.findall(r"https?://\S+", scammer_text)
-    data["phoneNumbers"] += re.findall(r"\+?\d{10,15}", scammer_text)
-    state["extractedIntelligence"] = ExtractedIntelligence(**{k: list(set(v)) for k, v in data.items()})
-    state["agentNotes"] += "Intelligence extracted. "
     return state
 def final_callback(state: AgentState) -> AgentState:
-    if not state["callbackSent"] and state["scamDetected"]:
-        payload = {
-            "sessionId": state["sessionId"],
-            "scamDetected": state["scamDetected"],
-            "totalMessagesExchanged": state["totalMessagesExchanged"],
-            "extractedIntelligence": state["extractedIntelligence"].model_dump(),
-            "agentNotes": state["agentNotes"],
-        }
-        requests.post(
-            CALLBACK_URL,
-            json=payload,
-            headers={"x-api-key": HONEYPOT_API_KEY},
-            timeout=5
-        )
         state["callbackSent"] = True
     return state
 def create_honeypot_graph(checkpoint_saver: BaseCheckpointSaver):
-    g = StateGraph(AgentState)
-    g.add_node("detect_scam", detect_scam)
-    g.add_node("extract_intelligence", extract_intelligence)
-    g.add_node("final_callback", final_callback)
-    g.add_edge(START, "detect_scam")
-    g.add_edge("detect_scam", "extract_intelligence")
-    g.add_edge("extract_intelligence", "final_callback")
-    g.add_edge("final_callback", END)
-    return g.compile(checkpointer=checkpoint_saver)

 import os
+import json
 import requests
+import re
+from typing import List, Dict, Any, Optional
 from langgraph.graph import StateGraph, END, START
 from langgraph.checkpoint.base import BaseCheckpointSaver
+from langgraph.checkpoint.memory import MemorySaver
+from pydantic import ValidationError
 from openai import OpenAI
 from models import AgentState, Message, ExtractedIntelligence
+# --- Configuration ---
+# Endpoint that receives the final honeypot result for a session.
+CALLBACK_URL = "https://hackathon.guvi.in/api/updateHoneyPotFinalResult"
 HONEYPOT_API_KEY = os.environ.get("HONEYPOT_API_KEY", "sk_test_123456789")
+# OpenRouter configuration
+# SECURITY: the key is read from the environment only. A real credential must
+# never be used as an in-source default; the key that was previously committed
+# here should be treated as compromised and revoked/rotated.
+OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
+OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "openai/gpt-oss-120b:free")
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
+    # A placeholder keeps module import working when no key is configured;
+    # call_openrouter() refuses to send requests while OPENROUTER_API_KEY is empty.
+    api_key=OPENROUTER_API_KEY or "missing-openrouter-api-key",
 )
+def call_openrouter(messages: List[Dict[str, str]], max_tokens: int = 512) -> str:
+    """Call the OpenRouter chat-completions API and return the reply text.
+
+    Args:
+        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
+        max_tokens: Upper bound on the number of tokens to generate.
+
+    Returns:
+        The non-empty text content of the first completion choice.
+
+    Raises:
+        ValueError: If ``OPENROUTER_API_KEY`` is not set, or if the reply
+            contains no choices or no text content.
+        Exception: Any error raised by the client call is logged and re-raised.
+    """
+    if not OPENROUTER_API_KEY:
+        raise ValueError("OPENROUTER_API_KEY is not set.")
+    try:
+        response = client.chat.completions.create(
+            model=OPENROUTER_MODEL,
+            messages=messages,
+            max_tokens=max_tokens,
+            extra_body={"reasoning": {"enabled": True}}
+        )
+    except Exception as e:
+        print(f"OpenRouter API error: {e}")
+        raise
+    # The message content is nullable, so returning it unchecked would break the
+    # declared ``-> str`` contract. Raise instead, so callers take their
+    # existing ``except`` fallback path rather than processing None/blank text.
+    choices = getattr(response, "choices", None) or []
+    content = choices[0].message.content if choices else None
+    if not content or not content.strip():
+        raise ValueError("OpenRouter returned an empty completion.")
+    return content
+# --- LangGraph Nodes (Functions) ---
 def detect_scam(state: AgentState) -> AgentState:
+    """Node 1: Detects scam intent from the latest message.
+
+    Asks the OpenRouter model to classify the most recent message in
+    ``state["conversationHistory"]`` and expects a first line of the form
+    ``true|<reason>`` or ``false|<reason>``. If the call fails, or the reply
+    does not start with a recognisable verdict, the keyword heuristic is used
+    instead, so a malformed reply is never silently read as "not a scam".
+
+    Args:
+        state: Current graph state; ``conversationHistory`` must be non-empty.
+
+    Returns:
+        The same state object with ``scamDetected`` set and the detection
+        reason appended to ``agentNotes``.
+    """
+    latest_message = state["conversationHistory"][-1]
+    text = latest_message.text
+    is_scam = False
+    reason = "No scam indicators found"
+    prompt = (
+        "You are a scam detection assistant. Given the following message, determine if it indicates "
+        "a scam. Respond with 'true|<reason>' if the message is a scam or 'false|<reason>' if not. "
+        f"Message: {text}"
+    )
+    try:
+        response = call_openrouter([{"role": "user", "content": prompt}], max_tokens=100)
+        first_line = response.strip().split('\n')[0]
+        raw_flag, separator, detail = first_line.partition('|')
+        # The prompt shows the expected answer inside quotes, and models may echo
+        # them or wrap the verdict in markdown/punctuation ("'true", "**True**."),
+        # so strip such decoration before comparing.
+        flag = raw_flag.strip().strip("'\"`*.:! ").lower()
+        if flag not in {"true", "yes", "false", "no"}:
+            # Unparseable verdict: hand over to the heuristic in the except branch.
+            raise ValueError(f"Unrecognised verdict {raw_flag.strip()!r}")
+        is_scam = flag in {"true", "yes"}
+        if separator:
+            reason = detail.strip()
+        else:
+            reason = "OpenRouter classification did not provide a reason"
+    except Exception as e:
+        print(f"OpenRouter classification error: {e}. Falling back to heuristic.")
+        lower_text = text.lower()
+        scam_keywords = ["bank", "account", "blocked", "verify", "otp", "password", "upi", "urgent", "link", "update"]
+        # Plain substring match; the first keyword found decides the verdict.
+        for kw in scam_keywords:
+            if kw in lower_text:
+                is_scam = True
+                reason = f"Keyword '{kw}' found in message (fallback)"
+                break
+    state["scamDetected"] = is_scam
+    if "agentNotes" not in state:
+        state["agentNotes"] = ""
+    state["agentNotes"] += f"Initial Detection: {reason}. "
+    return state
+def agent_persona_response(state: AgentState) -> AgentState:
+    """Node 2: Generates a human‑like response to engage the scammer.
+
+    When no scam was detected, a polite closing line is stored and engagement
+    is switched off. Otherwise the model is asked for a short, unsuspicious
+    reply to the latest message; if the call fails or yields a blank reply, a
+    canned clarification question is used instead.
+
+    Args:
+        state: Current graph state; ``conversationHistory`` must be non-empty
+            when ``scamDetected`` is true.
+
+    Returns:
+        The same state object with the agent reply appended to
+        ``conversationHistory`` and ``agent_response_text``,
+        ``totalMessagesExchanged`` and ``should_continue_engagement`` updated.
+    """
+    if not state["scamDetected"]:
+        state["agent_response_text"] = "Thank you for reaching out. Have a nice day!"
+        state["should_continue_engagement"] = False
+        return state
+    latest_text = state["conversationHistory"][-1].text
+    fallback_reply = "I'm not sure I understand. Could you please explain further?"
+    prompt = (
+        "You are acting as a genuine user in a conversation with a potential scammer. "
+        "Here is the scammer's latest message:\n"
+        f"{latest_text}\n\n"
+        "Respond in a friendly, inquisitive tone that does not reveal suspicion, but encourages the other "
+        "person to provide more details (such as why they need your bank details, UPI ID or any links they "
+        "shared). Keep your response under 50 words."
+    )
+    try:
+        response_text = call_openrouter([{"role": "user", "content": prompt}], max_tokens=150)
+        # Only the first line of the reply is used.
+        response_text = response_text.strip().split('\n')[0]
+    except Exception as e:
+        print(f"OpenRouter persona generation error: {e}. Falling back to heuristic.")
+        response_text = fallback_reply
+    if not response_text.strip():
+        # A blank reply would otherwise be recorded as an empty agent message.
+        response_text = fallback_reply
+    # The reply reuses the timestamp of the message it answers.
+    agent_message = Message(
+        sender="user",
+        text=response_text,
+        timestamp=state["conversationHistory"][-1].timestamp
+    )
+    # NOTE(review): AgentState declares conversationHistory with an `add`
+    # reducer. Appending in place and then returning the full state may cause
+    # the history to be concatenated onto itself — confirm against LangGraph's
+    # reducer semantics.
+    state["conversationHistory"].append(agent_message)
+    state["agent_response_text"] = response_text
+    state["totalMessagesExchanged"] += 1
+    state["should_continue_engagement"] = True
     return state
 def extract_intelligence(state: AgentState) -> AgentState:
+    """Node 3: Extracts structured intelligence from the conversation.
+
+    Runs regex/keyword extraction over the concatenated text of every message
+    whose ``sender`` is ``"scammer"`` and merges the findings into
+    ``state["extractedIntelligence"]``. Merged lists keep first-seen order and
+    contain no duplicates, so repeated runs give stable output. A note is added
+    to ``agentNotes`` only when this run contributed something not already
+    recorded.
+
+    Args:
+        state: Current graph state.
+
+    Returns:
+        The same state object with ``extractedIntelligence`` replaced by the
+        merged result.
+    """
+    scammer_text = " ".join(m.text for m in state["conversationHistory"] if m.sender == "scammer")
+    lowered_text = scammer_text.lower()
+    scam_keywords_list = ["bank", "account", "blocked", "verify", "otp", "password", "upi", "urgent", "link", "update"]
+    # Keys must match the ExtractedIntelligence field names exactly.
+    new_data = {
+        "bankAccounts": re.findall(r"\b\d{8,20}\b", scammer_text),
+        "upilds": re.findall(r"\b[a-zA-Z0-9\.\-_]+@[a-zA-Z0-9\-_]+\b", scammer_text),
+        "phishingLinks": re.findall(r"https?://[^\s]+", scammer_text),
+        "phoneNumbers": re.findall(r"\+?\d{10,15}", scammer_text),
+        "suspiciousKeywords": [kw for kw in scam_keywords_list if kw.lower() in lowered_text],
+    }
+    current_intel = state.get("extractedIntelligence", ExtractedIntelligence())
+    current_data = current_intel.model_dump()
+    # Merge and deduplicate, preserving first-seen order (dict keys are ordered).
+    merged_data = {}
+    found_new = False
+    for key, known in current_data.items():
+        merged = list(dict.fromkeys(known + new_data.get(key, [])))
+        if len(merged) > len(set(known)):
+            found_new = True
+        merged_data[key] = merged
+    state["extractedIntelligence"] = ExtractedIntelligence(**merged_data)
+    if found_new:
+        if "agentNotes" not in state:
+            state["agentNotes"] = ""
+        state["agentNotes"] += "Intelligence updated. "
+    return state
+def decide_engagement_end(state: AgentState) -> AgentState:
+    """Node 4: Decides whether to continue or end the conversation.
+
+    Engagement stops as soon as any bank account, UPI ID, phishing link or
+    phone number has been collected, or once at least 10 messages have been
+    exchanged. The verdict is written to ``should_continue_engagement``.
+    """
+    intel: ExtractedIntelligence = state.get("extractedIntelligence", ExtractedIntelligence())
+    # Suspicious keywords alone do not count as actionable intelligence.
+    has_actionable_intel = bool(
+        intel.bankAccounts or intel.upilds or intel.phishingLinks or intel.phoneNumbers
+    )
+    message_cap_reached = state.get("totalMessagesExchanged", 0) >= 10
+    state["should_continue_engagement"] = not (has_actionable_intel or message_cap_reached)
     return state
 def final_callback(state: AgentState) -> AgentState:
+    """Node 5: Sends the mandatory final result callback.
+
+    Does nothing when no scam was detected or the callback was already sent.
+    Otherwise POSTs the session summary to ``CALLBACK_URL``. On success
+    ``callbackSent`` is set; on any failure the error is printed and recorded
+    in ``agentNotes``, and ``callbackSent`` is left untouched.
+    """
+    already_sent = state.get("callbackSent", False)
+    if already_sent or not state["scamDetected"]:
+        return state
+    intel = state.get("extractedIntelligence", ExtractedIntelligence())
+    body = {
+        "sessionId": state.get("sessionId"),
+        "scamDetected": state.get("scamDetected", False),
+        "totalMessagesExchanged": state.get("totalMessagesExchanged", 0),
+        "extractedIntelligence": intel.model_dump(),
+        "agentNotes": state.get("agentNotes", "")
+    }
+    request_headers = {
+        "Content-Type": "application/json",
+        "x-api-key": HONEYPOT_API_KEY
+    }
+    try:
+        reply = requests.post(CALLBACK_URL, json=body, headers=request_headers, timeout=10)
+        # Non-2xx responses count as failures.
+        reply.raise_for_status()
+    except Exception as e:
+        print(f"Final callback failed: {e}")
+        outcome_note = f"Final callback failed: {e}. "
+    else:
         state["callbackSent"] = True
+        outcome_note = "Final callback sent successfully. "
+    state["agentNotes"] = state.get("agentNotes", "") + outcome_note
     return state
 def create_honeypot_graph(checkpoint_saver: BaseCheckpointSaver):
+    """Build and compile the honeypot state graph.
+
+    Flow: START -> detect_scam -> (extract_intelligence ->
+    agent_persona_response -> decide_engagement_end -> [final_callback]) -> END.
+
+    Args:
+        checkpoint_saver: Checkpointer passed to ``workflow.compile``.
+
+    Returns:
+        The compiled graph.
+    """
+    workflow = StateGraph(AgentState)
+    workflow.add_node("detect_scam", detect_scam)
+    workflow.add_node("extract_intelligence", extract_intelligence)
+    workflow.add_node("agent_persona_response", agent_persona_response)
+    workflow.add_node("decide_engagement_end", decide_engagement_end)
+    workflow.add_node("final_callback", final_callback)
+    workflow.add_edge(START, "detect_scam")
+    # Non-scam messages end the run right after detection, so the later nodes
+    # (including agent_persona_response) are only reached for detected scams.
+    def after_detection(state: AgentState) -> str:
+        return "extract_intelligence" if state["scamDetected"] else END
+    workflow.add_conditional_edges("detect_scam", after_detection)
+    workflow.add_edge("extract_intelligence", "agent_persona_response")
+    workflow.add_edge("agent_persona_response", "decide_engagement_end")
+    # While engagement continues, this run ends without a callback. Once
+    # engagement is over, the callback node runs unless it has already been sent.
+    def after_decision(state: AgentState) -> str:
+        if state["should_continue_engagement"]:
+            return END
+        if not state.get("callbackSent", False):
+            return "final_callback"
+        return END
+    workflow.add_conditional_edges("decide_engagement_end", after_decision)
+    workflow.add_edge("final_callback", END)
+    return workflow.compile(checkpointer=checkpoint_saver)

app.py CHANGED Viewed

@@ -1,63 +1,116 @@
 import os
 import time
-from typing import Optional, Dict, Any
-from fastapi import FastAPI, HTTPException, Depends, status
 from fastapi.security import APIKeyHeader
 from langgraph.checkpoint.memory import MemorySaver
-from models import HoneypotRequest, HoneypotResponse, ExtractedIntelligence, AgentState
-from agent import create_honeypot_graph
 API_KEY_NAME = "x-api-key"
 API_KEY = os.environ.get("HONEYPOT_API_KEY", "sk_test_123456789")
 api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
-app = FastAPI(title="Agentic Honeypot API")
-checkpointer = MemorySaver()
 honeypot_app = create_honeypot_graph(checkpointer)
-async def get_api_key(api_key: str = Depends(api_key_header)):
-    if api_key != API_KEY:
-        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key")
-    return api_key
 @app.post("/api/honeypot-detection", response_model=HoneypotResponse)
 async def honeypot_detection(
-    request_data: Optional[HoneypotRequest] = None,
     api_key: str = Depends(get_api_key)
 ) -> Dict[str, Any]:
-    if request_data is None:
-        raise HTTPException(status_code=400, detail="Invalid or empty request body")
-    state = AgentState(
-        sessionId=request_data.sessionId,
-        conversationHistory=request_data.conversationHistory + [request_data.message],
-        scamDetected=False,
-        extractedIntelligence=ExtractedIntelligence(),
-        agentNotes="",
-        totalMessagesExchanged=len(request_data.conversationHistory) + 1,
-        should_continue_engagement=False,
-        agent_response_text="",
-        callbackSent=False
-    )
-    start = time.time()
-    final_state = honeypot_app.invoke(state)
-    duration = int(time.time() - start)
-    return {
-        "status": "success",
-        "scamDetected": final_state["scamDetected"],
-        "engagementMetrics": {
-            "engagementDurationSeconds": duration,
-            "totalMessagesExchanged": final_state["totalMessagesExchanged"]
-        },
-        "extractedIntelligence": final_state["extractedIntelligence"],
-        "agentNotes": final_state["agentNotes"]
-    }
 @app.get("/")
-def root():
-    return {"message": "Agentic Honeypot API running"}

 import os
 import time
+from fastapi import FastAPI, Request, HTTPException, Depends, status
 from fastapi.security import APIKeyHeader
+from typing import Dict, Any
+from datetime import datetime
+# LangGraph and Model Imports
 from langgraph.checkpoint.memory import MemorySaver
+from langgraph.checkpoint.base import BaseCheckpointSaver
+from agent import create_honeypot_graph, final_callback
+from models import (
+    HoneypotRequest, HoneypotResponse,
+    AgentState, ExtractedIntelligence, Message, EngagementMetrics
+)
+# --- Configuration ---
+# Name of the HTTP header that carries the caller's API key.
 API_KEY_NAME = "x-api-key"
+# Default key for testing, should be set via environment variable in production
 API_KEY = os.environ.get("HONEYPOT_API_KEY", "sk_test_123456789")
+# NOTE(review): with auto_error=False a missing header is presumably passed on
+# as None; get_api_key checks for None and raises the 401 itself — confirm.
 api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
+# --- Initialization ---
+app = FastAPI(
+    title="Agentic Honey-Pot API",
+    description="REST API for Scam Detection and Intelligence Extraction (Problem Statement 2).",
+    version="1.0.0"
+)
+# Initialize LangGraph Checkpointer
+# NOTE(review): MemorySaver presumably keeps checkpoints in process memory only,
+# so session state would not survive a restart — confirm.
+checkpointer: BaseCheckpointSaver = MemorySaver()
+# Compiled graph shared by all requests; sessions are told apart by the
+# thread_id that honeypot_detection puts into the invoke config.
 honeypot_app = create_honeypot_graph(checkpointer)
+# --- Dependency for API Key Validation ---
+async def get_api_key(api_key_header: str = Depends(api_key_header)):
+    """Validate the ``x-api-key`` request header.
+
+    Args:
+        api_key_header: Header value from the ``APIKeyHeader`` dependency;
+            ``None`` when the header is absent.
+
+    Returns:
+        The supplied key when it matches ``API_KEY``.
+
+    Raises:
+        HTTPException: 401 when the header is missing or does not match.
+    """
+    # Function-scope import keeps this block self-contained.
+    import hmac
+    supplied_key = api_key_header
+    # compare_digest avoids leaking how much of the key matched through timing.
+    # Both sides are encoded because it rejects non-ASCII str arguments.
+    if supplied_key is None or not hmac.compare_digest(
+        supplied_key.encode("utf-8"), API_KEY.encode("utf-8")
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid API Key or missing 'x-api-key' header.",
+        )
+    return supplied_key
+# --- API Endpoints ---
 @app.post("/api/honeypot-detection", response_model=HoneypotResponse)
 async def honeypot_detection(
+    request_data: HoneypotRequest,
     api_key: str = Depends(get_api_key)
 ) -> Dict[str, Any]:
+    """
+    Accepts an incoming message event, runs the LangGraph agent, and returns the response.
+    Strictly follows Problem Statement 2 schema.
+
+    An existing session (looked up by ``sessionId``) is resumed from its
+    checkpoint with the new message appended; otherwise a fresh state is built
+    from the request's history plus the new message.
+
+    Raises:
+        HTTPException: 500 when the agent run fails. The underlying error is
+            logged server-side and not echoed to the client.
+    """
+    # Function-scope import keeps this block self-contained.
+    import asyncio
+    session_id = request_data.sessionId
+    config = {"configurable": {"thread_id": session_id}}
+    checkpoint = honeypot_app.get_state(config)
+    start_time = time.time()
+    if checkpoint and checkpoint.values:
+        # Work on copies so the checkpointed snapshot is never mutated in place.
+        current_state_dict = dict(checkpoint.values)
+        # Ensure all required fields exist
+        current_state_dict.setdefault("callbackSent", False)
+        current_state_dict.setdefault("agentNotes", "")
+        current_state_dict.setdefault("extractedIntelligence", ExtractedIntelligence())
+        current_state_dict.setdefault("conversationHistory", [])
+        current_state_dict.setdefault("totalMessagesExchanged", 0)
+        current_state_dict.setdefault("sessionId", session_id)
+        # NOTE(review): AgentState declares conversationHistory with an `add`
+        # reducer. Passing the full history back in may cause it to be
+        # concatenated onto the stored one — confirm against LangGraph's
+        # reducer semantics.
+        current_state_dict["conversationHistory"] = (
+            list(current_state_dict["conversationHistory"]) + [request_data.message]
+        )
+        current_state_dict["totalMessagesExchanged"] += 1
+        input_state = AgentState(**current_state_dict)
+    else:
+        # New session
+        initial_history = request_data.conversationHistory + [request_data.message]
+        input_state = AgentState(
+            sessionId=session_id,
+            conversationHistory=initial_history,
+            scamDetected=False,
+            extractedIntelligence=ExtractedIntelligence(),
+            agentNotes="New session started. ",
+            totalMessagesExchanged=len(initial_history),
+            should_continue_engagement=False,
+            agent_response_text="",
+            callbackSent=False
+        )
+    try:
+        # The graph makes blocking network calls, so run it in a worker thread
+        # to keep the event loop free for other requests.
+        # NOTE(review): assumes requests for one session arrive sequentially.
+        final_state_dict = await asyncio.to_thread(
+            honeypot_app.invoke, input_state, config=config
+        )
+        final_state = AgentState(**final_state_dict)
+        engagement_duration = int(time.time() - start_time)
+        # Prepare response strictly matching PDF page 10/11
+        return {
+            "status": "success",
+            "scamDetected": final_state["scamDetected"],
+            "engagementMetrics": {
+                "engagementDurationSeconds": engagement_duration,
+                "totalMessagesExchanged": final_state["totalMessagesExchanged"]
+            },
+            "extractedIntelligence": final_state["extractedIntelligence"].model_dump(),
+            "agentNotes": final_state["agentNotes"]
+        }
+    except Exception as e:
+        print(f"Error in Honey-Pot: {e}")
+        # Keep exception text out of the response; it may expose internals.
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Internal server error",
+        ) from e
 @app.get("/")
+async def root():
+    return {"message": "Agentic Honey-Pot API is running. Use /api/honeypot-detection."}

models.py CHANGED Viewed

@@ -2,43 +2,52 @@ from typing import TypedDict, List, Optional, Annotated
 from operator import add
 from pydantic import BaseModel, Field
-# --- 1. API Input/Output Models (Problem Statement 2) ---
 class Message(BaseModel):
     sender: str = Field(..., description="scammer or user")
     text: str = Field(..., description="Message content")
     timestamp: str = Field(..., description="ISO-8601 format")
 class Metadata(BaseModel):
     channel: Optional[str] = Field(None, description="SMS / WhatsApp / Email / Chat")
     language: Optional[str] = Field(None, description="Language used")
     locale: Optional[str] = Field(None, description="Country or region")
 class HoneypotRequest(BaseModel):
     sessionId: str = Field(..., description="Unique session ID")
     message: Message = Field(..., description="The latest incoming message")
-    conversationHistory: List[Message] = Field(default_factory=list)
     metadata: Optional[Metadata] = None
 class ExtractedIntelligence(BaseModel):
     bankAccounts: List[str] = Field(default_factory=list)
-    upiIds: List[str] = Field(default_factory=list)
     phishingLinks: List[str] = Field(default_factory=list)
     phoneNumbers: List[str] = Field(default_factory=list)
     suspiciousKeywords: List[str] = Field(default_factory=list)
 class EngagementMetrics(BaseModel):
     engagementDurationSeconds: int
     totalMessagesExchanged: int
 class HoneypotResponse(BaseModel):
-    status: str
     scamDetected: bool
     engagementMetrics: EngagementMetrics
     extractedIntelligence: ExtractedIntelligence
     agentNotes: str
 class AgentState(TypedDict):
     sessionId: str
     conversationHistory: Annotated[List[Message], add]
     scamDetected: bool

 from operator import add
 from pydantic import BaseModel, Field
+# --- 1. API Input/Output Models (Strictly Problem Statement 2) ---
 class Message(BaseModel):
+    """A single message in a honeypot conversation; all fields are required."""
+    # Free-form string; the field description names "scammer" and "user".
     sender: str = Field(..., description="scammer or user")
     text: str = Field(..., description="Message content")
+    # Declared as a plain str, so the ISO-8601 format is not validated here.
     timestamp: str = Field(..., description="ISO-8601 format")
 class Metadata(BaseModel):
+    """Optional metadata about the conversation channel.
+
+    Every field defaults to ``None``.
+    """
     channel: Optional[str] = Field(None, description="SMS / WhatsApp / Email / Chat")
     language: Optional[str] = Field(None, description="Language used")
     locale: Optional[str] = Field(None, description="Country or region")
 class HoneypotRequest(BaseModel):
+    """The incoming request body for the honeypot API.
+
+    ``sessionId`` and ``message`` are required; ``conversationHistory``
+    defaults to an empty list and ``metadata`` to ``None``.
+    """
     sessionId: str = Field(..., description="Unique session ID")
     message: Message = Field(..., description="The latest incoming message")
+    conversationHistory: List[Message] = Field(default_factory=list, description="All previous messages")
     metadata: Optional[Metadata] = None
 class ExtractedIntelligence(BaseModel):
+    """Structured data to be extracted from the conversation.
+
+    Every field is a list of strings that defaults to an empty list.
+    """
     bankAccounts: List[str] = Field(default_factory=list)
+    # NOTE(review): spelled "upilds" (lowercase L), whereas the previous
+    # revision used "upiIds". The name is also the serialized key — confirm
+    # which spelling the response/callback consumer expects.
+    upilds: List[str] = Field(default_factory=list)
     phishingLinks: List[str] = Field(default_factory=list)
     phoneNumbers: List[str] = Field(default_factory=list)
     suspiciousKeywords: List[str] = Field(default_factory=list)
 class EngagementMetrics(BaseModel):
+    """Metrics for the engagement; both fields are required integers."""
     engagementDurationSeconds: int
     totalMessagesExchanged: int
 class HoneypotResponse(BaseModel):
+    """The outgoing response body from the honeypot API; all fields are required."""
+    # Free-form string; the field description names "success" as the value.
+    status: str = Field(..., description="success")
     scamDetected: bool
     engagementMetrics: EngagementMetrics
     extractedIntelligence: ExtractedIntelligence
     agentNotes: str
+# --- 2. LangGraph State Model ---
 class AgentState(TypedDict):
+    """The state object for the LangGraph state machine."""
     sessionId: str
     conversationHistory: Annotated[List[Message], add]
     scamDetected: bool