Spaces:

Rayugacodes
/

Breach-OS

Sleeping

App Files Community

Naman Gupta committed on Apr 3

Commit

8d70360

unverified ·

2 Parent(s): 6b72bd2 3fc0eba

Merge pull request #7 from subhdotsol/fix

Browse files

Files changed (5) hide show

.env.example +0 -36
.gitignore +1 -0
inference.py +40 -11
llm/pipeline.py +4 -4
tests/test_llm.py +2 -1

.env.example DELETED Viewed

@@ -1,36 +0,0 @@
-# Copy this file to .env and fill in your values.
-# Never commit .env to git — it's already in .gitignore.
-# ------------------------------------------------------------------
-# Groq (required — Person 3's LLM pipeline uses this)
-# Get your key at: https://console.groq.com → API Keys
-# ------------------------------------------------------------------
-GROQ_API_KEY=gsk_your_key_here
-# Which Groq model to use.
-# Fast + free options: llama-3.1-8b-instant, mixtral-8x7b-32768
-# Smarter but slower: llama-3.3-70b-versatile
-MODEL_NAME=llama-3.1-8b-instant
-# ------------------------------------------------------------------
-# Server settings
-# ------------------------------------------------------------------
-# Maximum number of attack turns per episode
-MAX_TURNS=10
-# Set to true to enable FastAPI debug mode and verbose logging
-DEBUG=false
-# How long to wait for a single Groq API call (seconds)
-LLM_TIMEOUT=30
-# How many times to retry a failed Groq call before giving up
-LLM_MAX_RETRIES=3
-# ------------------------------------------------------------------
-# HuggingFace (only needed if deploying to HF Spaces)
-# The inference.py attacker script uses this to call the HF API
-# ------------------------------------------------------------------
-HF_TOKEN=hf_your_token_here
-API_BASE_URL=https://api-inference.huggingface.co/models

.gitignore CHANGED Viewed

@@ -2,6 +2,7 @@ __pycache__/
 *.py[cod]
 *$py.class
 .venv/
 .env
 .pytest_cache/
 *.swp

 *.py[cod]
 *$py.class
 .venv/
+venv
 .env
 .pytest_cache/
 *.swp

inference.py CHANGED Viewed

@@ -2,18 +2,23 @@ import os
 import asyncio
 import logging
 from openai import OpenAI
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-API_BASE_URL = os.environ.get("API_BASE_URL", "https://rayugacodes-breach-os.hf.space")
-MODEL_NAME   = os.environ.get("MODEL_NAME",   "mistralai/Mistral-7B-Instruct-v0.3")
-HF_TOKEN     = os.environ.get("HF_TOKEN",     "")
 client = OpenAI(
-    base_url = f"https://api-inference.huggingface.co/models/{MODEL_NAME}/v1",
-    api_key  = HF_TOKEN,
 )
 def generate_attack(defender_response: str, turn: int, previous_success: float) -> dict:
     strategies = [
@@ -42,7 +47,14 @@ def generate_attack(defender_response: str, turn: int, previous_success: float)
 async def run_episode(task: str = "easy") -> dict:
     import httpx
-    async with httpx.AsyncClient(base_url=API_BASE_URL, timeout=60.0) as http:
         resp = await http.post("/reset")
         reset_data = resp.json()
         defender_resp = reset_data["observation"]["defender_response"]
@@ -52,27 +64,44 @@ async def run_episode(task: str = "easy") -> dict:
         while True:
             turn += 1
             action = generate_attack(defender_resp, turn, prev_success)
             resp = await http.post("/step", json=action)
             step_data = resp.json()
             obs = step_data["observation"]
             defender_resp = obs["defender_response"]
             prev_success = obs["attack_success_estimate"]
-            if obs["episode_done"]: break
         grade_resp = await http.post("/grade")
-        return {"task": task, "turns": turn, "grade": grade_resp.json()}
 async def main():
     import time
     start = time.time()
     for task in ["easy", "medium", "hard"]:
-        logger.info(f"Running {task}...")
         try:
             await run_episode(task)
         except Exception as e:
             logger.error(f"Failed {task}: {e}")
-    if time.time() - start > 1200:
-        logger.warning("Exceeded 20 mins!")
 if __name__ == "__main__":
     asyncio.run(main())

 import asyncio
 import logging
 from openai import OpenAI
+from dotenv import load_dotenv
+load_dotenv()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Backend server
+SERVER_URL = "https://rayugacodes-breach-os.hf.space"
+# Attacker LLM (Configured to Groq)
 client = OpenAI(
+    base_url = "https://api.groq.com/openai/v1",
+    api_key  = os.environ.get("GROQ_API_KEY"),
 )
+MODEL_NAME = "llama-3.1-8b-instant"
 def generate_attack(defender_response: str, turn: int, previous_success: float) -> dict:
     strategies = [
 async def run_episode(task: str = "easy") -> dict:
     import httpx
+    import json
+    print(f"\n{'-'*60}\nSTARTING EPISODE: [{task.upper()} TASK]\n{'-'*60}")
+    async with httpx.AsyncClient(base_url=SERVER_URL, timeout=60.0) as http:
+        # Hide httpx logs to keep it clean
+        logging.getLogger("httpx").setLevel(logging.WARNING)
         resp = await http.post("/reset")
         reset_data = resp.json()
         defender_resp = reset_data["observation"]["defender_response"]
         while True:
             turn += 1
             action = generate_attack(defender_resp, turn, prev_success)
+            print(f"\n[ TURN {turn} ] Strategy: {action['strategy_type']} | Intensity: {action['intensity']:.2f}")
+            print(f"ATTACKER: {action['framing']}\n")
             resp = await http.post("/step", json=action)
             step_data = resp.json()
             obs = step_data["observation"]
             defender_resp = obs["defender_response"]
             prev_success = obs["attack_success_estimate"]
+            print(f"DEFENDER: {defender_resp}\n")
+            print(f"[ METRICS ] Attack Success: {prev_success:.2f} | Defense Quality: {obs.get('defense_score', 0):.2f}")
+            if obs["episode_done"]:
+                print(f"\n*** EPISODE TERMINATED (Turn {turn}) ***\n")
+                break
         grade_resp = await http.post("/grade")
+        grade_data = grade_resp.json()
+        print(f"FINAL EPISODE GRADE ({task.upper()}):")
+        print(json.dumps(grade_data, indent=2))
+        print(f"{'-'*60}\n")
+        return {"task": task, "turns": turn, "grade": grade_data}
 async def main():
     import time
     start = time.time()
     for task in ["easy", "medium", "hard"]:
         try:
             await run_episode(task)
         except Exception as e:
             logger.error(f"Failed {task}: {e}")
+    total_time = time.time() - start
+    print(f"\nFULL RUN COMPLETED IN {total_time:.1f} SECONDS.")
 if __name__ == "__main__":
     asyncio.run(main())

llm/pipeline.py CHANGED Viewed

@@ -123,10 +123,10 @@ def grade_episode_with_llm(history: list[dict]) -> dict:
         logger.debug(f"Episode grader output:\n{raw_output}")
         # Parse each labeled score; use fallback for any that didn't parse
-        scores = {
-            dim: _extract_labeled_score(raw_output, dim) or fallback_scores[dim]
-            for dim in fallback_scores
-        }
     except Exception as error:
         logger.warning(f"Episode grader unavailable: {error} — using fallback scores")

         logger.debug(f"Episode grader output:\n{raw_output}")
         # Parse each labeled score; use fallback for any that didn't parse
+        scores = {}
+        for dim in fallback_scores:
+            val = _extract_labeled_score(raw_output, dim)
+            scores[dim] = val if val is not None else fallback_scores[dim]
     except Exception as error:
         logger.warning(f"Episode grader unavailable: {error} — using fallback scores")

tests/test_llm.py CHANGED Viewed

@@ -322,7 +322,8 @@ class TestGradeEpisodeWithLlm:
         assert result["robustness"]  == 0.8
         assert result["clarity"]     == 0.85
         assert result["helpfulness"] == 0.6
-        assert result["overall"]     == round((0.9 + 0.8 + 0.85 + 0.6) / 4, 3)
     def test_falls_back_to_defaults_on_api_error(self):
         from llm.pipeline import grade_episode_with_llm

         assert result["robustness"]  == 0.8
         assert result["clarity"]     == 0.85
         assert result["helpfulness"] == 0.6
+        expected_overall = round(sum([0.9, 0.8, 0.85, 0.6]) / 4, 3)
+        assert result["overall"]     == expected_overall
     def test_falls_back_to_defaults_on_api_error(self):
         from llm.pipeline import grade_episode_with_llm