Engine Upgrade: Continuous Pulse Loop & Real-time Local Summarization (Qwen)
Browse files
- app/core/agent.py +12 -3
- app/core/config.py +1 -0
- app/core/filters.py +19 -0
app/core/agent.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import asyncio
|
| 2 |
import logging
|
|
|
|
| 3 |
from datetime import datetime
|
| 4 |
from typing import Dict, List
|
| 5 |
from app.core.scrapers import scraper
|
|
@@ -12,7 +13,7 @@ logger = logging.getLogger("hawk.agent")
|
|
| 12 |
class HawkAgent:
|
| 13 |
def __init__(self):
|
| 14 |
self.is_running = False
|
| 15 |
-
self.
|
| 16 |
|
| 17 |
def notify_jewel_discovery(self, jewel: Dict):
|
| 18 |
"""Notify the user about a high-signal discovery."""
|
|
@@ -33,6 +34,12 @@ class HawkAgent:
|
|
| 33 |
return
|
| 34 |
logger.info(f"PHASE 1 COMPLETE: Captured {len(raw_signals)} potential signals.")
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# Save raw signals for the 'All' stream in frontend
|
| 37 |
vault.save_raw(raw_signals)
|
| 38 |
|
|
@@ -77,8 +84,10 @@ class HawkAgent:
|
|
| 77 |
|
| 78 |
while self.is_running:
|
| 79 |
await self.run_swoop_cycle()
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
|
| 83 |
def stop(self):
|
| 84 |
"""Stop the autonomous loop."""
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import logging
|
| 3 |
+
import random
|
| 4 |
from datetime import datetime
|
| 5 |
from typing import Dict, List
|
| 6 |
from app.core.scrapers import scraper
|
|
|
|
| 13 |
class HawkAgent:
|
| 14 |
def __init__(self):
    """Create an idle agent and pick up the pulse interval from settings."""
    # is_running starts False; pulse_interval is read from
    # settings.PULSE_INTERVAL_MINUTES (a value expressed in minutes).
    self.is_running, self.pulse_interval = False, settings.PULSE_INTERVAL_MINUTES
|
| 17 |
|
| 18 |
def notify_jewel_discovery(self, jewel: Dict):
|
| 19 |
"""Notify the user about a high-signal discovery."""
|
|
|
|
| 34 |
return
|
| 35 |
logger.info(f"PHASE 1 COMPLETE: Captured {len(raw_signals)} potential signals.")
|
| 36 |
|
| 37 |
+
# Real-time local summarization for the telemetry stream
|
| 38 |
+
logger.info("PHASE 1.5: Generating immediate forensic gists via local Qwen...")
|
| 39 |
+
for s in raw_signals:
|
| 40 |
+
text = f"{s.get('title', '')} {s.get('description', '')}"
|
| 41 |
+
s['local_summary'] = await sieve.summarize_locally(text)
|
| 42 |
+
|
| 43 |
# Save raw signals for the 'All' stream in frontend
|
| 44 |
vault.save_raw(raw_signals)
|
| 45 |
|
|
|
|
| 84 |
|
| 85 |
while self.is_running:
|
| 86 |
await self.run_swoop_cycle()
|
| 87 |
+
# Randomized breather between pulses (jitter)
|
| 88 |
+
breather = self.pulse_interval + random.randint(-2, 5)
|
| 89 |
+
logger.info(f"Hawk is hovering for {breather} minutes before next pulse...")
|
| 90 |
+
await asyncio.sleep(max(1, breather) * 60)
|
| 91 |
|
| 92 |
def stop(self):
|
| 93 |
"""Stop the autonomous loop."""
|
app/core/config.py
CHANGED
|
@@ -17,6 +17,7 @@ class Settings(BaseSettings):
|
|
| 17 |
|
| 18 |
# Model Settings
|
| 19 |
LOCAL_MODEL_NAME: str = "Qwen/Qwen2.5-0.5B-Instruct"
|
|
|
|
| 20 |
|
| 21 |
# Search & Extraction Settings
|
| 22 |
TARGET_QUERIES: list = [
|
|
|
|
| 17 |
|
| 18 |
# Model Settings
|
| 19 |
LOCAL_MODEL_NAME: str = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 20 |
+
PULSE_INTERVAL_MINUTES: int = 15 # Breather between discovery pulses
|
| 21 |
|
| 22 |
# Search & Extraction Settings
|
| 23 |
TARGET_QUERIES: list = [
|
app/core/filters.py
CHANGED
|
@@ -34,6 +34,25 @@ class SignalFilter:
|
|
| 34 |
finally:
|
| 35 |
self._is_loading = False
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
async def classify_with_llm(self, text: str) -> bool:
|
| 38 |
"""Use local LLM to decide if content is high-signal or junk."""
|
| 39 |
await self.ensure_model_loaded()
|
|
|
|
| 34 |
finally:
|
| 35 |
self._is_loading = False
|
| 36 |
|
| 37 |
+
async def summarize_locally(self, text: str) -> str:
    """Provide a 1-sentence technical gist using local LLM.

    Args:
        text: Raw signal text; only the first 800 characters are sent
            to the model.

    Returns:
        The generated gist, or one of the fixed fallback strings:
        "Local analysis offline." when no model is loaded,
        "No distinctive pattern identified." when there is nothing to
        summarize or the model produced no text, and
        "Technical synthesis failed." when inference raised.
    """
    # Function-scope import: the module's import block is not visible from
    # this block, so do not rely on asyncio already being imported there.
    import asyncio

    await self.ensure_model_loaded()
    if not self.model:
        return "Local analysis offline."

    # Skip inference for missing/blank input instead of prompting the model
    # with an empty signal (and avoid slicing None below).
    if not text or not text.strip():
        return "No distinctive pattern identified."

    prompt = f"System: You are an architectural forensic analyst. Provide a 1-sentence technical gist of the following signal.\nUser: {text[:800]}\nSummary:"

    def _generate() -> str:
        # Tokenize, generate and decode synchronously; meant to run off the
        # event loop thread.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=40)
        # Decode only the tokens after the prompt so the prompt text is not
        # echoed back into the summary.
        # NOTE(review): assumes generate() returns prompt + continuation,
        # as causal (decoder-only) models do - confirm for the loaded model.
        prompt_len = inputs["input_ids"].shape[-1]
        return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    try:
        # Inference is blocking; run it in the default executor so other
        # coroutines keep running while the model works.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _generate)
        # Still drop anything up to a "Summary:" marker the model may repeat.
        summary = response.split("Summary:")[-1].strip()
        return summary if summary else "No distinctive pattern identified."
    except Exception as e:
        logger.error(f"Summarization error: {e}")
        return "Technical synthesis failed."
|
| 55 |
+
|
| 56 |
async def classify_with_llm(self, text: str) -> bool:
|
| 57 |
"""Use local LLM to decide if content is high-signal or junk."""
|
| 58 |
await self.ensure_model_loaded()
|