rasAli02 committed on
Commit
cb59dbe
·
1 Parent(s): 53cd64c

feat: update AMD inference server to 165.245.143.46 and increase timeout

Browse files
Files changed (2) hide show
  1. README.md +6 -0
  2. agents.py +4 -4
README.md CHANGED
@@ -23,6 +23,12 @@ tags:
23
 
24
  # 🔍 ForgeSight — Multimodal Quality-Control Copilot
25
 
 
 
 
 
 
 
26
  > **AMD + lablab.ai Hackathon** — Track 2 (AMD Developer Cloud) · Track 1 (AI Agents) · Track 3 (Vision & Multimodal AI)
27
 
28
  ForgeSight is a production-ready AI system that performs automated visual quality control on the **AMD Instinct MI300X** GPU. Upload a product image and a 4-agent agentic pipeline delivers a structured defect report in seconds.
 
23
 
24
  # 🔍 ForgeSight — Multimodal Quality-Control Copilot
25
 
26
+ ### ⚡ Live Status (Hackathon Mode)
27
+ - **Primary Inference**: AMD Instinct MI300X (192GB VRAM)
28
+ - **Backend**: FastAPI + vLLM on ROCm
29
+ - **Current Server**: `165.245.143.46` (vLLM via Token Auth)
30
+ - **Status**: ✅ **ONLINE** (Live Inference Active)
31
+
32
  > **AMD + lablab.ai Hackathon** — Track 2 (AMD Developer Cloud) · Track 1 (AI Agents) · Track 3 (Vision & Multimodal AI)
33
 
34
  ForgeSight is a production-ready AI system that performs automated visual quality control on the **AMD Instinct MI300X** GPU. Upload a product image and a 4-agent agentic pipeline delivers a structured defect report in seconds.
agents.py CHANGED
@@ -15,17 +15,17 @@ import httpx # async HTTP — lightweight, no extra deps beyond requirements
15
  # ── AMD vLLM inference endpoint ─────────────────────────────────────────────
16
  # vLLM exposes an OpenAI-compatible API at /v1/chat/completions.
17
  # Set AMD_INFERENCE_URL in your .env to point at the running vLLM server.
18
- # Example: http://129.212.191.163:8000 (direct port — ensure firewall allows it)
19
  # Or use the Jupyter proxy route: http://129.212.191.163/proxy/8000
20
  AMD_INFERENCE_URL = os.environ.get(
21
  "AMD_INFERENCE_URL",
22
- "http://129.212.184.42"
23
  ).rstrip("/")
24
 
25
  # Token for the AMD inference server (if required)
26
  AMD_INFERENCE_TOKEN = os.environ.get(
27
  "AMD_INFERENCE_TOKEN",
28
- "sr49urlf/6cgbSvhp8lg1EyTiHd2VvsOa6dev8Rc/vfK83fra"
29
  )
30
 
31
  # The model name vLLM is serving (used in the chat/completions request).
@@ -33,7 +33,7 @@ AMD_INFERENCE_TOKEN = os.environ.get(
33
  AMD_MODEL_NAME = os.environ.get("AMD_MODEL_NAME", "Qwen/Qwen2-VL-7B-Instruct")
34
 
35
  # Timeout (seconds) to wait for the AMD server before falling back to mock.
36
- AMD_TIMEOUT = float(os.environ.get("AMD_TIMEOUT", "30"))
37
 
38
  # ── System prompts ───────────────────────────────────────────────────────────
39
  INSPECTOR_SYSTEM = """You are the INSPECTOR agent of ForgeSight — a multimodal quality-control copilot
 
15
  # ── AMD vLLM inference endpoint ─────────────────────────────────────────────
16
  # vLLM exposes an OpenAI-compatible API at /v1/chat/completions.
17
  # Set AMD_INFERENCE_URL in your .env to point at the running vLLM server.
18
+ # Example: http://129.212.191.163 (direct port — ensure firewall allows it)
19
  # Or use the Jupyter proxy route: http://129.212.191.163/proxy/8000
20
  AMD_INFERENCE_URL = os.environ.get(
21
  "AMD_INFERENCE_URL",
22
+ "http://165.245.143.46:8000"
23
  ).rstrip("/")
24
 
25
  # Token for the AMD inference server (if required)
26
  AMD_INFERENCE_TOKEN = os.environ.get(
27
  "AMD_INFERENCE_TOKEN",
28
+ "5peRa6unb0DdXvzB3Pbck48IgNTDmxeJSUvE4NdnhvW70FcaX"
29
  )
30
 
31
  # The model name vLLM is serving (used in the chat/completions request).
 
33
  AMD_MODEL_NAME = os.environ.get("AMD_MODEL_NAME", "Qwen/Qwen2-VL-7B-Instruct")
34
 
35
  # Timeout (seconds) to wait for the AMD server before falling back to mock.
36
+ AMD_TIMEOUT = float(os.environ.get("AMD_TIMEOUT", "60"))
37
 
38
  # ── System prompts ───────────────────────────────────────────────────────────
39
  INSPECTOR_SYSTEM = """You are the INSPECTOR agent of ForgeSight — a multimodal quality-control copilot