havinashpatil committed
Commit a8bc575 · 1 Parent(s): 434afdf

Fix TGI endpoint plumbing and use full TGI runtime image in Dockerfile

Files changed (2)
  1. Dockerfile +4 -14
  2. server/ai_fixer.py +7 -7
Dockerfile CHANGED
@@ -9,19 +9,8 @@ RUN npm install
 COPY frontend/ ./
 RUN npm run build
 
-# TGI stage for LLM serving
-FROM ghcr.io/huggingface/text-generation-inference:3.0.2 AS tgi-builder
-
-# Main stage: Python app with TGI
-FROM python:3.10-slim
-
-# Install TGI runtime dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy TGI binary from builder
-COPY --from=tgi-builder /usr/local/bin/text-generation-inference /usr/local/bin/
+# Main stage: Python app with TGI runtime
+FROM ghcr.io/huggingface/text-generation-inference:3.0.2
 
 WORKDIR /app
 
@@ -39,8 +28,9 @@ RUN mkdir -p /data && chmod 777 /data
 RUN mkdir -p /.cache && chmod 777 /.cache
 RUN mkdir -p /.triton && chmod 777 /.triton
 
-# Required for HF Spaces: Expose default port 7860 for FastAPI
+# Required for HF Spaces: Expose default ports for FastAPI and TGI
 EXPOSE 7860
+EXPOSE 8080
 
 # Start both FastAPI server and TGI in background
 CMD ["sh", "-c", "text-generation-inference --model-id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --port 8080 --hostname 0.0.0.0 & uvicorn server.app:app --host 0.0.0.0 --port 7860"]
server/ai_fixer.py CHANGED
@@ -23,20 +23,20 @@ from server.memory import store_success, retrieve_memory, log_complexity_reward
 TGI_BASE_URL = os.environ.get("TGI_BASE_URL", "http://localhost:8080")
 TGI_AVAILABLE = False
 
-def check_tgi_availability():
+def check_tgi_availability(tgi_url: str = TGI_BASE_URL) -> bool:
     """Check if TGI server is available."""
     global TGI_AVAILABLE
     try:
-        response = httpx.get(f"{TGI_BASE_URL}/health", timeout=5.0)
+        response = httpx.get(f"{tgi_url}/health", timeout=5.0)
         TGI_AVAILABLE = response.status_code == 200
-    except:
+    except Exception:
         TGI_AVAILABLE = False
     return TGI_AVAILABLE
 
 
-def fix_with_tgi(code: str) -> Optional[str]:
+def fix_with_tgi(code: str, tgi_url: str = TGI_BASE_URL) -> Optional[str]:
     """Use TGI for advanced code fixing."""
-    if not TGI_AVAILABLE and not check_tgi_availability():
+    if not TGI_AVAILABLE and not check_tgi_availability(tgi_url):
         return None
 
     prompt = f"""You are an expert competitive programmer.
@@ -54,7 +54,7 @@ Return ONLY the corrected code without any explanation:
 
     try:
         response = httpx.post(
-            f"{TGI_BASE_URL}/v1/chat/completions",
+            f"{tgi_url}/v1/chat/completions",
             json={
                 "model": "tgi",
                 "messages": [{"role": "user", "content": prompt}],
@@ -479,7 +479,7 @@ def generate_fix(
     Returns: { fixed_code, method, success, explanation }
     """
     if use_tgi:
-        fixed_code = fix_with_tgi(code)
+        fixed_code = fix_with_tgi(code, tgi_url=tgi_url)
         if fixed_code:
             # Log complexity vs reward for research tracking
             complexity = detect_complexity(fixed_code)
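With `tgi_url` now threaded from `generate_fix` down to the HTTP calls, callers and tests can target a non-default TGI endpoint without mutating the `TGI_BASE_URL` environment variable. A minimal usage sketch, assuming the imports shown (the remote host name and sample snippet are placeholders):

```python
from server.ai_fixer import check_tgi_availability, fix_with_tgi

# Placeholder endpoint; swap in a real TGI host. Not part of this commit.
remote = "http://remote-tgi:8080"

buggy = "def div(a, b):\n    return a / b  # crashes when b == 0"

if check_tgi_availability(remote):
    fixed = fix_with_tgi(buggy, tgi_url=remote)
    print(fixed if fixed else "TGI returned no fix")
else:
    print("TGI unreachable; relying on heuristic fixes")
```

The switch from a bare `except:` to `except Exception:` in the availability check is also meaningful: the bare form would swallow `KeyboardInterrupt` and `SystemExit`, while the new form only catches ordinary errors such as connection failures.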