File size: 3,585 Bytes
1d8ed3b
 
 
 
 
 
 
 
 
 
 
620a7f6
1d8ed3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82faabd
1d8ed3b
82faabd
 
 
 
 
 
 
 
 
 
 
1d8ed3b
82faabd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d8ed3b
82faabd
1d8ed3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import time
import requests
import json
import codecs
from pathlib import Path


# RunPod credentials and endpoint base URL are read from the environment at
# import time; each is None when its variable is not set.
API_KEY = os.environ.get("RUNPOD_API_KEY")
ENDPOINT = os.environ.get("RUNPOD_ENDPOINT")


# HTTP headers sent with requests to the endpoint: bearer auth plus JSON body.
HEADERS = {
    "Authorization": "Bearer {}".format(API_KEY),
    "Content-Type": "application/json",
}


def format_messages_as_prompt(messages):
    """Flatten a chat-style message list into one prompt string.

    Each message is rendered as ``"<Role>: <content>"`` (the role is passed
    through ``str.capitalize``), the rendered turns are separated by a blank
    line, and a trailing ``"Assistant:"`` cue is appended at the end.
    """
    turns = [
        f"{entry['role'].capitalize()}: {entry['content']}" for entry in messages
    ]
    return "\n\n".join([*turns, "Assistant:"])


def run_prompt(
    prompt: str,
    max_tokens: int = 512,
    context_length: int = 8192,
    *,
    poll_interval: float = 3.0,
    request_timeout: float = 60.0,
) -> str:
    """Submit a prompt to the RunPod endpoint and return the response string.

    The payload is POSTed to ``{ENDPOINT}/run``. Two reply shapes are handled:

    * a direct reply containing a ``success`` key, whose ``response`` value is
      returned immediately;
    * a job reply containing an ``id`` key, which is polled at
      ``{ENDPOINT}/status/{id}`` until the job reports ``COMPLETED`` or
      ``FAILED``.

    Args:
        prompt: Prompt text sent as ``input.prompt``.
        max_tokens: Sent both as ``input.max_tokens`` and as
            ``input.options.num_predict``.
        context_length: Sent as ``input.options.num_ctx``.
        poll_interval: Seconds to sleep between status checks while the job
            is ``IN_QUEUE`` or ``IN_PROGRESS``.
        request_timeout: Timeout in seconds applied to every HTTP request
            (the initial POST and each status GET).

    Returns:
        The ``response`` value from the direct reply or from the completed
        job's ``output``.

    Raises:
        RuntimeError: On a non-200 HTTP status, a reply without ``success``
            or ``id``, a handler/job error, a malformed job output, or an
            unrecognised job status.
    """
    payload = {
        "input": {
            "prompt": prompt,
            "max_tokens": max_tokens,
            "options": {
                "num_ctx": context_length,
                "num_predict": max_tokens
            }
        }
    }

    url = f"{ENDPOINT}/run"
    response = requests.post(
        url, headers=HEADERS, json=payload, timeout=request_timeout
    )

    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code}: {response.text}")

    result = response.json()
    print(f"Initial response: {result}")

    # Direct (synchronous) reply from the handler.
    if "success" in result:
        if result.get("success"):
            return result["response"]
        raise RuntimeError(f"Handler error: {result.get('error', 'Unknown error')}")

    if "id" not in result:
        raise RuntimeError(f"Unexpected response format: {result}")

    # Asynchronous job: poll the status route until a terminal state.
    job_id = result["id"]
    print(f"[RunPod] Job started: {job_id}")
    status_url = f"{ENDPOINT}/status/{job_id}"

    while True:
        # A timeout here keeps a stalled connection from hanging the loop.
        status_response = requests.get(
            status_url, headers=HEADERS, timeout=request_timeout
        )

        if status_response.status_code != 200:
            raise RuntimeError(f"Status check failed: {status_response.status_code}")

        status_result = status_response.json()
        status = status_result.get("status")
        print(f"[RunPod] Status: {status}")

        if status == "COMPLETED":
            # A missing or null output is treated as an empty result; any
            # other non-dict output is reported instead of crashing on .get().
            output = status_result.get("output") or {}
            if not isinstance(output, dict):
                raise RuntimeError(f"Job returned unexpected output: {output!r}")
            if output.get("success"):
                return output["response"]
            raise RuntimeError(f"Job failed: {output.get('error', 'Unknown error')}")

        if status == "FAILED":
            error_msg = status_result.get("error", "Job failed without specific error")
            raise RuntimeError(f"RunPod job failed: {error_msg}")

        if status in ("IN_QUEUE", "IN_PROGRESS"):
            print(f"[RunPod] Waiting... Status: {status}")
            time.sleep(poll_interval)
            continue

        raise RuntimeError(f"Unknown job status: {status}")


def clean_and_parse_json(raw_text: str):
    """Clean model output and parse it as JSON.

    Surrounding whitespace, Markdown code-fence backticks, an optional
    leading ``json`` language tag, and wrapping single quotes are removed
    before parsing. If the cleaned text is not valid JSON, one more attempt
    is made after decoding backslash escapes (e.g. ``\\"`` -> ``"``).

    Args:
        raw_text: Text produced by the model.

    Returns:
        The value decoded by ``json.loads``.

    Raises:
        ValueError: If neither attempt yields valid JSON.
    """
    cleaned = raw_text.strip()

    # Strip fence/tag as whole tokens. str.strip("```json") would treat its
    # argument as a character set and also eat the leading "n" of "null".
    cleaned = cleaned.lstrip("`")
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    cleaned = cleaned.rstrip("`").strip("'")

    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        try:
            # Handle escaped quotes. Encoding via latin-1 + backslashreplace
            # first keeps non-ASCII characters intact; decoding the str
            # directly would turn them into mojibake.
            escaped_bytes = cleaned.encode("latin-1", "backslashreplace")
            unescaped = codecs.decode(escaped_bytes, "unicode_escape")
            return json.loads(unescaped)
        except ValueError as e:
            # Covers both UnicodeDecodeError and json.JSONDecodeError.
            raise ValueError("Could not parse JSON output") from e