import json
import sys
import time

import requests

# Chat-completions route of the server under test (localhost, port 8000).
API_URL = "http://localhost:8000/v1/chat/completions"

# Headers sent with every request.
# NOTE(review): "Bearer 0" looks like a placeholder token for a server that
# does not validate credentials — confirm before pointing at a real service.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer 0",
}


def _extract_content(chunk):
    """Return the text carried by one parsed stream chunk, or "" if it has none."""
    if not isinstance(chunk, dict):
        return ""
    choices = chunk.get("choices") or []
    if not choices or not isinstance(choices[0], dict):
        return ""
    delta = choices[0].get("delta")
    if not isinstance(delta, dict):
        return ""
    content = delta.get("content")
    # A delta may carry `"content": null`; treat anything non-string as empty.
    return content if isinstance(content, str) else ""


def run_test(prompt: str, max_tokens=500):
    """Send ``prompt`` to ``API_URL`` and echo the streamed reply to stdout.

    Posts a streaming chat-completions request, prints each content token as
    it arrives, then prints the ``repr`` of the assembled text.

    Args:
        prompt: Text sent as the user message, after the fixed system message.
        max_tokens: Value sent as the request's ``max_tokens`` field.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    payload = {
        "model": "custom-model",
        "messages": [
            {"role": "system", "content": "Answer the user question about Markie Voss."},
            {"role": "user", "content": prompt},
        ],
        # The dict previously listed "max_tokens" a second time with a
        # hard-coded 1024, which silently overrode this argument.
        "max_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.8,
        "eos_token_id": [
            151645,
            151643,
            151668,
        ],
        "enable_thinking": True,
        "stream": True,
    }

    print("=" * 80)
    print("Prompt:", prompt)
    print("Streaming response:\n")

    with requests.post(
        API_URL,
        headers=HEADERS,
        json=payload,
        stream=True,
        timeout=60,
    ) as r:
        print("HTTP status:", r.status_code)
        r.raise_for_status()

        # Event-stream data is UTF-8. Pin the codec so the decoded lines do
        # not depend on whatever charset the response headers imply.
        r.encoding = "utf-8"

        pieces = []
        for line in r.iter_lines(decode_unicode=True):
            # Skip keep-alive blanks and non-data fields (comments, "event:", ...).
            if not line or not line.startswith("data:"):
                continue

            data = line[len("data:"):].strip()
            if data == "[DONE]":
                break

            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                # Best-effort: ignore a malformed event rather than abort the stream.
                continue

            token = _extract_content(chunk)
            if token:
                pieces.append(token)
                print(token, end="", flush=True)

        full_text = "".join(pieces)
        print("\n\n--- END OF STREAM ---")
        print("✅ Full content repr:", repr(full_text))


if __name__ == "__main__":
    # Interactive loop: read a prompt, stream the model's answer, repeat.
    print("Warming up...")
    time.sleep(1)

    while True:
        try:
            p = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D, Ctrl-C or exhausted piped stdin ends the session
            # without a traceback.
            print()
            break

        try:
            run_test(p)
        except requests.RequestException as exc:
            # One failed request should not end the whole session.
            print(f"Request failed: {exc}", file=sys.stderr)