xome committed
Commit 6d99905 · verified · 1 Parent(s): 55e4d94

Upload 6 files

Files changed (6):
  1. README.md +9 -5
  2. app.py +115 -0
  3. gateway.py +69 -0
  4. gitattributes +35 -0
  5. requirements.txt +1 -0
  6. utils.py +9 -0
README.md CHANGED
@@ -1,13 +1,17 @@
  ---
- title: Chat Gpt
- emoji:
+ title: GPT-OSS-120B on AMD MI300X
+ emoji: 💻
  colorFrom: red
- colorTo: purple
+ colorTo: pink
  sdk: gradio
- sdk_version: 5.46.1
+ sdk_version: 5.36.2
  app_file: app.py
  pinned: false
- short_description: chat gpt
+ license: apache-2.0
+ short_description: 'gpt-oss-120b on AMD MI300X GPUs'
+ models:
+ - openai/gpt-oss-120b
  ---
+ ***DISCLAIMER:*** Analysis is provided along with the final response to give users insight into the model's chain of thought, but it may contain content not deemed suitable for inclusion in the final response.

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,115 @@
+ import os
+ import re
+ import logging
+ import gradio as gr
+ from gateway import request_generation
+ from utils import LATEX_DELIMS
+
+ openai_api_key = os.getenv("API_KEY")
+ openai_api_base = os.getenv("API_ENDPOINT")
+ MODEL = os.getenv("MODEL_NAME", "")
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 1024))
+ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
+ QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", CONCURRENCY_LIMIT * 4))
+
+ logging.basicConfig(level=logging.INFO)
+
+ def format_analysis_response(text):
+     """Split raw model output into an analysis block and a final response."""
+     m = re.search(r"analysis(.*?)assistantfinal", text, re.DOTALL)
+     if m:
+         reasoning = m.group(1).strip()
+         response = text.split("assistantfinal", 1)[-1].strip()
+         return (
+             f"**🤔 Analysis:**\n\n*{reasoning}*\n\n---\n\n"
+             f"**💬 Response:**\n\n{response}"
+         )
+     return text.strip()
+
+ # Defaults are required here: gr.ChatInterface only passes the two
+ # additional inputs (system prompt, temperature) defined below.
+ def generate(message, history,
+              system_prompt, temperature,
+              frequency_penalty=0.0, presence_penalty=0.0,
+              max_new_tokens=MAX_NEW_TOKENS):
+
+     if not message.strip():
+         yield "Please enter a prompt."
+         return
+
+     # Normalize history into OpenAI-style message dicts.
+     msgs = []
+     for h in history:
+         if isinstance(h, dict):
+             msgs.append(h)
+         elif isinstance(h, (list, tuple)) and len(h) == 2:
+             u, a = h
+             if u:
+                 msgs.append({"role": "user", "content": u})
+             if a:
+                 msgs.append({"role": "assistant", "content": a})
+
+     logging.info(f"[User] {message}")
+     logging.info(f"[System] {system_prompt} | Temp={temperature}")
+
+     collected, buffer = "", ""
+     yielded_once = False
+
+     try:
+         for delta in request_generation(
+             api_key=openai_api_key, api_base=openai_api_base,
+             message=message, system_prompt=system_prompt,
+             model_name=MODEL, chat_history=msgs,
+             temperature=temperature,
+             frequency_penalty=frequency_penalty,
+             presence_penalty=presence_penalty,
+             max_new_tokens=max_new_tokens,
+         ):
+             if not delta:
+                 continue
+
+             collected += delta
+             buffer += delta
+
+             # Emit the first chunk immediately so the UI shows activity.
+             if not yielded_once:
+                 yield delta
+                 buffer = ""
+                 yielded_once = True
+                 continue
+
+             # Afterwards re-emit the full text on newlines or every ~150 chars.
+             if "\n" in buffer or len(buffer) > 150:
+                 yield collected
+                 buffer = ""
+
+         final = format_analysis_response(collected)
+         # Close an unbalanced $ so KaTeX does not swallow trailing text.
+         if final.count("$") % 2:
+             final += "$"
+         yield final
+
+     except Exception as e:
+         logging.exception("Stream failed")
+         yield f"❌ Error: {e}"
+
+ chatbot_ui = gr.ChatInterface(
+     fn=generate,
+     type="messages",
+     chatbot=gr.Chatbot(
+         label="OSS vLLM Chatbot",
+         type="messages",
+         scale=2,
+         height=600,
+         latex_delimiters=LATEX_DELIMS,
+     ),
+     stop_btn=True,
+     additional_inputs=[
+         gr.Textbox(label="System prompt", value="You are a helpful assistant.", lines=2),
+         gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
+     ],
+     examples=[
+         ["Explain the difference between supervised and unsupervised learning."],
+         ["Summarize the plot of Inception in two sentences."],
+         ["Show me the LaTeX for the quadratic formula."],
+         ["What are the advantages of the AMD Instinct MI300X GPU?"],
+         ["Derive the gradient of softmax cross-entropy loss."],
+         ["Explain why ∂/∂x xⁿ = n·xⁿ⁻¹ holds."],
+     ],
+     title="GPT-OSS-120B on AMD MI300X",
+     description="This Space is an alpha release demonstrating the gpt-oss-120b model running on AMD MI300X infrastructure. The Space is released under the Apache 2.0 license.",
+ )
+
+ if __name__ == "__main__":
+     chatbot_ui.queue(max_size=QUEUE_SIZE,
+                      default_concurrency_limit=CONCURRENCY_LIMIT).launch()
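
For context, a minimal sketch of what `format_analysis_response` does with a raw completion. The regex in app.py expects output of the form `analysis...assistantfinal...`; the sample string below is illustrative, not actual model output.

```python
from app import format_analysis_response

# Illustrative raw completion: an "analysis" channel followed by the
# "assistantfinal" marker and the user-facing answer (hypothetical text).
raw = (
    "analysisThe user wants the quadratic formula in LaTeX."
    "assistantfinal\\[ x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} \\]"
)

# Prints the reasoning under "🤔 Analysis:" and the answer under "💬 Response:".
print(format_analysis_response(raw))
```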
gateway.py ADDED
@@ -0,0 +1,69 @@
+ import logging
+ from typing import Generator, List, Optional
+
+ from openai import OpenAI
+
+ logging.basicConfig(level=logging.INFO)
+
+ def request_generation(
+     api_key: str,
+     api_base: str,
+     message: str,
+     system_prompt: str,
+     model_name: str,
+     chat_history: Optional[List[dict]] = None,
+     temperature: float = 0.3,
+     frequency_penalty: float = 0.0,
+     presence_penalty: float = 0.0,
+     max_new_tokens: int = 1024,
+     tools: Optional[List[dict]] = None,
+     tool_choice: Optional[str] = None,
+ ) -> Generator[str, None, None]:
+     """
+     Send a streaming chat request to an OpenAI-compatible backend using the
+     official OpenAI client, buffering output to improve LaTeX rendering.
+     """
+     client = OpenAI(api_key=api_key, base_url=api_base)
+
+     messages = [{"role": "system", "content": system_prompt}]
+     if chat_history:
+         messages.extend(chat_history)
+     messages.append({"role": "user", "content": message})
+
+     request_args = {
+         "model": model_name,
+         "messages": messages,
+         "temperature": temperature,
+         "frequency_penalty": frequency_penalty,
+         "presence_penalty": presence_penalty,
+         "max_tokens": max_new_tokens,
+         "stream": True,
+     }
+
+     if tools:
+         request_args["tools"] = tools
+     if tool_choice:
+         request_args["tool_choice"] = tool_choice
+
+     logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")
+
+     try:
+         stream = client.chat.completions.create(**request_args)
+
+         buffer = ""
+         for chunk in stream:
+             delta = chunk.choices[0].delta.content or ""
+             buffer += delta
+
+             # Flush on newlines or every ~150 characters so LaTeX spans are
+             # less likely to be split mid-delimiter.
+             if "\n" in buffer or len(buffer) > 150:
+                 yield buffer
+                 buffer = ""
+
+         if buffer:
+             yield buffer
+
+     except Exception as e:
+         logging.exception("[Gateway] Streaming failed")
+         yield f"Error: {e}"
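
A minimal usage sketch for `request_generation` outside the Gradio app; the endpoint, key, and model name below are placeholders for whatever OpenAI-compatible backend (e.g. the vLLM server this Space targets) you point it at.

```python
from gateway import request_generation

# Placeholder endpoint/key; substitute your own backend details.
for chunk in request_generation(
    api_key="EMPTY",
    api_base="http://localhost:8000/v1",
    message="What are the advantages of the AMD Instinct MI300X GPU?",
    system_prompt="You are a helpful assistant.",
    model_name="openai/gpt-oss-120b",
    temperature=0.7,
    max_new_tokens=256,
):
    print(chunk, end="", flush=True)
```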
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1 @@
+ openai
utils.py ADDED
@@ -0,0 +1,9 @@
+
+ # ----------------------------------------------------------------------
+ # KaTeX delimiter config for Gradio
+ # ----------------------------------------------------------------------
+
+ LATEX_DELIMS = [
+     {"left": "\\[", "right": "\\]", "display": True},
+     {"left": "\\(", "right": "\\)", "display": False},
+ ]
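
For reference, a minimal sketch of how `LATEX_DELIMS` feeds Gradio's `latex_delimiters` option in a standalone chatbot, mirroring the wiring already shown in app.py. Only `\[ \]` and `\( \)` are registered, so bare `$...$` is intentionally not treated as a math delimiter.

```python
import gradio as gr
from utils import LATEX_DELIMS

with gr.Blocks() as demo:
    gr.Chatbot(
        type="messages",
        latex_delimiters=LATEX_DELIMS,
        value=[
            {"role": "user", "content": "Show me the quadratic formula."},
            # \[ ... \] matches the display-math delimiter registered above.
            {"role": "assistant",
             "content": "\\[ x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} \\]"},
        ],
    )

if __name__ == "__main__":
    demo.launch()
```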