Trouter-Library committed
Commit 26b6ee7 · verified · 1 Parent(s): 851f952

Update README.md

Files changed (1): README.md (+181 -1)
README.md CHANGED
@@ -78,4 +78,184 @@ def chat_with_helion(prompt, max_length=512, temperature=0.7):
```python
# Example usage
prompt = "Explain the concept of machine learning in simple terms."
response = chat_with_helion(prompt)
print(response)
```
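You can also chat with Helion-V1 through the high-level `pipeline` API, which makes multi-turn conversations straightforward: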
```python
import torch
from transformers import AutoTokenizer, pipeline

model_name = "DeepXR/Helion-V1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create a chat pipeline
chat_pipeline = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16
)

def conversational_chat(messages, max_new_tokens=256):
    # Render the message list with the model's chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    outputs = chat_pipeline(
        formatted_prompt,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False  # return only the newly generated reply
    )

    return outputs[0]['generated_text']

# Multi-turn conversation
conversation = [
    {"role": "user", "content": "What's the weather like today?"},
    {"role": "assistant", "content": "I don't have real-time weather data, but I can help you understand weather patterns or find weather services!"},
    {"role": "user", "content": "Can you explain how weather forecasting works?"}
]

response = conversational_chat(conversation)
print(response)
```
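For an interactive web UI, you can wrap the model in a Streamlit chat app. The app below caches the model with `@st.cache_resource` and keeps the conversation history in `st.session_state`: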
```python
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

@st.cache_resource
def load_model():
    model_name = "DeepXR/Helion-V1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    return tokenizer, model

def generate_response(prompt, tokenizer, model, max_new_tokens=512):
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, not the prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response

# Streamlit UI
st.set_page_config(page_title="Helion-V1 Chat", page_icon="🤖")
st.title("Helion-V1 Chat Interface")
st.write("Chat with the Helion-V1 AI assistant")

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# Load model
with st.spinner("Loading Helion-V1 model..."):
    tokenizer, model = load_model()

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("What would you like to know?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_response(prompt, tokenizer, model)
            st.markdown(response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})
```
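To launch the UI, save the script (for example as `app.py`; the filename is your choice) and run `streamlit run app.py`.

To serve Helion-V1 over HTTP instead, the FastAPI app below loads the model at startup and exposes a `/chat` endpoint alongside a `/health` check: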
```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import uvicorn
from typing import List, Optional

app = FastAPI(title="Helion-V1 API", version="1.0.0")

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9

class ChatResponse(BaseModel):
    response: str
    tokens_used: int

# Load model globally at startup
@app.on_event("startup")
async def load_model():
    global tokenizer, model
    model_name = "DeepXR/Helion-V1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )

@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    try:
        # Format messages with the model's chat template
        formatted_prompt = tokenizer.apply_chat_template(
            [msg.dict() for msg in request.messages],
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize
        input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt").to(model.device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                input_ids,
                max_new_tokens=request.max_tokens,
                temperature=request.temperature,
                do_sample=True,
                top_p=request.top_p,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens, not the prompt
        response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
        tokens_used = outputs.shape[1]  # total tokens (prompt + completion)

        return ChatResponse(response=response, tokens_used=tokens_used)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health_check():
    return {"status": "healthy", "model": "Helion-V1"}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
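Once the server is running (for example via `python app.py`, assuming the script above is saved as `app.py`), you can call it from any HTTP client. Below is a minimal client sketch using the `requests` library; the host and port follow the `uvicorn.run` call above, and the payload fields mirror the `ChatRequest` model:

```python
import requests

# Example request against the /chat endpoint defined above
payload = {
    "messages": [
        {"role": "user", "content": "Explain machine learning in one sentence."}
    ],
    "max_tokens": 128,
    "temperature": 0.7
}

resp = requests.post("http://localhost:8000/chat", json=payload, timeout=120)
resp.raise_for_status()

data = resp.json()
print(data["response"])
print("Tokens used:", data["tokens_used"])
```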