Trouter-Library committed
Commit 55360d9 · verified · 1 Parent(s): 26b6ee7

Update README.md

Files changed (1):
  1. README.md +12 -218
README.md CHANGED
@@ -26,6 +26,8 @@ Helion-V1 is a conversational AI model designed to be helpful, harmless, and hon
  - **Finetuned from:** Troviku-1.1
  - **Model Size:** 7B parameters
  - **Context Length:** 4096 tokens
+ - **Architecture:** Transformer-based decoder-only
+ - **Training Method:** Supervised Fine-Tuning + RLHF

  ## Model Capabilities

@@ -35,227 +37,19 @@ Helion-V1 is a conversational AI model designed to be helpful, harmless, and hon
  - Creative writing and content creation
  - Problem solving and reasoning
  - Safe and ethical responses
+ - Task-oriented assistance
+ - Technical support

- ## Installation
+ ## Quick Start
+
+ ### Installation

  ```bash
+ # Install required packages
  pip install transformers torch accelerate
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
-
- model_name = "DeepXR/Helion-V1"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     torch_dtype=torch.float16,
-     device_map="auto"
- )
-
- def chat_with_helion(prompt, max_length=512, temperature=0.7):
-     messages = [
-         {"role": "user", "content": prompt}
-     ]
-
-     input_ids = tokenizer.apply_chat_template(
-         messages,
-         return_tensors="pt"
-     ).to(model.device)
-
-     with torch.no_grad():
-         outputs = model.generate(
-             input_ids,
-             max_length=max_length,
-             temperature=temperature,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
-             top_p=0.9,
-             repetition_penalty=1.1
-         )
-
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response
-
- # Example usage
- prompt = "Explain the concept of machine learning in simple terms."
- response = chat_with_helion(prompt)
- print(response)
- from transformers import pipeline
-
- # Create a chat pipeline
- chat_pipeline = pipeline(
-     "text-generation",
-     model="DeepXR/Helion-V1",
-     tokenizer=model_name,
-     device_map="auto",
-     torch_dtype=torch.float16
- )
-
- def conversational_chat(messages, max_new_tokens=256):
-     formatted_prompt = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     outputs = chat_pipeline(
-         formatted_prompt,
-         max_new_tokens=max_new_tokens,
-         temperature=0.7,
-         do_sample=True,
-         top_p=0.9,
-         repetition_penalty=1.1
-     )
-
-     return outputs[0]['generated_text']
-
- # Multi-turn conversation
- conversation = [
-     {"role": "user", "content": "What's the weather like today?"},
-     {"role": "assistant", "content": "I don't have real-time weather data, but I can help you understand weather patterns or find weather services!"},
-     {"role": "user", "content": "Can you explain how weather forecasting works?"}
- ]
-
- response = conversational_chat(conversation)
- print(response)
- import streamlit as st
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- @st.cache_resource
- def load_model():
-     model_name = "DeepXR/Helion-V1"
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         torch_dtype=torch.float16,
-         device_map="auto"
-     )
-     return tokenizer, model
-
- def generate_response(prompt, tokenizer, model, max_length=512):
-     messages = [{"role": "user", "content": prompt}]
-     input_ids = tokenizer.apply_chat_template(
-         messages,
-         return_tensors="pt"
-     ).to(model.device)
-
-     with torch.no_grad():
-         outputs = model.generate(
-             input_ids,
-             max_length=max_length,
-             temperature=0.7,
-             do_sample=True,
-             top_p=0.9,
-             repetition_penalty=1.1,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response
-
- # Streamlit UI
- st.set_page_config(page_title="Helion-V1 Chat", page_icon="🤖")
- st.title("Helion-V1 Chat Interface")
- st.write("Chat with the Helion-V1 AI assistant")
-
- # Initialize session state
- if "messages" not in st.session_state:
-     st.session_state.messages = []
-
- # Load model
- with st.spinner("Loading Helion-V1 model..."):
-     tokenizer, model = load_model()
-
- # Display chat messages
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- # Chat input
- if prompt := st.chat_input("What would you like to know?"):
-     # Add user message to chat history
-     st.session_state.messages.append({"role": "user", "content": prompt})
-     with st.chat_message("user"):
-         st.markdown(prompt)
-
-     # Generate response
-     with st.chat_message("assistant"):
-         with st.spinner("Thinking..."):
-             response = generate_response(prompt, tokenizer, model)
-             st.markdown(response)
-
-     # Add assistant response to chat history
-     st.session_state.messages.append({"role": "assistant", "content": response})
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import uvicorn
- from typing import List, Optional
-
- app = FastAPI(title="Helion-V1 API", version="1.0.0")
-
- class ChatMessage(BaseModel):
-     role: str
-     content: str
-
- class ChatRequest(BaseModel):
-     messages: List[ChatMessage]
-     max_tokens: Optional[int] = 512
-     temperature: Optional[float] = 0.7
-     top_p: Optional[float] = 0.9
-
- class ChatResponse(BaseModel):
-     response: str
-     tokens_used: int
-
- # Load model globally
- @app.on_event("startup")
- async def load_model():
-     global tokenizer, model
-     model_name = "DeepXR/Helion-V1"
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         torch_dtype=torch.float16,
-         device_map="auto"
-     )
-
- @app.post("/chat", response_model=ChatResponse)
- async def chat_endpoint(request: ChatRequest):
-     try:
-         # Format messages
-         formatted_prompt = tokenizer.apply_chat_template(
-             [msg.dict() for msg in request.messages],
-             tokenize=False,
-             add_generation_prompt=True
-         )
-
-         # Tokenize
-         input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt").to(model.device)
-
-         # Generate
-         with torch.no_grad():
-             outputs = model.generate(
-                 input_ids,
-                 max_length=input_ids.shape[1] + request.max_tokens,
-                 temperature=request.temperature,
-                 do_sample=True,
-                 top_p=request.top_p,
-                 repetition_penalty=1.1,
-                 pad_token_id=tokenizer.eos_token_id
-             )
-
-         # Decode response
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         tokens_used = outputs.shape[1]
-
-         return ChatResponse(response=response, tokens_used=tokens_used)
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))

- @app.get("/health")
- async def health_check():
-     return {"status": "healthy", "model": "Helion-V1"}
+ # For additional features
+ pip install bitsandbytes scipy

- if __name__ == "__main__":
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+ # For web interfaces
+ pip install gradio streamlit fastapi uvicorn
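
After this commit the Quick Start stops at installation. For readers tracing what was cut, here is a condensed sketch of the removed basic-usage example. It assumes, as that code did, that the DeepXR/Helion-V1 checkpoint ships a chat template, and it corrects two quirks of the original: `max_length` also counted prompt tokens, and the full sequence including the echoed prompt was decoded.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "DeepXR/Helion-V1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

def chat_with_helion(prompt, max_new_tokens=256, temperature=0.7):
    """Single-turn chat helper, condensed from the removed example."""
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # the removed code omitted this
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,  # bounds new tokens only, unlike max_length
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the generated continuation, not the echoed prompt.
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

print(chat_with_helion("Explain machine learning in simple terms."))
```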
 
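The new install block lists bitsandbytes under "additional features" without showing what it enables. One plausible use, assuming the checkpoint works with the standard transformers quantization path (nothing in this commit confirms it), is 8-bit weight loading to roughly halve GPU memory versus float16:

```python
# Hypothetical 8-bit loading via the optional bitsandbytes dependency;
# model support for quantized inference is an assumption, not documented here.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "DeepXR/Helion-V1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
```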
 
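gradio is likewise newly listed but never demonstrated; the removed examples covered only Streamlit and FastAPI. A minimal Gradio chat UI might look like the following sketch, which reuses the hypothetical `chat_with_helion` helper from the first sketch above:

```python
# Sketch of a Gradio chat UI; `chat_with_helion` is the helper from the
# usage sketch above, not something the README defines after this commit.
import gradio as gr

def respond(message, history):
    # history is unused: the underlying helper is single-turn.
    return chat_with_helion(message)

gr.ChatInterface(respond).launch()
```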