DeepXR
/

Helion-V1

@@ -26,6 +26,8 @@ Helion-V1 is a conversational AI model designed to be helpful, harmless, and hon
 - **Finetuned from:** Troviku-1.1
 - **Model Size:** 7B parameters
 - **Context Length:** 4096 tokens
 ## Model Capabilities
@@ -35,227 +37,19 @@ Helion-V1 is a conversational AI model designed to be helpful, harmless, and hon
 -  Creative writing and content creation
 -  Problem solving and reasoning
 -  Safe and ethical responses
-## Installation
 ```bash
 pip install transformers torch accelerate
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-model_name = "DeepXR/Helion-V1"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
-def chat_with_helion(prompt, max_length=512, temperature=0.7):
-    messages = [
-        {"role": "user", "content": prompt}
-    ]
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        return_tensors="pt"
-    ).to(model.device)
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids,
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,
-            top_p=0.9,
-            repetition_penalty=1.1
-        )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-# Example usage
-prompt = "Explain the concept of machine learning in simple terms."
-response = chat_with_helion(prompt)
-print(response)
-from transformers import pipeline
-# Create a chat pipeline
-chat_pipeline = pipeline(
-    "text-generation",
-    model="DeepXR/Helion-V1",
-    tokenizer=model_name,
-    device_map="auto",
-    torch_dtype=torch.float16
-)
-def conversational_chat(messages, max_new_tokens=256):
-    formatted_prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    outputs = chat_pipeline(
-        formatted_prompt,
-        max_new_tokens=max_new_tokens,
-        temperature=0.7,
-        do_sample=True,
-        top_p=0.9,
-        repetition_penalty=1.1
-    )
-    return outputs[0]['generated_text']
-# Multi-turn conversation
-conversation = [
-    {"role": "user", "content": "What's the weather like today?"},
-    {"role": "assistant", "content": "I don't have real-time weather data, but I can help you understand weather patterns or find weather services!"},
-    {"role": "user", "content": "Can you explain how weather forecasting works?"}
-]
-response = conversational_chat(conversation)
-print(response)
-import streamlit as st
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-@st.cache_resource
-def load_model():
-    model_name = "DeepXR/Helion-V1"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        device_map="auto"
-    )
-    return tokenizer, model
-def generate_response(prompt, tokenizer, model, max_length=512):
-    messages = [{"role": "user", "content": prompt}]
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        return_tensors="pt"
-    ).to(model.device)
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids,
-            max_length=max_length,
-            temperature=0.7,
-            do_sample=True,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            pad_token_id=tokenizer.eos_token_id
-        )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-# Streamlit UI
-st.set_page_config(page_title="Helion-V1 Chat", page_icon="🤖")
-st.title("Helion-V1 Chat Interface")
-st.write("Chat with the Helion-V1 AI assistant")
-# Initialize session state
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Load model
-with st.spinner("Loading Helion-V1 model..."):
-    tokenizer, model = load_model()
-# Display chat messages
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-# Chat input
-if prompt := st.chat_input("What would you like to know?"):
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    # Generate response
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            response = generate_response(prompt, tokenizer, model)
-            st.markdown(response)
-    # Add assistant response to chat history
-    st.session_state.messages.append({"role": "assistant", "content": response})
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import uvicorn
-from typing import List, Optional
-app = FastAPI(title="Helion-V1 API", version="1.0.0")
-class ChatMessage(BaseModel):
-    role: str
-    content: str
-class ChatRequest(BaseModel):
-    messages: List[ChatMessage]
-    max_tokens: Optional[int] = 512
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 0.9
-class ChatResponse(BaseModel):
-    response: str
-    tokens_used: int
-# Load model globally
-@app.on_event("startup")
-async def load_model():
-    global tokenizer, model
-    model_name = "DeepXR/Helion-V1"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        device_map="auto"
-    )
-@app.post("/chat", response_model=ChatResponse)
-async def chat_endpoint(request: ChatRequest):
-    try:
-        # Format messages
-        formatted_prompt = tokenizer.apply_chat_template(
-            [msg.dict() for msg in request.messages],
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        # Tokenize
-        input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt").to(model.device)
-        # Generate
-        with torch.no_grad():
-            outputs = model.generate(
-                input_ids,
-                max_length=input_ids.shape[1] + request.max_tokens,
-                temperature=request.temperature,
-                do_sample=True,
-                top_p=request.top_p,
-                repetition_penalty=1.1,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        # Decode response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        tokens_used = outputs.shape[1]
-        return ChatResponse(response=response, tokens_used=tokens_used)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy", "model": "Helion-V1"}
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 - **Finetuned from:** Troviku-1.1
 - **Model Size:** 7B parameters
 - **Context Length:** 4096 tokens
+- **Architecture:** Transformer-based decoder-only
+- **Training Method:** Supervised Fine-Tuning + RLHF
 ## Model Capabilities
 -  Creative writing and content creation
 -  Problem solving and reasoning
 -  Safe and ethical responses
+-  Task-oriented assistance
+-  Technical support
+## Quick Start
+### Installation
 ```bash
+# Install required packages
 pip install transformers torch accelerate
+# For additional features
+pip install bitsandbytes scipy
+# For web interfaces
+pip install gradio streamlit fastapi uvicorn