Spaces:
Runtime error
Runtime error
File size: 1,583 Bytes
e6da26b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | import os
import gradio as ui
from vllm import LLM, SamplingParams
from huggingface_hub import login
# Log in to the Hugging Face Hub only when a token is present in the
# environment; the login call is skipped entirely when HF_TOKEN is unset/empty.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "coder-vansh/cypher_llm_model"

print("🔥 Spawning ZeroGPU vLLM Layer for Gemma 4...")

# The engine is built once at import time and reused by predict() for every
# request. Prompts plus generations are capped at a 2048-token context.
# NOTE(review): trust_remote_code=True permits code shipped in the model
# repository to run during loading — confirm the repository is trusted.
llm = LLM(
    model=MODEL_ID,
    trust_remote_code=True,
    max_model_len=2048,
)
def _content_to_text(content):
    """Flatten one chat message's content to plain text, or None to skip it."""
    if content is None:
        return None
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Structured content (a list of parts): keep only the textual parts.
        # NOTE(review): assumes text parts look like {"type": "text", "text": ...}
        # — confirm against the installed Gradio version.
        texts = [
            part if isinstance(part, str) else part["text"]
            for part in content
            if isinstance(part, str)
            or (isinstance(part, dict) and isinstance(part.get("text"), str))
        ]
        return "\n".join(texts) if texts else None
    # Anything else is interpolated via str(), as a plain f-string would do.
    return str(content)


def _history_to_turns(history):
    """Yield (role, content) for each prior turn, in conversation order."""
    for entry in history or ():
        if isinstance(entry, dict):
            # Role/content message dicts. Only user and assistant turns are
            # replayed; the system prompt is supplied by predict() itself.
            role = entry.get("role")
            if role in ("user", "assistant"):
                yield role, entry.get("content")
        else:
            # Legacy pair format: one (user, assistant) pair per exchange.
            user_msg, bot_msg = entry
            yield "user", user_msg
            yield "assistant", bot_msg


def predict(message, history):
    """Generate CYPHER's reply to ``message`` given the prior chat ``history``.

    The prompt is a single string: a ``<|system|>`` section, one
    ``<|user|>`` / ``<|assistant|>`` section per prior turn, then the new
    user message followed by an open ``<|assistant|>`` tag for the model to
    complete.

    Args:
        message: The latest user message.
        history: Prior turns. May be empty or ``None``. Accepted either as a
            sequence of ``(user, assistant)`` pairs or as a sequence of
            ``{"role": ..., "content": ...}`` dicts. Turns whose content is
            ``None`` are omitted from the prompt.

    Returns:
        The generated text of the first completion, or a
        ``"⚠️ Production Glitch: ..."`` string if generation raises.
    """
    system_prompt = "You are CYPHER, a casual, witty Hinglish AI companion by Vansh & Aditya. Tagline: \"Not just an AI — YOUR AI.\""

    segments = [f"<|system|>\n{system_prompt}"]
    for role, content in _history_to_turns(history):
        text = _content_to_text(content)
        if text is not None:
            segments.append(f"\n<|{role}|>\n{text}")
    segments.append(f"\n<|user|>\n{message}\n<|assistant|>\n")
    formatted_prompt = "".join(segments)

    sampling_params = SamplingParams(temperature=0.7, max_tokens=250, top_p=0.95)
    try:
        outputs = llm.generate([formatted_prompt], sampling_params)
        return outputs[0].outputs[0].text
    except Exception as e:
        # Deliberate best-effort boundary: the failure is reported as the
        # chat reply rather than propagated to the caller.
        return f"⚠️ Production Glitch: {e}"
# Page layout: two Markdown headers above a chat widget whose replies come
# from predict().
with ui.Blocks() as demo:
    ui.Markdown("# 🤖 CYPHER AI Live Production Space (ZeroGPU Enabled)")
    ui.Markdown("### *Not just an AI — YOUR AI.* | Developed by Vansh & Aditya")

    # Custom input box handed to the chat interface in place of its default.
    chat_input = ui.Textbox(
        placeholder="Bhai se kuch bhi poocho...",
        container=False,
        scale=7,
    )
    ui.ChatInterface(fn=predict, textbox=chat_input)

# Bind to all network interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)