nova committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,68 @@
+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+# Model Configuration
+MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
+# Check GPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"🚀 Loading {MODEL_ID} on {device}...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_ID,
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    raise  # No fallback: fail fast so the load error is visible instead of a later NameError
+def format_prompt(message, history, system_prompt):
+    # Phi-3 Format
+    # <|system|>\n...<|end|>\n<|user|>\n...<|end|>\n<|assistant|>\n
+
+    prompt = f"<|system|>\n{system_prompt}<|end|>\n"
+    for user_msg, bot_msg in history:
+        prompt += f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n{bot_msg}<|end|>\n"
+    prompt += f"<|user|>\n{message}<|end|>\n<|assistant|>\n"
+    return prompt
+def chat(message, history):
+    # Default System Prompt for Lumin
+    SYSTEM_PROMPT = "You are Lumin Flash, a helpful and efficient AI assistant."
+
+    # 1. Format Input
+    prompt_text = format_prompt(message, history, SYSTEM_PROMPT)
+    inputs = tokenizer(prompt_text, return_tensors="pt").to(device)
+    # 2. Streamer
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+    # 3. Generate
+    generation_kwargs = dict(
+        inputs,
+        streamer=streamer,
+        max_new_tokens=1024,
+        temperature=0.7,
+        do_sample=True
+    )
+
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # 4. Yield Output
+    partial_text = ""
+    for new_text in streamer:
+        partial_text += new_text
+        yield partial_text
+# Gradio Interface
+demo = gr.ChatInterface(
+    fn=chat,
+    chatbot=gr.Chatbot(height=600),
+    textbox=gr.Textbox(placeholder="Ask Lumin Flash...", container=False, scale=7),
+    title="Lumin Flash (Phi-3.5)",
+    theme="soft",
+    retry_btn=None,
+    undo_btn=None,
+    clear_btn="Clear",
+)
+if __name__ == "__main__":
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
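Note (an assumption about the surrounding Space, not part of this diff): device_map="auto" requires the accelerate package alongside torch and transformers in the Space's requirements.txt, and the retry_btn / undo_btn / clear_btn arguments together with the (user_msg, bot_msg) history tuples appear to assume Gradio 4.x, since Gradio 5 removed those ChatInterface parameters.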