Spaces:

LuminLabs
/

lumin_pro

Sleeping

App Files Files Community

nova committed on Jan 10

Commit

5c1061d

verified ·

1 Parent(s): f5ca8dc

Create app.py

Browse files

Files changed (1) hide show

app.py +50 -0

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+# PRO: Qwen 2.5 14B Instruct (GGUF) - Q4_K_M
+# Cabe en 16GB RAM (justo pero funciona)
+REPO_ID = "bartowski/Qwen2.5-14B-Instruct-GGUF"
+FILENAME = "Qwen2.5-14B-Instruct-Q4_K_M.gguf"
+print(f"Downloading {FILENAME} from {REPO_ID}...")
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+print("Loading model...")
+llm = Llama(
+    model_path=model_path,
+    n_ctx=8192,         # Contexto moderado por RAM
+    n_threads=2,        # CPU Friendly
+    verbose=False
+)
+def generate_pro(message, history):
+    # Formato ChatML (Estandard de Qwen)
+    prompt = ""
+    for user_msg, bot_msg in history:
+        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
+    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    stream = llm.create_completion(
+        prompt,
+        max_tokens=2048,
+        stop=["<|im_end|>"],
+        stream=True,
+        temperature=0.7,
+        top_p=0.9
+    )
+    partial_text = ""
+    for output in stream:
+        delta = output['choices'][0]['text']
+        partial_text += delta
+        yield partial_text
+chat_interface = gr.ChatInterface(
+    fn=generate_pro,
+    title="🌟 Lumin Pro (Qwen 14B)",
+    description="Running Qwen2.5-14B-Instruct (GGUF). Balanced Power.",
+)
+if __name__ == "__main__":
+    chat_interface.launch(server_name="0.0.0.0", server_port=7860)