Update app.py
app.py CHANGED
@@ -1,141 +1,30 @@
 import gradio as gr
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-
-# ============================================
-# Configuration - KTH ID2223 Lab 2
-# ============================================
-MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
-MODEL_FILENAME = "unsloth.Q4_K_M.gguf"
-
-#
-model_path = hf_hub_download(
-    repo_id=MODEL_REPO,
-    filename=MODEL_FILENAME,
-)
-print(f"✅ Model downloaded: {model_path}")
-
-print("🔄 Loading model (this may take a minute on CPU)...")
-llm = Llama(
-    model_path=model_path,
-)
-
-
-# ============================================
-# Llama 3.2 Instruct chat template
-# ============================================
-def format_prompt(message: str, history: list, system_prompt: str) -> str:
-    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
-
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
-        if assistant_msg:
-            prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
-
-    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
-    prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n"
-
-    return prompt
-
-# ============================================
-# Response generation function
-# ============================================
-def chat(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
-    prompt = format_prompt(message, history, system_prompt)
-
-    response = ""
-    stream = llm(
-        prompt,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        stop=["<|eot_id|>", "<|end_of_text|>"],
-        stream=True
-    )
-
-    for chunk in stream:
-        token = chunk["choices"][0]["text"]
-        response += token
-        yield response
-
|
| 72 |
-
# ============================================
|
| 73 |
-
# Gradio 界面
|
| 74 |
-
# ============================================
|
| 75 |
-
DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant."
|
| 76 |
-
|
| 77 |
-
with gr.Blocks(theme=gr.themes.Soft(), title="🦙 Llama 3.2 ChatBot") as demo:
|
| 78 |
-
|
| 79 |
-
gr.Markdown(
|
| 80 |
-
"""
|
| 81 |
-
# 🦙 Llama 3.2 3B - Fine-tuned on FineTome
|
| 82 |
-
**KTH ID2223 Lab 2** | [Model](https://huggingface.co/Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF)
|
| 83 |
-
"""
|
| 84 |
-
)
|
| 85 |
-
|
| 86 |
-
chatbot = gr.Chatbot(label="Chat", height=400, show_copy_button=True)
|
| 87 |
-
|
| 88 |
-
with gr.Row():
|
| 89 |
-
msg = gr.Textbox(placeholder="Type your message...", scale=4, container=False)
|
| 90 |
-
submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
|
| 91 |
-
|
| 92 |
-
with gr.Accordion("⚙️ Settings", open=False):
|
| 93 |
-
system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=2)
|
| 94 |
-
with gr.Row():
|
| 95 |
-
max_tokens = gr.Slider(64, 512, value=256, step=32, label="Max Tokens")
|
| 96 |
-
temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
|
| 97 |
-
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
|
| 98 |
-
|
| 99 |
-
with gr.Row():
|
| 100 |
-
clear_btn = gr.Button("🗑️ Clear")
|
| 101 |
-
retry_btn = gr.Button("🔄 Retry")
|
| 102 |
-
|
| 103 |
-
gr.Examples(
|
| 104 |
-
examples=["Hello!", "Explain machine learning.", "What is fine-tuning?"],
|
| 105 |
-
inputs=msg
|
| 106 |
-
)
|
| 107 |
-
|
| 108 |
-
def user_input(message, history):
|
| 109 |
-
return "", history + [[message, None]]
|
| 110 |
-
|
| 111 |
-
def bot_response(history, system_prompt, max_tokens, temperature, top_p):
|
| 112 |
-
if not history:
|
| 113 |
-
return history
|
| 114 |
-
message = history[-1][0]
|
| 115 |
-
history_for_model = history[:-1]
|
| 116 |
-
for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
|
| 117 |
-
history[-1][1] = response
|
| 118 |
-
yield history
|
| 119 |
-
|
| 120 |
-
def retry_last(history, system_prompt, max_tokens, temperature, top_p):
|
| 121 |
-
if history:
|
| 122 |
-
history[-1][1] = None
|
| 123 |
-
message = history[-1][0]
|
| 124 |
-
history_for_model = history[:-1]
|
| 125 |
-
for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
|
| 126 |
-
history[-1][1] = response
|
| 127 |
-
yield history
|
| 128 |
-
|
| 129 |
-
msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 130 |
-
bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
|
| 131 |
-
)
|
| 132 |
-
submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 133 |
-
bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
|
| 134 |
-
)
|
| 135 |
-
clear_btn.click(lambda: [], None, chatbot, queue=False)
|
| 136 |
-
retry_btn.click(retry_last, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot)
|
| 137 |
-
|
| 138 |
-
gr.Markdown("---\nBuilt with ❤️ | KTH ID2223 Lab 2")
|
| 139 |
-
|
| 140 |
-
if __name__ == "__main__":
|
| 141 |
-
demo.queue().launch()
|
|
|
|
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+model_name = "Marcus719/Llama-3.2-3B-Instruct-Lab2"
+
+# load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    low_cpu_mem_usage=True,
+    device_map="auto"
+)
+
+# define generate function
+def generate_text(input_text):
+    inputs = tokenizer(input_text, return_tensors="pt")
+    outputs = model.generate(inputs["input_ids"], max_length=100, num_return_sequences=1)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# create gradio interface
+interface = gr.Interface(
+    fn=generate_text,
+    inputs="text",
+    outputs="text",
+    title="Hugging Face model Demo",
+    description="say something"
+)
+
+# launch the app
+interface.launch()
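
A note on the new inference path: the removed llama.cpp version hand-built the Llama 3.2 <|start_header_id|> prompt in format_prompt(), while the new generate_text() feeds raw text to an instruct-tuned checkpoint and caps output with max_length=100, which counts prompt tokens too. Below is a minimal sketch of a template-aware alternative, assuming the Marcus719/Llama-3.2-3B-Instruct-Lab2 tokenizer ships the standard Llama 3.2 chat template; the helper name generate_chat_reply and the sampling values are illustrative, not from the commit:

import torch

def generate_chat_reply(user_message, system_prompt="You are a helpful assistant."):
    # hypothetical helper, not part of the commit
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    # apply_chat_template renders the <|start_header_id|>... prompt that the
    # removed format_prompt() assembled by hand (assumes the repo ships it)
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=256,  # budgets the reply only, unlike max_length
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    # drop the prompt tokens so only the assistant's reply is returned
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

One packaging note: device_map="auto" relies on the accelerate package, so the Space's requirements presumably need it alongside transformers and torch.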
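
The gr.Interface rewrite also drops the token-by-token streaming the old Blocks UI had. If that matters, transformers' TextIteratorStreamer can restore it under the same assumptions, since Gradio treats a generator function as a streaming output; generate_text_stream is again a hypothetical name:

from threading import Thread
from transformers import TextIteratorStreamer

def generate_text_stream(input_text):
    # hypothetical streaming variant, not part of the commit
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # run generate in a worker thread; the streamer yields decoded text pieces
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256),
    )
    thread.start()
    partial = ""
    for piece in streamer:
        partial += piece
        yield partial  # gr.Interface(fn=generate_text_stream, ...) streams this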