samzito12 committed on
Commit
503ff85
·
1 Parent(s): 658ec58

updated app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -42
app.py CHANGED
@@ -1,68 +1,71 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
 
3
 
4
- # Load YOUR fine-tuned model
5
- model_path = "samzito12/lora_model"
6
 
7
- print("Loading model...")
8
- llm = Llama.from_pretrained(
9
- repo_id=model_path,
10
- filename="llama-3.2-3b-instruct.Q8_0.gguf",
11
- n_ctx=2048,
12
- n_threads=2,
13
- verbose=False
 
 
 
14
  )
15
 
16
- # System prompt to fix identity issue
17
- SYSTEM_PROMPT = """You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on the FineTome dataset. You are NOT ChatGPT and you are NOT made by OpenAI. You were created as part of a university machine learning project."""
18
 
19
  def chat(message, history):
20
- """Generate response from YOUR fine-tuned model"""
 
21
 
22
- # Build conversation with system prompt
23
- conversation = f"<|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|>"
24
-
25
- # Add chat history
26
  for user_msg, assistant_msg in history:
27
- conversation += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
28
- conversation += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
 
29
 
30
- # Add current message
31
- conversation += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
32
 
33
- # Generate response
34
- response = llm(
35
- conversation,
36
- max_tokens=512,
37
- temperature=0.7,
38
- top_p=0.9,
39
- stop=["<|eot_id|>", "<|start_header_id|>"],
40
- echo=False
41
- )
 
42
 
43
- return response['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
44
 
45
- # Create Gradio interface
46
  demo = gr.ChatInterface(
47
  chat,
48
  title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
49
  description="""
50
- **Model**: Llama-3.2-3B fine-tuned on FineTome-100k dataset
51
-
52
- This chatbot uses a custom fine-tuned model, NOT ChatGPT.
53
 
54
- Created for ID2223 Lab 2 at KTH.
55
  """,
56
  examples=[
57
  "What model are you?",
58
  "Explain machine learning in simple terms",
59
- "Write a Python function to reverse a string",
60
- "What is the weather like in Stockholm?"
61
  ],
62
- theme="soft",
63
- retry_btn="🔄 Retry",
64
- undo_btn="↩️ Undo",
65
- clear_btn="🗑️ Clear"
66
  )
67
 
68
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
 
5
+ print("Chargement du modèle...")
 
6
 
7
+ model_name = "samzito12/lora_model"
8
+
9
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
10
+ tokenizer.pad_token = tokenizer.eos_token
11
+
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ model_name,
14
+ torch_dtype=torch.float16,
15
+ device_map="cpu",
16
+ low_cpu_mem_usage=True
17
  )
18
 
19
+ SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on the FineTome dataset. You are NOT ChatGPT and NOT made by OpenAI."
 
20
 
21
def chat(message, history):
    """Generate a reply from the fine-tuned model for the Gradio chat UI.

    Args:
        message: The latest user message (str).
        history: Previous turns as a list of (user_msg, assistant_msg) tuples,
            as supplied by gr.ChatInterface in tuple mode.

    Returns:
        The assistant's reply as a plain string.
    """
    # Prepend the system prompt, then replay the conversation so far.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # BUG FIX: the tokenizer keyword is `truncation`, not `truncate`.
    # The old spelling was silently ignored, so long chats were never
    # truncated and could exceed the model's context window.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    )

    # Inference only — disable autograd to save memory and time.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence (prompt + completion) as plain text.
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the final "Assistant:" marker; fall back to
    # stripping the prompt prefix if the marker is absent from the decode.
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()

    return response
54
 
 
55
# Gradio chat UI wired to the fine-tuned model's chat() callback.
_DESCRIPTION = """
**Model:** Llama-3.2-3B fine-tuned on FineTome-100k dataset

This is NOT ChatGPT - it's a custom fine-tuned model for ID2223 Lab 2.
"""

# Canned prompts shown under the input box.
_EXAMPLES = [
    "What model are you?",
    "Explain machine learning in simple terms",
    "Write a Python function to reverse a string",
]

demo = gr.ChatInterface(
    chat,
    title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme="soft",
)
70
 
71
  if __name__ == "__main__":