Spaces:

Rafay17
/

chatbot

Build error

Rafay17 committed on Oct 13, 2024

Commit

6982029

verified ·

1 Parent(s): 5f24d8f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,37 +1,34 @@
-import gradio as gr
-from transformers import AutoTokenizer, FastLanguageModel
 # Load the model and tokenizer
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name="lora_model",  # Replace with your trained model name
-    max_seq_length=512,
-    dtype="float16",
-    load_in_4bit=True,
-)
-FastLanguageModel.for_inference(model)
-# Define the inference function
-def generate_response(user_input):
-    # Prepare the input for the model
-    labeled_prompt = (
-        "Please provide the response with the following labels:\n"
-        f"User Input: {user_input}\n"
-        "Response:"
-    )
-    inputs = tokenizer(
-        [labeled_prompt],
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-        max_length=512,
-    ).to("cuda")
-    response = model.generate(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
-    return tokenizer.decode(response[0], skip_special_tokens=True)
-# Create a Gradio interface
-iface = gr.Interface(fn=generate_response, inputs="text", outputs="text", title="Chatbot Interface", description="Enter your message below:")
-# Launch the app
-iface.launch()

+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
 # Load the model and tokenizer
+model_name = "Rafay17/Llama3.2_1b_customModel2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")  # Ensure to load the model on GPU
+# Prepare the model for inference
+model.eval()
+# Define a function to generate responses
+def generate_response(input_text):
+    # Prepare the input for the model
+    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+    # Set up the text streamer to stream the generated response
+    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+    # Generate the response
+    with torch.no_grad():
+        model.generate(
+            input_ids=inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            streamer=text_streamer,
+            max_new_tokens=64,  # Adjust this value as needed
+            pad_token_id=tokenizer.eos_token_id,
+        )
+# Example usage of the generate_response function
+input_text = "Hello, how can I help you today?"
+print("Generating response for input:")
+print(input_text)
+generate_response(input_text)