Spaces: Rafay17/chatbot

Build error

App Files Community

Rafay17 committed on Oct 13, 2024

Commit

a3af99d

verified ·

1 Parent(s): 6982029

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -14

app.py CHANGED Viewed

@@ -1,34 +1,87 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
 # Load the model and tokenizer
-model_name = "Rafay17/Llama3.2_1b_customModel2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")  # Ensure to load the model on GPU
-# Prepare the model for inference
-model.eval()
-# Define a function to generate responses
 def generate_response(input_text):
     # Prepare the input for the model
-    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
     # Set up the text streamer to stream the generated response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     # Generate the response
-    with torch.no_grad():
         model.generate(
             input_ids=inputs.input_ids,
             attention_mask=inputs.attention_mask,
             streamer=text_streamer,
-            max_new_tokens=64,  # Adjust this value as needed
             pad_token_id=tokenizer.eos_token_id,
         )
-# Example usage of the generate_response function
-input_text = "Hello, how can I help you today?"
-print("Generating response for input:")
-print(input_text)
-generate_response(input_text)

+from transformers import AutoTokenizer, TextStreamer
+from unsloth import FastLanguageModel
 import torch
 # Load the model and tokenizer
+model_name = "Rafay17/Llama3.2_1b_customModle2"  # Use your specific model name
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=512,  # Adjust as needed
+    dtype="float16",     # Adjust as needed
+    load_in_4bit=True    # Adjust based on your needs
+)
+FastLanguageModel.for_inference(model)  # Call this immediately after loading the model
+# Function to generate a response
 def generate_response(input_text):
+    # Prepare the labeled prompt for the model
+    labeled_prompt = (
+        "Please provide the response with the following labels:\n"
+        "Speaker: [SPEAKER]\n"
+        "Text: [TEXT]\n"
+        "Sentiment: [SENTIMENT]\n"
+        "Emotion: [EMOTION]\n"
+        "Intent: [INTENT]\n"
+        "Tone: [TONE]\n"
+        "Confidence Level: [CONFIDENCE]\n"
+        "Frustration Level: [FRUSTRATION]\n"
+        "Response Length: [LENGTH]\n"
+        "Action Required: [ACTION]\n"
+        "Interruption: [INTERRUPTION]\n"
+        "Cooperation Level: [COOPERATION]\n"
+        "Clarity: [CLARITY]\n"
+        "Objective: [OBJECTIVE]\n"
+        "Timeline: [TIMELINE]\n"
+        "Motivation: [MOTIVATION]\n"
+        "Conversation Stage: [STAGE]\n"
+        "Resolution: [RESOLUTION]\n"
+        "Context: [CONTEXT]\n"
+        "Urgency: [URGENCY]\n"
+        "Problem Type: [PROBLEM]\n"
+        "Key Words: [KEYWORDS]\n"
+        "Expected Detail: [DETAIL]\n"
+        "Time Gap: [TIME]\n"
+        "Client Expectation: [EXPECTATION]\n"
+        "Channel: [CHANNEL]\n"
+        "Power Relationship: [POWER]\n\n"
+        f"User Input: {input_text}\n"
+        "Response:"
+    )
     # Prepare the input for the model
+    inputs = tokenizer(
+        [labeled_prompt],
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512,  # Ensure this matches your model's max length
+    ).to("cuda")
     # Set up the text streamer to stream the generated response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     # Generate the response
+    with torch.no_grad():  # Disable gradient calculation for inference
         model.generate(
             input_ids=inputs.input_ids,
             attention_mask=inputs.attention_mask,
             streamer=text_streamer,
+            max_new_tokens=100,  # Adjust this value as needed
             pad_token_id=tokenizer.eos_token_id,
         )
+# Function to take user input and generate output
+def user_interaction():
+    while True:
+        user_input = input("Enter conversation details (or type 'exit' to quit): ")
+        if user_input.lower() == 'exit':
+            print("Exiting the program.")
+            break
+        print("Generating response for input:")
+        generate_response(user_input)
+# Start the user interaction
+user_interaction()