Spaces: Runtime error
Update main.py
main.py CHANGED
@@ -21,6 +21,10 @@ model_name = "microsoft/DialoGPT-small"
 tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
 model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
 
+# Set pad token if not defined
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
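
The pad-token guard added in this hunk addresses a common failure with GPT-2-family checkpoints such as DialoGPT: their tokenizers ship without a pad token, so any code path that needs padding (or the pad_token_id passed to generate() later in this file) falls back on an undefined value. A minimal standalone sketch of the same pattern, assuming the same microsoft/DialoGPT-small checkpoint and omitting cache_dir:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
print(tokenizer.pad_token)  # None: the checkpoint defines no pad token
# Reuse EOS as PAD, exactly as the change above does
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
assert tokenizer.pad_token_id == tokenizer.eos_token_id
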
@@ -40,21 +44,30 @@ app.add_middleware(
 class Question(BaseModel):
     question: str
 
-SYSTEM_PROMPT = "You are a helpful, professional, and highly persuasive sales assistant
+SYSTEM_PROMPT = "You are a helpful, professional, and highly persuasive sales assistant..."
 
-chat_history_ids = None
+chat_history_ids = None
 
 async def generate_response_chunks(prompt: str):
     global chat_history_ids
 
+    # Combine system prompt and user input
+    input_text = SYSTEM_PROMPT + "\nUser: " + prompt + "\nBot:"
+    new_input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
+
+    # Create attention mask (handle case where pad_token_id might be None)
+    attention_mask = torch.ones_like(new_input_ids)
 
     if chat_history_ids is not None:
         input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
+        attention_mask = torch.cat([
+            torch.ones_like(chat_history_ids),
+            attention_mask
+        ], dim=-1)
     else:
         input_ids = new_input_ids
 
+    # Generate response
     output_ids = model.generate(
         input_ids,
         attention_mask=attention_mask,
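
One invariant the mask handling in this hunk must preserve: after history and the new turn are concatenated, attention_mask has to match input_ids in shape, with a 1 for every real token. A toy check with stand-in tensors (the shapes here are illustrative, not taken from the app):

import torch

chat_history_ids = torch.randint(0, 100, (1, 12))  # stand-in for prior turns
new_input_ids = torch.randint(0, 100, (1, 5))      # stand-in for the new turn

input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
attention_mask = torch.cat([
    torch.ones_like(chat_history_ids),  # attend to all history tokens
    torch.ones_like(new_input_ids),     # and to all new tokens
], dim=-1)
assert attention_mask.shape == input_ids.shape  # both (1, 17)
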
@@ -65,15 +78,20 @@ async def generate_response_chunks(prompt: str):
         pad_token_id=tokenizer.eos_token_id
     )
 
+    # Update chat history
+    chat_history_ids = output_ids
 
+    # Decode only the new tokens
     response = tokenizer.decode(output_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
 
+    # Stream the response
     for word in response.split():
         yield word + " "
         await asyncio.sleep(0.03)
 
 @app.post("/ask")
 async def ask(question: Question):
-    return StreamingResponse(
+    return StreamingResponse(
+        generate_response_chunks(question.question),
+        media_type="text/plain"
+    )
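
For reference, a hypothetical client for the /ask route as defined above. The base URL is a placeholder (7860 is only the usual local port for a Space), the body field name question matches the Question model, and requests streams the plain-text word chunks as they arrive:

import requests

BASE_URL = "http://localhost:7860"  # placeholder; substitute the Space's actual URL

with requests.post(f"{BASE_URL}/ask",
                   json={"question": "Why should I buy this?"},
                   stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)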