import gradio as gr import time import random import json import mysql.connector import os import csv import spaces import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer from threading import Thread from typing import Iterator from huggingface_hub import Repository, hf_hub_download from datetime import datetime # for fetch_personalized_data import mysql.connector import urllib.parse import urllib.request # for saving chat history as JSON - not used import atexit import os from huggingface_hub import HfApi, HfFolder # for saving chat history as dataset - not used import huggingface_hub from huggingface_hub import Repository from datetime import datetime # for saving chat history as dataset - used import sqlite3 import huggingface_hub import gradio as gr import pandas as pd import shutil import os import datetime from apscheduler.schedulers.background import BackgroundScheduler DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history" DATA_DIRECTORY = "data" # Separate directory for storing data files DATA_FILENAME = "newmarion.csv" # Default filename DATA_FILE = os.path.join("data", DATA_FILENAME) HF_TOKEN = os.environ.get("HF_TOKEN") print("is none?", HF_TOKEN is None) print("hfh", huggingface_hub.__version__) repo = Repository( local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL ) MAX_MAX_NEW_TOKENS = 2048 DEFAULT_MAX_NEW_TOKENS = 1024 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096")) DESCRIPTION = """\ # This is your personal space to chat. You can ask anything. """ # License and Acceptable Use Policy by Meta LICENSE = """

--- This demo is governed by the [original license](https://ai.meta.com/llama/license/) and [acceptable use policy](https://ai.meta.com/llama/use-policy/). The most recent copy of this policy can be found at ai.meta.com/llama/use-policy. """ if not torch.cuda.is_available(): DESCRIPTION += "\n

Running on CPU 🥶 This demo does not work on CPU.

" if torch.cuda.is_available(): model_id = "meta-llama/Llama-2-7b-chat-hf" model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto") tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.use_default_system_prompt = False ## trust-game-llama-2-7b-chat # app.py def get_default_system_prompt(): BSYS, ESYS = "<>\n", "\n<>\n\n" DEFAULT_SYSTEM_PROMPT = f"""You are a friendly chat assistant. First, wait for a question. Do not talk before being asked. When the user says hi, politely ask what he/she wants to know. Do not tell any more! If asked about Marion, you can answer the following: There are only two Marions, one is currently working at Accenture and the other one will shortly begin a new job at BMW. She recently quit Accenture. Nicknames by her friends include botsi and botsokrates. Her favorite project was Hypercare. Kathy/Kathi is her colleague at Accenture. Anita is her colleague at Accenture. """ print(DEFAULT_SYSTEM_PROMPT) return DEFAULT_SYSTEM_PROMPT ## trust-game-llama-2-7b-chat # app.py def construct_input_prompt(chat_history, message): input_prompt = f"[INST] <>\n{get_default_system_prompt()}\n<>\n\n " for user, assistant in chat_history: input_prompt += f"{user} [/INST] {assistant} [INST] " input_prompt += f"{message} [/INST] " return input_prompt ## trust-game-llama-2-7b-chat # app.py @spaces.GPU def generate( request: gr.Request, # To fetch query params message: str, chat_history: list[tuple[str, str]], # input_prompt: str, max_new_tokens: int = 1024, temperature: float = 0.6, top_p: float = 0.9, top_k: int = 50, repetition_penalty: float = 1.2, ) -> Iterator[str]: # Change return type hint to Iterator[str] conversation = [] # Fetch query params params = { key: value for key, value in request.query_params.items() } print('those are the query params') print(params) print("Request headers dictionary:", request.headers) print("IP address:", request.client.host) print("Query parameters:", params) # Construct the input prompt using the functions from the system_prompt_config module input_prompt = construct_input_prompt(chat_history, message) # Move the condition here after the assignment if input_prompt: conversation.append({"role": "system", "content": input_prompt}) # Convert input prompt to tensor input_ids = tokenizer(input_prompt, return_tensors="pt").to(model.device) for user, assistant in chat_history: conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}]) conversation.append({"role": "user", "content": message}) input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt") if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH: input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:] gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.") input_ids = input_ids.to(model.device) # Set up the TextIteratorStreamer streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True) # Set up the generation arguments generate_kwargs = dict( input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p, top_k=top_k, temperature=temperature, num_beams=1, repetition_penalty=repetition_penalty, ) # Start the model generation thread t = Thread(target=model.generate, kwargs=generate_kwargs) t.start() # Yield generated text chunks outputs = [] for text in streamer: outputs.append(text) yield "".join(outputs) # Fix bug that last answer is not recorded! # Parse the outputs into a readable sentence and record them # Filter out empty strings and join the remaining strings with spaces readable_sentence = ' '.join(filter(lambda x: x.strip(), outputs)) # Print the readable sentence print(readable_sentence) # Save chat history to .csv file on HuggingFace Hub # Generate filename with bot id and session id filename = f"{DATA_FILENAME}" data_file = os.path.join(DATA_DIRECTORY, filename) # Generate timestamp timestamp = datetime.datetime.now() # Check if the file already exists if os.path.exists(data_file): # If file exists, load existing data existing_data = pd.read_csv(data_file) # Add timestamp column conversation_df = pd.DataFrame(conversation) conversation_df['ip_address'] = request.client.host conversation_df['readable_sentence'] = readable_sentence conversation_df['timestamp'] = timestamp # Append new conversation to existing data updated_data = pd.concat([existing_data, conversation_df], ignore_index=True) updated_data.to_csv(data_file, index=False) else: # If file doesn't exist, create new file with conversation data conversation_df = pd.DataFrame(conversation) conversation_df['ip_address'] = request.client.host conversation_df['readable_sentence'] = readable_sentence conversation_df['timestamp'] = timestamp conversation_df.to_csv(data_file, index=False) print("Updating .csv") repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}") chat_interface = gr.ChatInterface( fn=generate, retry_btn=None, clear_btn=None, undo_btn=None, chatbot=gr.Chatbot(avatar_images=('user.png', 'bot.png'), bubble_full_width=False), examples=[ ["What is your favorite fruit?"], ["What do you think about AI in the workplace?"], ], ) with gr.Blocks(css="style.css", theme=gr.themes.Default(primary_hue=gr.themes.colors.emerald, secondary_hue=gr.themes.colors.indigo)) as demo: gr.Markdown(DESCRIPTION) chat_interface.render() gr.Markdown(LICENSE) if __name__ == "__main__": demo.queue(max_size=20).launch() # Launching the interface with queueing and maximum size limit # demo.launch(share=True, debug=True) # Uncomment this line if you want to launch the interface with sharing and debug mode '''# Original code from https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat # Modified for trust game purposes import gradio as gr import time import random import json import mysql.connector import os import csv import spaces import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer from threading import Thread from typing import Iterator from huggingface_hub import Repository, hf_hub_download from datetime import datetime # for fetch_personalized_data import mysql.connector import urllib.parse import urllib.request # for saving chat history as JSON - not used import atexit import os from huggingface_hub import HfApi, HfFolder # for saving chat history as dataset - not used import huggingface_hub from huggingface_hub import Repository from datetime import datetime # for saving chat history as dataset - used import sqlite3 import huggingface_hub import gradio as gr import pandas as pd import shutil import os import datetime from apscheduler.schedulers.background import BackgroundScheduler DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history" DATA_DIRECTORY = "data" # Separate directory for storing data files DATA_FILENAME = "marion.csv" # Default filename DATA_FILE = os.path.join("data", DATA_FILENAME) HF_TOKEN = os.environ.get("HF_TOKEN") print("is none?", HF_TOKEN is None) print("hfh", huggingface_hub.__version__) repo = Repository( local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL ) MAX_MAX_NEW_TOKENS = 2048 DEFAULT_MAX_NEW_TOKENS = 1024 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096")) DESCRIPTION = """\ # This is your personal space to chat. You can ask anything. """ # License and Acceptable Use Policy by Meta LICENSE = """

--- This demo is governed by the [original license](https://ai.meta.com/llama/license/) and [acceptable use policy](https://ai.meta.com/llama/use-policy/). The most recent copy of this policy can be found at ai.meta.com/llama/use-policy. """ if not torch.cuda.is_available(): DESCRIPTION += "\n

Running on CPU 🥶 This demo does not work on CPU.

" if torch.cuda.is_available(): model_id = "meta-llama/Llama-2-7b-chat-hf" model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto") tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.use_default_system_prompt = False ## trust-game-llama-2-7b-chat # app.py def get_default_system_prompt(): #BOS, EOS = "", "" #BINST, EINST = "[INST]", "[/INST]" BSYS, ESYS = "<>\n", "\n<>\n\n" DEFAULT_SYSTEM_PROMPT = f"""You are a friendly chat assistant. First, wait for a question. Do not talk before being asked. When the user says hi, politely ask what he/she wants to know. Do not tell any more! If asked about Marion, you can answer the following: There are only two Marions, one is currently working at Accenture and the other one will shortly begin a new job at BMW. She recently quit Accenture. Nicknames by her friends include botsi and botsokrates. Her favorite project was Hypercare. Kathy/Kathi is her colleague at Accenture. Anita is her colleague at Accenture. """ print(DEFAULT_SYSTEM_PROMPT) return DEFAULT_SYSTEM_PROMPT ## trust-game-llama-2-7b-chat # app.py def construct_input_prompt(chat_history, message): input_prompt = f"[INST] <>\n{get_default_system_prompt()}\n<>\n\n " for user, assistant in chat_history: input_prompt += f"{user} [/INST] {assistant} [INST] " input_prompt += f"{message} [/INST] " return input_prompt ## trust-game-llama-2-7b-chat # app.py @spaces.GPU def generate( request: gr.Request, # To fetch query params message: str, chat_history: list[tuple[str, str]], # input_prompt: str, max_new_tokens: int = 1024, temperature: float = 0.6, top_p: float = 0.9, top_k: int = 50, repetition_penalty: float = 1.2, ) -> Iterator[str]: # Change return type hint to Iterator[str] conversation = [] # Fetch query params params = { key: value for key, value in gr.Request.query_params.items() } print('those are the query params') print(params) print("Request headers dictionary:", gr.Request.headers) print("IP address:", gr.Request.client.host) print("Query parameters:", params) # Construct the input prompt using the functions from the system_prompt_config module input_prompt = construct_input_prompt(chat_history, message) # Move the condition here after the assignment if input_prompt: conversation.append({"role": "system", "content": input_prompt}) # Convert input prompt to tensor input_ids = tokenizer(input_prompt, return_tensors="pt").to(model.device) for user, assistant in chat_history: conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}]) conversation.append({"role": "user", "content": message}) input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt") if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH: input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:] gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.") input_ids = input_ids.to(model.device) # Set up the TextIteratorStreamer streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True) # Set up the generation arguments generate_kwargs = dict( {"input_ids": input_ids}, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p, top_k=top_k, temperature=temperature, num_beams=1, repetition_penalty=repetition_penalty, ) # Start the model generation thread t = Thread(target=model.generate, kwargs=generate_kwargs) t.start() # Yield generated text chunks outputs = [] for text in streamer: outputs.append(text) yield "".join(outputs) # Fix bug that last answer is not recorded! # Parse the outputs into a readable sentence and record them # Filter out empty strings and join the remaining strings with spaces readable_sentence = ' '.join(filter(lambda x: x.strip(), outputs)) # Print the readable sentence print(readable_sentence) # Save chat history to .csv file on HuggingFace Hub # Generate filename with bot id and session id filename = f"{DATA_FILENAME}" data_file = os.path.join(DATA_DIRECTORY, filename) # Generate timestamp timestamp = datetime.datetime.now() # Check if the file already exists if os.path.exists(data_file): # If file exists, load existing data existing_data = pd.read_csv(data_file) # Add timestamp column conversation_df = pd.DataFrame(conversation) conversation_df['ip_address'] = request.client.host conversation_df['readable_sentence'] = readable_sentence conversation_df['timestamp'] = timestamp # Append new conversation to existing data updated_data = pd.concat([existing_data, conversation_df], ignore_index=True) updated_data.to_csv(data_file, index=False) else: # If file doesn't exist, create new file with conversation data conversation_df = pd.DataFrame(conversation) conversation_df['ip_address'] = request.client.host conversation_df['readable_sentence'] = readable_sentence conversation_df['timestamp'] = timestamp conversation_df.to_csv(data_file, index=False) print("Updating .csv") repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}") chat_interface = gr.ChatInterface( fn=generate, retry_btn=None, clear_btn=None, undo_btn=None, chatbot=gr.Chatbot(avatar_images=('user.png', 'bot.png'), bubble_full_width = False), examples=[ ["What is your favorite fruit?"], ["What do you think about AI in the workplace?"], ], ) with gr.Blocks(css="style.css", theme=gr.themes.Default(primary_hue=gr.themes.colors.emerald,secondary_hue=gr.themes.colors.indigo)) as demo: gr.Markdown(DESCRIPTION) chat_interface.render() gr.Markdown(LICENSE) if __name__ == "__main__": demo.queue(max_size=20).launch() #demo.queue(max_size=20) demo.launch(share=True, debug=True) '''