import re
import time

import streamlit as st
import torch
from langdetect import detect  # imported for optional language detection; not used below
from transformers import pipeline, Conversation, AutoTokenizer

# Note: the "conversational" pipeline and the Conversation class have since been
# deprecated and removed from recent transformers releases, so this code requires
# an older transformers version.

print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    # Tesla T4
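# Sketch (assumption, not in the original app): choose a device index explicitly so
# the script also runs on CPU-only machines; pipeline() accepts device=-1 for CPU.
# device = 0 if torch.cuda.is_available() else -1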
# Choose your model here by setting model_chosen_id to 1 or 2.
model_chosen_id = 2
model_name_options = {
    1: "meta-llama/Llama-2-13b-chat-hf",
    2: "BramVanroy/Llama-2-13b-chat-dutch",
}
model_chosen = model_name_options[model_chosen_id]

my_config = {
    'model_name': model_chosen,
    'do_sample': True,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
    'max_new_tokens': 500,
}

print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
def count_words(text):
    # Use a simple regular expression to count words.
    words = re.findall(r'\b\w+\b', text)
    return len(words)
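# Example: count_words("Dit is een test.") returns 4.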
def generate_with_llama_chat(my_config):
    # Get the generation parameters from the config dict (with defaults).
    do_sample = my_config.get('do_sample', True)
    temperature = my_config.get('temperature', 0.1)
    repetition_penalty = my_config.get('repetition_penalty', 1.1)
    max_new_tokens = my_config.get('max_new_tokens', 500)

    start_time = time.time()
    model = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(model)
    chatbot = pipeline(
        "conversational",
        model=model,
        tokenizer=tokenizer,
        do_sample=do_sample,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        # max_length=2000,
        max_new_tokens=max_new_tokens,
        # model_kwargs={"device_map": "auto", "load_in_8bit": True}
        # Adding "src_lang": "en", "tgt_lang": "nl" to model_kwargs does not work!
    )
    elapsed_time = time.time() - start_time
    print(f"Loading the model took {elapsed_time:.1f} seconds")
    return chatbot
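# Sketch (assumption): to fit the 13B model on a single T4, the commented-out
# model_kwargs line above could be enabled; this additionally requires the
# `accelerate` and `bitsandbytes` packages to be installed:
# chatbot = pipeline("conversational", model=model, tokenizer=tokenizer,
#                    model_kwargs={"device_map": "auto", "load_in_8bit": True})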
def get_answer(chatbot, input_text):
    start_time = time.time()
    print(f"Processing the input\n {input_text}\n")
    print('Processing the answer....')
    conversation = Conversation(input_text)
    print(f"Conversation(input_text): {conversation}")
    # The returned conversation holds the user message at index 0 and the
    # generated assistant reply at index 1.
    output = chatbot(conversation)[1]['content']
    elapsed_time = time.time() - start_time
    # Append timing and word-count information to the answer.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, Nr generated words: {count_words(output)}"
    return output
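# Example usage (sketch, outside Streamlit):
#   bot = generate_with_llama_chat(my_config)
#   print(get_answer(bot, "Wat is de hoofdstad van Nederland?"))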
| if "model_name" not in st.session_state.keys(): | |
| # Initialize the model with the default option | |
| st.session_state["model_name"] = "BramVanroy/Llama-2-13b-chat-dutch" | |
| my_config.update({'model_name': st.session_state["model_name"]}) | |
| llm_chatbot = generate_with_llama_chat(my_config) | |
| st.session_state["model"] = llm_chatbot | |
text = st.text_area("Enter text to summarize here.")
if text:
    out = get_answer(st.session_state["model"], text)
    st.write(out)
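# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py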