import streamlit as st
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# App title
st.set_page_config(page_title="GenAI Chat Bot")
# Hugging Face Credentials
with st.sidebar:
    st.title('GenAI Chat Bot')
    #st.write('This is a generative AI Chat Bot.')

    # Use Hugging Face API Key from secrets or environment
    api_key = os.getenv("llama3")
    if not api_key:
        st.error("API key is missing!")
        st.stop()

    # Authenticate with Hugging Face Hub
    try:
        login(api_key)
        st.success('API successfully authenticated!', icon='✅')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        st.stop()
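    # Note: on Hugging Face Spaces, a secret named "llama3" is exposed to the
    # app as an environment variable, which is why os.getenv("llama3") above
    # covers both the secrets and plain-environment cases.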
    st.subheader('Models and parameters')

    # Model selection categories
    model_options = {
        "Basic": [
            "meta-llama/Llama-3.2-1B"
        ],
        "Basic-Medium": [
            "meta-llama/Llama-3.2-1B-Instruct"
        ],
        "Medium-Fine": [
            "meta-llama/Llama-3.2-3B"
        ],
        "Finest": [
            "meta-llama/Llama-3.2-3B-Instruct"
        ]
    }

    # Select category (default set to "Basic-Medium")
    selected_category = st.sidebar.selectbox('Select Model Category', ["Basic", "Basic-Medium", "Medium-Fine", "Finest"], index=1)

    # Set selected model based on category
    selected_model = model_options[selected_category][0]
    # Slider inputs for parameters
    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=1.0, value=0.3, step=0.01)
    top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.sidebar.slider('max_length', min_value=20, max_value=80, value=65, step=5)
    st.markdown("Disclaimer: The performance and speed of this GenAI tool depend on the machine configuration and the selected model.")
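    # temperature scales the sampling distribution (lower = more deterministic),
    # top_p keeps only the smallest set of tokens whose cumulative probability
    # exceeds p (nucleus sampling), and max_length caps the number of newly
    # generated tokens (it is passed to generate() as max_new_tokens below).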
# Store LLM generated responses
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

# Display or clear chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
# Load the tokenizer and model, cached so Streamlit's per-interaction reruns
# don't reload the weights from scratch on every message
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
    return tokenizer, model

try:
    tokenizer, model = load_model(selected_model)
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()
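# Note: device_map="auto" requires the accelerate package to be installed;
# without a GPU the model falls back to CPU, where the 3B variants are slow.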
# Function for generating response using Hugging Face model
def generate_huggingface_response(prompt_input):
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    try:
        # Generate response from the model; pad_token_id is set explicitly
        # because the Llama tokenizers define no pad token
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens; decoding outputs[0] wholesale
        # would echo the prompt back into the response
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
        return response
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Oops! Something went wrong."
# User-provided prompt
if prompt := st.chat_input(disabled=not api_key):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_huggingface_response(prompt)
            placeholder = st.empty()
            full_response = ''
            # Simulate streaming by re-rendering the placeholder as each
            # character of the completed response is appended
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
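# To try the app locally (assuming this file is saved as app.py, the imports
# above are installed, and a valid Hugging Face token is exported):
#   export llama3=<your Hugging Face token>
#   streamlit run app.py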