import streamlit as st
from huggingface_hub import InferenceClient, HfApi
import time
import requests
from requests.exceptions import RequestException
from gtts import gTTS  # Google Text-to-Speech
import tempfile
import os

# Set page config at the very beginning
st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")

# Add a text input for the Hugging Face API token
hf_token = st.text_input("Enter your Hugging Face API token", type="password")
def get_client(token):
    return InferenceClient("microsoft/Phi-3.5-mini-instruct", token=token)

def validate_token(token):
    try:
        api = HfApi(token=token)
        api.whoami()
        return True
    except Exception as e:
        st.error(f"Token validation failed: {str(e)}")
        return False
def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
    for attempt in range(max_retries):
        try:
            response = client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
            )
            return response
        except RequestException as e:
            if attempt < max_retries - 1:
                delay = initial_delay * (2 ** attempt)  # Exponential backoff
                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
            else:
                raise e
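# Note: depending on the huggingface_hub version, InferenceClient.text_generation
# tends to surface HTTP failures as requests-based exceptions (e.g. HfHubHTTPError,
# a requests.HTTPError subclass), which the RequestException handler above retries
# with exponential backoff; any other exception type propagates immediately.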
def respond(message, history, system_message, max_tokens, temperature, top_p):
    if not hf_token:
        st.error("Please enter your Hugging Face API token.")
        return
    if not validate_token(hf_token):
        return

    client = get_client(hf_token)

    # Construct the prompt
    prompt = f"{system_message}\n\n"
    for user_msg, assistant_msg in history:
        prompt += f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
    prompt += f"Human: {message}\nAssistant:"

    try:
        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
        yield response
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        yield "I'm sorry, but I encountered an error while processing your request."
def text_to_speech(text):
    # Create a named temporary file
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts = gTTS(text=text, lang='en')
    tts.save(tmp_file.name)
    return tmp_file.name
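# Note: delete=False keeps each generated MP3 on disk for the lifetime of the
# process. A minimal cleanup sketch (speak_and_cleanup is a hypothetical helper,
# not part of the original app): read the bytes for st.audio, then unlink the file.
def speak_and_cleanup(text):
    path = text_to_speech(text)
    with open(path, "rb") as f:
        audio_bytes = f.read()
    os.unlink(path)  # remove the temporary MP3 once its bytes are in memory
    return audio_bytes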
st.title("Phi-3.5 Mini Chatbot")

if "messages" not in st.session_state:
    st.session_state.messages = []

system_message = st.text_input("System message", value="You are a helpful AI assistant.")
max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("What is your message?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Pair past turns as (user, assistant): the 2-strided slice yields the
        # i-th user message, which sits at index 2*i in the full list, so its
        # reply is at index 2*i + 1 (not i + 1).
        history = [(msg["content"], st.session_state.messages[2 * i + 1]["content"])
                   for i, msg in enumerate(st.session_state.messages[:-1:2])]
        for response in respond(prompt, history, system_message, max_tokens, temperature, top_p):
            message_placeholder.markdown(response)
            full_response = response
        st.session_state.messages.append({"role": "assistant", "content": full_response})

        # Generate audio from the assistant's response
        audio_file = text_to_speech(full_response)
        st.audio(audio_file, format='audio/mp3')
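# Run locally with: streamlit run app.py
# (assuming the file is saved as app.py, the conventional entry point for a Streamlit Space).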