"""Gradio AI coding assistant: streams LLM answers, with optional voice input.

Two tabs: a text LLM chat and a (stubbed) multimodal chat. Responses stream
token-by-token from the Hugging Face Inference API.
"""

import os
import threading
from datetime import datetime, timedelta

import gradio as gr
import speech_recognition as sr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()

system_prompt = """
You are an AI coding assistant designed to solve coding problems and provide code snippets based on the user's query. When given a query, follow these guidelines.
1. Return only the necessary and helpful code.
2. Include any related details that enhance understanding or usability of the code.
3. Ensure the code is clean, efficient, and follows best practices.
4. Add comments to explain complex or non-obvious parts of the code.
5. If there are multiple possible solutions, provide the most optimal one first.
"""

# Per-IP request timestamps, guarded by a lock because Gradio may serve
# concurrent requests from multiple threads.
ip_requests = {}
ip_requests_lock = threading.Lock()


def allow_ip(request: gr.Request, show_error: bool = True) -> bool:
    """Rate-limit by client IP: allow at most 15 requests per rolling 24 h.

    Raises gr.Error when the limit is exceeded; returns True otherwise.
    NOTE(review): this helper is currently not attached to any Gradio event
    in this file — wire it up (e.g. via `demo.load` or as a chained event)
    for the rate limit to take effect.
    """
    ip = request.headers.get("X-Forwarded-For")
    now = datetime.now()
    window = timedelta(hours=24)
    with ip_requests_lock:
        if ip in ip_requests:
            # Drop timestamps that have aged out of the 24 h window.
            ip_requests[ip] = [t for t in ip_requests[ip] if now - t < window]
        if len(ip_requests.get(ip, [])) >= 15:
            raise gr.Error(
                "Rate limit exceeded. Please try again tomorrow or use your Hugging Face Pro token.",
                visible=show_error,
            )
        ip_requests.setdefault(ip, []).append(now)
        print("ip_requests", ip_requests)
    return True


def inference(prompt, hf_token, model, model_name, max_new_tokens):
    """Stream a chat completion, yielding the CUMULATIVE text so far.

    Each yielded value is the full response accumulated to that point
    (prefixed with a bold model-name header), not an individual delta.
    Falls back to the HF_TOKEN env var when no token is supplied.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if hf_token is None or not hf_token.strip():
        hf_token = os.getenv("HF_TOKEN")
    client = InferenceClient(model=model, token=hf_token)
    tokens = f"**`{model_name}`**\n\n"
    for completion in client.chat_completion(messages, max_tokens=max_new_tokens, stream=True):
        token = completion.choices[0].delta.content
        # The final streamed chunk (and occasionally others) can carry
        # delta.content == None; concatenating it would raise TypeError.
        if token:
            tokens += token
        yield tokens


def speech_to_text(audio):
    """Transcribe an audio file path to text via Google Speech Recognition.

    Returns the transcript, or a human-readable error message on failure.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                text = "Google Speech Recognition could not understand the audio"
            except sr.RequestError as e:
                text = f"Could not request results from Google Speech Recognition service; {e}"
    except Exception:
        # Best-effort: surface a friendly message instead of crashing the UI.
        text = "Could not process the audio, please try to record one more time"
    return text


def respond(message, chat_history, system_prompt, hf_token, model_id, max_new_tokens):
    """Generator wired to the Send button: streams the bot reply into the chat.

    Yields ("", updated_history) so the textbox clears while the last chat
    row is updated in place with each streamed chunk. `inference` already
    yields the cumulative text, so each chunk REPLACES the bot message
    (accumulating again here would duplicate prefixes), and the history row
    is appended once, not once per chunk.
    """
    chat_history = chat_history + [(message, "")]
    model_name = model_id.split("/")[-1]
    for partial in inference(message, hf_token, model_id, model_name, max_new_tokens):
        chat_history[-1] = (message, partial)
        yield "", chat_history


def clear_chat(chat_history):
    """Reset the chatbot to an empty history."""
    return []


def transcribe_audio(audio):
    """Adapter for the voice button: audio file path -> transcript string."""
    return speech_to_text(audio)


def llm_ui():
    """Build the text-only LLM chat tab."""
    with gr.Blocks() as demo:
        # Canonical Hub org for Mistral is lowercase "mistralai";
        # "Mistralai/..." does not resolve as a repo id.
        model_ids_llm = [
            "mistralai/Mistral-7B-Instruct-v0.2",
            "meta-llama/Meta-Llama-3-8B-Instruct",
            "meta-llama/Meta-Llama-3.1-8B-Instruct",
        ]
        gr.Markdown("# AI Coding Assistant")
        with gr.Row():
            chatbot = gr.Chatbot()
        hf_token_box = gr.Textbox(
            lines=1,
            placeholder="Check if you have access to selected model",
            label="Hugging Face Token - Required",
            type="password",
        )
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your coding problem here...",
                    label="User Input",
                    show_label=False,
                    scale=8,
                )
                send_button = gr.Button("Send", scale=2, variant="primary")
            with gr.Row():
                model_selection = gr.Dropdown(
                    choices=model_ids_llm, value=model_ids_llm[0], label="Model", scale=3
                )
                clear_button = gr.Button("Clear Chat", scale=2)
            with gr.Row():
                voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
                voice_button = gr.Button("Use Audio as User Input", scale=3)
            voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)
            max_new_tokens_slider = gr.Slider(
                minimum=50,
                maximum=2000,
                value=500,
                step=10,
                label="Max New Tokens",
                info="Maximum number of tokens to generate in the response.",
            )
        # Wrap system_prompt in a Gradio component so it can be passed as an input.
        system_prompt_component = gr.State(value=system_prompt)
        send_button.click(
            respond,
            [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider],
            [user_input, chatbot],
            scroll_to_output=True,
        )
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo


def multimodal_llm_ui():
    """Build the image+LLM chat tab (model choices are placeholders)."""
    with gr.Blocks() as demo:
        model_ids_multimodal = ["Model-1", "Model-2", "Model-3"]
        gr.Markdown("# Coding Vision Model")
        with gr.Row():
            chatbot = gr.Chatbot(height=550)
        with gr.Column():
            hf_token_box = gr.Textbox(
                lines=1,
                placeholder="Check if you have access to selected model",
                label="Hugging Face Token - Required",
                type="password",
            )
            image_input = gr.Image(type="filepath", label="Input your Image Here....")
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your problem here...",
                    label="User Input",
                    show_label=False,
                    scale=8,
                )
                send_button = gr.Button("Send", scale=2, elem_id="send-button", variant="primary")
            with gr.Row():
                model_selection = gr.Dropdown(
                    choices=model_ids_multimodal,
                    value=model_ids_multimodal[0],
                    label="Select Model",
                    scale=3,
                )
                clear_button = gr.Button("Clear Chat", scale=2)
            with gr.Row():
                voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
                voice_button = gr.Button("Use Audio as User Input", scale=3)
            voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)
            max_new_tokens_slider = gr.Slider(
                minimum=50,
                maximum=2000,
                value=500,
                step=10,
                label="Max New Tokens",
                info="Maximum number of tokens to generate in the response.",
            )
        # Wrap system_prompt in a Gradio component so it can be passed as an input.
        system_prompt_component = gr.State(value=system_prompt)
        send_button.click(
            respond,
            [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider],
            [user_input, chatbot],
        )
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo


demo = gr.TabbedInterface([llm_ui(), multimodal_llm_ui()], ["LLM", "Image + LLM"])

if __name__ == "__main__":
    # Guarded so importing this module does not block on the server loop.
    demo.launch()