File size: 6,907 Bytes
a79c4d0
 
fc3e670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a79c4d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3e670
 
 
 
 
 
 
 
 
 
 
 
 
a79c4d0
 
 
fc3e670
 
 
 
a79c4d0
 
 
fc3e670
08a88d3
fc3e670
 
 
 
 
08a88d3
 
 
 
 
a79c4d0
 
 
 
 
fc3e670
a79c4d0
fc3e670
 
 
 
 
a79c4d0
 
 
 
fc3e670
 
 
 
a79c4d0
 
 
 
08a88d3
fc3e670
a79c4d0
fc3e670
 
 
 
 
a79c4d0
08a88d3
 
 
 
a79c4d0
 
 
 
 
fc3e670
 
 
 
 
 
a79c4d0
 
 
fc3e670
a79c4d0
fc3e670
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
import speech_recognition as sr
from datetime import datetime, timedelta
import os
import threading
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
load_dotenv()


# System prompt prepended to every chat completion request; instructs the
# model to behave as a code-focused assistant.
system_prompt = """
You are an AI coding assistant designed to solve coding problems and provide code snippets based on the user's query. When given a query, follow these guidelines.
1. Return only the necessary and helpful code.
2. Include any related details that enhance understanding or usability of the code.
3. Ensure the code is clean, efficient, and follows best practices.
4. Add comments to explain complex or non-obvious parts of the code.
5. If there are multiple possible solutions, provide the most optimal one first.
"""
# Maps client IP (from X-Forwarded-For) -> list of request timestamps;
# consumed by allow_ip() for sliding-window rate limiting.
ip_requests = {}
# Serializes access to ip_requests across concurrent Gradio request handlers.
ip_requests_lock = threading.Lock()

def allow_ip(request: gr.Request, show_error=True, max_requests=15, window_hours=24):
    """Enforce a per-IP sliding-window rate limit.

    Args:
        request: Incoming Gradio request; the client IP is read from the
            X-Forwarded-For header (may be None when the header is absent).
        show_error: Whether the raised gr.Error is shown to the user.
        max_requests: Maximum requests allowed per IP inside the window.
        window_hours: Length of the sliding window, in hours.

    Returns:
        True when the request is allowed.

    Raises:
        gr.Error: When the IP has exceeded max_requests within the window.
    """
    # NOTE(review): X-Forwarded-For may contain a comma-separated chain of
    # proxies; this uses the raw header value as the key — confirm intent.
    ip = request.headers.get("X-Forwarded-For")
    now = datetime.now()
    window = timedelta(hours=window_hours)
    with ip_requests_lock:
        if ip in ip_requests:
            # Drop timestamps that fell out of the window so the limit slides.
            ip_requests[ip] = [timestamp for timestamp in ip_requests[ip] if now - timestamp < window]
        if len(ip_requests.get(ip, [])) >= max_requests:
            raise gr.Error("Rate limit exceeded. Please try again tomorrow or use your Hugging Face Pro token.", visible=show_error)
        ip_requests.setdefault(ip, []).append(now)
    return True

def inference(prompt, hf_token, model, model_name, max_new_tokens):
    """Run a streaming chat completion and yield the full accumulated reply.

    Args:
        prompt: The user's message; sent together with the module-level
            system_prompt.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var
            when empty or None.
        model: Model repo id passed to InferenceClient.
        model_name: Short display name rendered as a bold header in the reply.
        max_new_tokens: Cap on generated tokens.

    Yields:
        A single string: the model-name header followed by the complete
        generated response (the stream is consumed internally).
    """
    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
    # Fall back to the server-side token when the user did not supply one.
    if hf_token is None or not hf_token.strip():
        hf_token = os.getenv("HF_TOKEN")
    client = InferenceClient(model=model, token=hf_token)
    tokens = f"**`{model_name}`**\n\n"
    for completion in client.chat_completion(messages, max_tokens=max_new_tokens, stream=True):
        token = completion.choices[0].delta.content
        # Streamed deltas can carry content=None (e.g. role-only or final
        # chunks); skip those instead of crashing on `str + None`.
        if token:
            tokens += token
    # Single yield after the stream is drained — callers receive the whole
    # response at once (matches respond(), which concatenates yielded values).
    yield tokens

def speech_to_text(audio):
    """Transcribe an audio file to text via Google Speech Recognition.

    Args:
        audio: Path to an audio file readable by sr.AudioFile.

    Returns:
        The recognized text, or a human-readable error message when
        transcription fails — this function never raises.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                text = "Google Speech Recognition could not understand the audio"
            except sr.RequestError as e:
                text = f"Could not request results from Google Speech Recognition service; {e}"
    except Exception:
        # Deliberately best-effort: any other failure (bad path, unsupported
        # format) is reported back to the UI instead of propagating.
        text = "Could not process the audio, please try to record one more time"
    return text

def respond(message, chat_history, system_prompt, hf_token, model_id, max_new_tokens):
    """Generate a reply for *message* and append the exchange to the history.

    Yields a ("", updated_history) pair so Gradio clears the input textbox and
    refreshes the chatbot in one step.

    NOTE(review): the system_prompt parameter is accepted but never used here;
    inference() reads the module-level system_prompt instead — confirm intent.
    """
    # Derive a short display name from the repo id ("org/name" -> "name").
    display_name = model_id.split("/")[-1]
    reply = "".join(inference(message, hf_token, model_id, display_name, max_new_tokens))
    chat_history.append((message, reply))
    yield "", chat_history
        
def clear_chat(chat_history):
    """Reset the conversation: the incoming history is discarded."""
    return list()

def transcribe_audio(audio):
    """Gradio callback: convert a recorded audio file into text for the input box."""
    return speech_to_text(audio)

def llm_ui():
    """Build the text-only LLM chat tab and return it as a gr.Blocks demo."""
    with gr.Blocks() as demo:
        
        # Instruct-tuned models offered in the dropdown.
        # NOTE(review): the canonical Hub org id is lowercase "mistralai" —
        # confirm "Mistralai/..." resolves correctly.
        model_ids_llm = ["Mistralai/Mistral-7B-Instruct-v0.2", "meta-llama/Meta-Llama-3-8B-Instruct",  "meta-llama/Meta-Llama-3.1-8B-Instruct"]
        
        gr.Markdown("# AI Coding Assistant")

        with gr.Row():
            chatbot = gr.Chatbot()
            
        hf_token_box = gr.Textbox(lines=1, placeholder="Check if you have access to selected model", label="Hugging Face Token - Required", type="password")
        
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(placeholder="Type your coding problem here...", label="User Input", show_label=False, scale=8)
                send_button = gr.Button("Send", scale=2, variant = "primary")
            
        with gr.Row():
            model_selection = gr.Dropdown(choices=model_ids_llm, value=model_ids_llm[0], label="Model", scale=3)
            clear_button = gr.Button("Clear Chat", scale=2)
        
        # Voice input: transcription result is written into the text box, not
        # sent directly — the user still clicks Send.
        with gr.Row():
            voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
            voice_button = gr.Button("Use Audio as User Input", scale=3)
            voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)

        max_new_tokens_slider = gr.Slider(minimum=50, maximum=2000, value=500, step=10, label="Max New Tokens", info="Maximum number of tokens to generate in the response.")

        # Wrap system_prompt in a Gradio component so it can be passed to the
        # respond callback. NOTE(review): respond() currently ignores this
        # argument — confirm whether it should be forwarded to inference().
        system_prompt_component = gr.State(value=system_prompt)

        send_button.click(respond, [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider], [user_input, chatbot], scroll_to_output=True)
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo

def multimodal_llm_ui():
    """Build the image + LLM tab and return it as a gr.Blocks demo."""
    with gr.Blocks() as demo:
        
        # NOTE(review): placeholder model ids — replace with real multimodal
        # model repo ids before use.
        model_ids_multimodal = ["Model-1", "Model-2",  "Model-3"]
        
        gr.Markdown("# Coding Vision Model")

        with gr.Row():
            chatbot = gr.Chatbot(height=550)
            with gr.Column():
                hf_token_box = gr.Textbox(lines=1, placeholder="Check if you have access to selected model", label="Hugging Face Token - Required", type="password")
                
                # NOTE(review): image_input is displayed but never passed to
                # respond(), so the image is currently unused — confirm.
                image_input = gr.Image(type="filepath", label="Input your Image Here....")
                
                with gr.Group():
                    with gr.Row():
                        user_input = gr.Textbox(placeholder="Type your problem here...", label="User Input", show_label=False, scale=8)
                        send_button = gr.Button("Send", scale=2, elem_id="send-button", variant = "primary")

                with gr.Row():
                    model_selection = gr.Dropdown(choices=model_ids_multimodal, value=model_ids_multimodal[0], label="Select Model", scale=3)
                    clear_button = gr.Button("Clear Chat", scale=2)
                
                # Voice input: transcription fills the text box; the user
                # still clicks Send to submit.
                with gr.Row():
                    voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
                    voice_button = gr.Button("Use Audio as User Input", scale=3)
                    voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)

                max_new_tokens_slider = gr.Slider(minimum=50, maximum=2000, value=500, step=10, label="Max New Tokens", info="Maximum number of tokens to generate in the response.")

        # Wrap system_prompt in a Gradio component so it can be passed to the
        # respond callback (respond() currently ignores it — see llm_ui note).
        system_prompt_component = gr.State(value=system_prompt)

        send_button.click(respond, [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider], [user_input, chatbot])
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo

# Compose both UIs as tabs; each Blocks demo is built eagerly at import time.
demo = gr.TabbedInterface([llm_ui(), multimodal_llm_ui()], ["LLM", "Image + LLM"])

# Start the Gradio server (blocking call).
demo.launch()