import os
import re

import gradio as gr
from openai import OpenAI
# App title and description
APP_TITLE = "No GPU, Multi-LLM Chat"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
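
# NOTE: requests will fail with an authentication error unless HF_TOKEN is
# set (for a Hugging Face Space, add it as a secret in the Space settings).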
# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
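# The Hugging Face Inference API exposes an OpenAI-compatible chat-completions
# endpoint, so the standard `openai` client works once `base_url` points at it.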
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

# Model categories for better organization
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta Llama": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Meta-Llama-3-70B-Instruct",  # Hub repo name for the Llama 3 (3.0) release
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
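# Dicts preserve insertion order (Python 3.7+), so ALL_MODELS[0] is the first
# Qwen entry; it serves as the default model for the dropdown below.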

def get_model_info(model_name):
    """Return a short Markdown summary (org, name, parameter count) for a model id."""
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    # Match parameter counts like "7B", "7b", "0.5B", and MoE-style "8x7B".
    size_match = re.search(r'((?:\d+x)?\d+(?:\.\d+)?)B', model, re.IGNORECASE)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"

def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model,
):
    # A seed of -1 means "random"; omit it from the request in that case.
    if seed == -1:
        seed = None
    # Rebuild the full conversation for the API from the system prompt plus
    # the tuple-style Chatbot history of (user, assistant) pairs.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    model_to_use = selected_model or ALL_MODELS[0]
    new_history = list(history) + [(message, "")]
    current_response = ""
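    # Stream the completion: every yield hands Gradio an updated history, so
    # partial tokens render in the Chatbot as they arrive.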
    try:
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            # Some OpenAI-compatible servers emit keep-alive or usage chunks
            # with an empty choices list; skip those defensively.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        # Show the error in the chat rather than crashing the app.
        err = f"Error: {e}"
        new_history[-1] = (message, err)
        yield new_history

with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
    with gr.Row():
        with gr.Column(scale=2):
            # Model selection via Dropdown
            selected_model = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
                label="Select Model",
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            def update_info(model_name):
                return get_model_info(model_name)

            selected_model.change(
                fn=update_info,
                inputs=[selected_model],
                outputs=[model_info],
            )

            # Conversation settings
            system_message = gr.Textbox(
                value="You are a helpful assistant.",
                label="System Prompt",
                lines=2,
            )
            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 random)")
        with gr.Column(scale=3):
            # Classic tuple-format Chatbot: history is a list of
            # (user, assistant) pairs, matching what respond() expects.
            chatbot = gr.Chatbot()
            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
            send_btn = gr.Button("Send")
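
    # respond() is a generator, so both handlers below stream partial history
    # updates into the Chatbot as tokens arrive (this relies on Gradio's
    # queue, which is enabled by default in recent versions).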
    chat_inputs = [
        msg, chatbot, system_message,
        max_tokens, temperature, top_p,
        freq_penalty, seed, selected_model,
    ]
    send_btn.click(
        fn=respond,
        inputs=chat_inputs,
        outputs=[chatbot],
        queue=True,
    )
    msg.submit(
        fn=respond,
        inputs=chat_inputs,
        outputs=[chatbot],
        queue=True,
    )

demo.launch()
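
# To run locally (assuming `gradio` and `openai` are installed):
#   export HF_TOKEN=hf_...   # a Hugging Face access token with inference access
#   python app.py            # or whatever this file is saved as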