Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Hugging Face Hub repository that hosts the model; set this to the exact
# name of your own repo.
HF_MODEL_ID = "rieon/DeepCoder-14B-Preview-Suger"

# Module-level inference client bound to the model above.
client = InferenceClient(HF_MODEL_ID)
def _history_lines(history):
    """Render prior chat turns as ``User: ...`` / ``Assistant: ...`` lines.

    Accepts both history layouts: role/content dicts (what the interface
    sends when built with ``type="messages"``) and legacy ``(user, bot)``
    pairs.
    """
    labels = {"user": "User", "assistant": "Assistant"}
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            label = labels.get(turn.get("role"))
            if label is None:
                # Only user/assistant turns belong in the transcript.
                continue
            content = turn.get("content", "")
            lines.append(f"{label}: {content}")
        else:
            user, bot = turn
            lines.append(f"User: {user}")
            lines.append(f"Assistant: {bot}")
    return lines


def respond(
    message: str,
    history: list,
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message``.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation, either role/content
            dicts or ``(user, bot)`` pairs.
        system_message: Instruction text placed at the top of the prompt;
            omitted when blank.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling threshold forwarded to the endpoint.

    Yields:
        The reply accumulated so far, once per streamed token, so the UI
        can re-render the growing answer.
    """
    # Assemble one flat prompt: system message, prior turns, new message.
    parts = []
    system = system_message.strip()
    if system:
        parts.append(system)
    parts.extend(_history_lines(history))
    parts.append(f"User: {message}")
    parts.append("Assistant:")
    prompt = "\n".join(parts)

    # text_generation takes the prompt as its first argument (there is no
    # ``inputs`` keyword); with stream=True and details left at its default
    # it yields plain string tokens.
    generated = ""
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        generated += token
        yield generated
# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as `respond`'s parameters after `message` and `history`:
# system message, max new tokens, temperature, top-p.
# NOTE(review): the description advertises text/PDF upload, but no file
# input is configured in this call — confirm whether that is intended.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are a helpful coding assistant.",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
            label="Top-p",
        ),
    ],
    title="DeepCoder with Suger",
    description="Upload any text or pdf files and ask questions about them!",
    type="messages",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()