Spaces:
Sleeping
Sleeping
| import inspect | |
| from typing import get_type_hints, Callable, Any | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# --- Load Model and Tokenizer ---
# Hub repository and the GGUF file inside it that both loaders read.
model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"
filename = "SmolLM2-135M-Instruct-Q8_0.gguf"

# Tokenizer and model are loaded from the same GGUF file, so the shared
# keyword argument is spelled once.
_gguf_kwargs = {"gguf_file": filename}
tokenizer = AutoTokenizer.from_pretrained(model_id, **_gguf_kwargs)
model = AutoModelForCausalLM.from_pretrained(model_id, **_gguf_kwargs)

# --- System Prompt Template ---
# Sent as the "system" message ahead of every user prompt.
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Your job is to provide clear and concise responses based on the user's input.\n"
    "Keep your answers straightforward and avoid unnecessary information."
)
def parse_docstring(func):
    """Extract the ``Title:`` and ``Description:`` fields from a docstring.

    The docstring is normalised with ``inspect.getdoc`` and scanned line by
    line. Only lines that *start* with a marker are considered, and only the
    leading marker is removed, so a value may itself contain the text
    ``Title:`` or ``Description:``.

    Args:
        func: Any object accepted by ``inspect.getdoc``.

    Returns:
        A dict with two string keys:
        ``"title"`` - text of the first ``Title:`` line, or ``"Untitled"``
        when there is no such line (or no docstring at all);
        ``"description"`` - the text of every ``Description:`` line, joined
        with newlines, or ``""`` when there is none.
    """
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    title_marker = "Title:"
    description_marker = "Description:"

    title = None
    description_parts = []
    for line in doc.splitlines():
        if line.startswith(title_marker):
            # Only the first Title: line wins; later ones are ignored.
            if title is None:
                # Slice off the prefix instead of str.replace(), which would
                # also delete the marker text from inside the value.
                title = line[len(title_marker):].strip()
        elif line.startswith(description_marker):
            description_parts.append(line[len(description_marker):].strip())

    return {
        "title": "Untitled" if title is None else title,
        "description": "\n".join(description_parts).strip(),
    }
def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that builds a Gradio interface from a function's type hints.

    One input component is created per parameter (a parameter named ``self``
    is skipped) and one output component for the return annotation. The page
    heading and blurb come from the ``Title:`` / ``Description:`` lines of the
    function's docstring, as returned by ``parse_docstring``.

    The interface is constructed when the decorator runs; nothing is served
    until ``.launch()`` is called on the returned wrapper.

    Supported annotations: ``str``, ``int``, ``float``, ``bool`` and a list
    of ``str``.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        A wrapper that forwards calls to ``func`` unchanged and carries a
        ``.launch()`` attribute that starts the app.

    Raises:
        ValueError: If a parameter or the return value has no annotation or
            an annotation outside the supported set.
    """
    # Function-scope import so this block does not depend on a change to the
    # file's import section.
    from functools import wraps

    sig = inspect.signature(func)
    type_hints = get_type_hints(func)
    metadata = parse_docstring(func)

    def _map_type(t: type, label: str) -> gr.Component:
        """Return the Gradio component for annotation ``t``, titled ``label``."""
        if t is str:
            return gr.Textbox(label=label)
        if t is int:
            return gr.Number(precision=0, label=label)
        if t is float:
            return gr.Number(label=label)
        if t is bool:
            return gr.Checkbox(label=label)
        if getattr(t, "__origin__", None) is list:
            # A bare list alias may carry no element type; treat it as
            # unsupported instead of failing on the index.
            elem_args = getattr(t, "__args__", ())
            elem_type = elem_args[0] if elem_args else None
            if elem_type is str:
                # NOTE(review): the choices are placeholders, and a
                # single-select dropdown yields one string rather than a
                # list - confirm whether multiselect is wanted here.
                return gr.Dropdown(choices=["Option1", "Option2"], label=label)
            raise ValueError(f"Unsupported list element type: {elem_type}")
        raise ValueError(f"Unsupported type: {t}")

    # Build inputs: one component per parameter, labelled with the
    # title-cased parameter name. Unannotated parameters fall back to Any,
    # which _map_type rejects.
    inputs = [
        _map_type(type_hints.get(name, Any), name.replace("_", " ").title())
        for name in sig.parameters
        if name != "self"
    ]

    # Build outputs. The label is given explicitly so a str result is not
    # shown under the heading "Input".
    outputs = _map_type(type_hints.get("return", Any), "Output")

    # Wrap function with Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    @wraps(func)  # keep the wrapped function's name, docstring and annotations
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Calling wrapper.launch() with no arguments behaves as before; keyword
    # arguments are now forwarded to demo.launch as well.
    wrapper.launch = demo.launch
    return wrapper
# The decorator builds the Gradio UI and attaches the `.launch()` method that
# the script entry point calls; without it `generate_response.launch` does
# not exist.
@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GGUF Model on CPU
    Description: A Simple app to test out the potentials of small GGUF LLM model.
    Args:
        prompt (str): A simple prompt.
    Returns:
        str: The model's reply only, without the echoed prompt.
    """
    # NOTE: the Title:/Description: lines above are read at runtime by
    # parse_docstring and shown in the UI, so treat them as user-facing text.

    # Apply system prompt + user input
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        # Forwarded to the chat template as a variable.
        # NOTE(review): presumably a no-op unless the loaded model's template
        # defines `enable_thinking` - confirm for this checkpoint.
        enable_thinking=True,
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new tokens. Drop the prompt part so the system and user text are not
    # repeated in the answer.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
if __name__ == "__main__":
    # Script entry point: start the Gradio app. `launch` is the attribute
    # that gradio_app_with_docs attaches to the functions it wraps.
    start_app = generate_response.launch
    start_app()