"""Fermata-LightCoder: a Gradio chat UI over TinyLlama 1.1B (GGUF) via ctransformers."""

import os
import re

import gradio as gr
from ctransformers import AutoModelForCausalLM
from huggingface_hub import login

# Authenticate against the Hugging Face Hub; the token is injected via env var.
login(token=os.getenv("HF_TOKEN"))

# Fetch the quantized GGUF weights once; skip the download if the file exists.
# NOTE(review): os.system + wget is shell-dependent and unchecked — consider
# huggingface_hub.hf_hub_download for a portable, verified download.
if not os.path.exists("model.gguf"):
    os.system(
        "wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/"
        "resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf -O model.gguf"
    )
    print("model.gguf downloaded.")

# Load the GGUF model with ctransformers (CPU-friendly llama.cpp backend).
llm = AutoModelForCausalLM.from_pretrained(
    "./",
    model_file="model.gguf",
    model_type="llama",
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
)


def clean_response(raw):
    """Normalize raw model output and wrap detected code in a Markdown fence.

    Strips curly braces and literal ``\\n`` escapes, then — if the text is not
    already fenced — guesses the language from keyword heuristics and wraps the
    whole response in a ```` ```lang ```` block for Gradio's Markdown renderer.

    Args:
        raw: Raw text produced by the model.

    Returns:
        Cleaned text, fenced as a code block when code keywords are detected.
    """
    # NOTE(review): this also deletes braces that belong to JS/Java/C-style
    # code bodies — presumably meant to strip chat-template residue; verify.
    raw = re.sub(r"[{}]+", "", raw)
    raw = raw.replace("\\n", "\n")  # turn escaped newlines into real ones
    raw = raw.strip()
    # Already fenced by the model — return as-is.
    # Bug fix: the original compared against four backticks ("````"), which a
    # Markdown fence never starts with, so the early-return was unreachable.
    if raw.startswith("```"):
        return raw
    # Ordered (language, keywords) heuristics; first match wins.
    language_blocks = [
        ("python", ["def ", "print(", "class ", "import ", "return "]),
        ("javascript", ["console.log(", "function ", "let ", "const "]),
        ("bash", ["#!/bin/bash", "sudo ", "echo ", "apt-get"]),
        ("sql", ["SELECT ", "FROM ", "WHERE "]),
        ("java", ["public static void main", "System.out.println"]),
    ]
    for lang, keywords in language_blocks:
        if any(kw in raw for kw in keywords):
            return f"```{lang}\n{raw}\n```"
    return raw


def chat(prompt):
    """Run one chat turn: wrap the prompt with the system persona and generate.

    Args:
        prompt: The user's question; blank input short-circuits to "".

    Returns:
        The cleaned assistant response, or an error string on failure.
    """
    if not prompt.strip():
        return ""
    # Bug fix: in the original source this literal was physically broken across
    # two lines inside the quotes (a SyntaxError); rejoined into one string.
    wrapped_prompt = (
        "You are Fermata-LightCoder, a helpful, fast, and concise AI coding "
        "assistant created by Rangga Fermata. You specialize in writing and "
        "explaining programming code, shell scripts, algorithms, and small dev "
        "utilities. When the user asks in another language like Indonesian, "
        "respond completely in that language.\n\n"
        f"User: {prompt}\nAssistant:"
    )
    try:
        response = llm(wrapped_prompt)
        return clean_response(str(response))
    except Exception as e:
        # Surface the failure to the UI rather than crashing the Space.
        return f"❌ Error: {str(e)}"


# Gradio UI for Fermata-LightCoder
demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=5, label="Ask Fermata-LightCoder"),
    outputs=gr.Markdown(label="Fermata-LightCoder Says:"),
    title="🧠 Fermata-LightCoder (TinyLlama 1.1B via ctransformers)",
    description=(
        "A compact, CPU-optimized AI assistant that specializes in generating "
        "code and scripts. Powered by TinyLlama GGUF + ctransformers."
    ),
    allow_flagging="never",
)

# Launch the app (mcp_server exposes the fn as an MCP tool; needs gradio[mcp]).
if __name__ == "__main__":
    demo.launch(mcp_server=True)