import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 📥 Download the GGUF model from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="RockSky1/Infinity_1.0",
    filename="Infinity_1.0.gguf"
)

print("Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=512,      # context window size (tokens)
    n_threads=2,    # CPU threads
    n_batch=128     # prompt batch size
)
print("Model loaded ✅")


def chat_function(message, history):
    """Build an OpenAI-style message list from the Gradio history and query the model."""
    messages = [
        {
            "role": "system",
            "content": "You are Infinity AI 🔥 created by Shivam Kumar (RockSky1) from Bihar, India."
        }
    ]
    # Gradio passes history as (user, assistant) pairs; expand each pair into
    # two chat messages instead of alternating on the pair index, which would
    # drop half of the conversation.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        output = llm.create_chat_completion(
            messages=messages,
            max_tokens=128,
            temperature=0.7
        )
        reply = output["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"Error: {str(e)}"
    return reply


with gr.Blocks() as demo:
    gr.Markdown("# ♾️ Infinity-LLM v1.0 🚀")
    gr.ChatInterface(
        fn=chat_function,
        examples=[
            "Who is Shivam Kumar?",
            "Tell me about Infinity-LLM.",
            "Write a simple Python script."
        ]
    )

demo.launch()
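
# For reference, a minimal sketch (illustrative values only, not real model output) of the
# OpenAI-style message list that chat_function builds for a one-turn history before calling
# llm.create_chat_completion:
#
# [
#     {"role": "system", "content": "You are Infinity AI 🔥 created by Shivam Kumar (RockSky1) from Bihar, India."},
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help you today?"},
#     {"role": "user", "content": "Who is Shivam Kumar?"},
# ]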