Spaces: Build error
```python
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. MODEL LOADING ---
# We still load the quantized GGUF model, which is perfect for CPU.
# We will focus on Llama-3 as it's best for a general-purpose assistant.
model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

try:
    model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
except Exception as e:
    raise RuntimeError(f"Failed to download the model. Error: {e}")

# Load the model with llama-cpp-python.
# n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
# n_gpu_layers=0 ensures it runs entirely on the CPU.
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=4,  # Set to a reasonable number of threads for the CPU
        n_gpu_layers=0,
        verbose=False
    )
except Exception as e:
    raise RuntimeError(f"Failed to load the GGUF model. Error: {e}")

# --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
# This is the most critical part. We tell the AI how to behave.
# This prompt guides it to be helpful, analytical, and honest about its limitations.
SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer.
Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.
When a user asks a question, follow these steps:
1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number. They need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
2. **Provide a Direct Answer:** If you can directly answer, do so clearly and concisely.
3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires real-world, live data (like prices, stock quotes, personal advice), you MUST state that you cannot provide it. Instead, provide a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
"""

# --- 3. THE GRADIO CHAT INTERFACE ---
def predict(message, history):
    """
    This function is called by the Gradio ChatInterface for each new message.
    'message' is the new user input.
    'history' is the entire conversation history as a list of lists.
    """
    # Format the conversation history for the model.
    # The history format is [['user_message', 'assistant_response'], ...]
    chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        chat_history_formatted.append({"role": "user", "content": user_msg})
        chat_history_formatted.append({"role": "assistant", "content": assistant_msg})

    # Add the latest user message
    chat_history_formatted.append({"role": "user", "content": message})

    # Use the model to generate a response stream.
    # stream=True allows the text to appear token-by-token for a better UX.
    generator = llm.create_chat_completion(
        messages=chat_history_formatted,
        max_tokens=1024,
        temperature=0.7,
        stream=True
    )

    # Yield partial responses to create the streaming effect
    partial_message = ""
    for chunk in generator:
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            partial_message += delta['content']
            yield partial_message

# We use gr.ChatInterface, which creates a complete chat UI for us.
# It manages history, input boxes, and message display automatically.
gr.ChatInterface(
    fn=predict,
    title="🤖 NexusAI Assistant",
    description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
    examples=[
        ["How do I learn to code?"],
        ["Explain the concept of 'supply and demand' like I'm five."],
        ["I want to build a PC, where do I start?"],
        ["I am building a building, how much would it cost me"]  # The "bad" prompt from before!
    ],
    theme="soft"
).launch()
```
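
The listing above is valid Python for the classic tuple-style `gr.ChatInterface` history (Gradio 4.x), so a "Build error" on Spaces at this stage usually comes from the dependency-install step rather than from app.py itself: `llama-cpp-python` is often compiled from source during the build, which can fail or time out on the CPU tier. A minimal `requirements.txt` consistent with the three imports in the code might look like this (a sketch, not verified against this particular Space; pin versions once the build log shows what the image can actually install):

```
# requirements.txt — sketch matching the imports above (unpinned on purpose)
gradio
huggingface_hub
llama-cpp-python
```

If the build log shows the `llama-cpp-python` wheel build failing, pinning a different release of that package is the usual first thing to try; the exact fix depends on what the log actually reports.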