"""Interactive terminal chat client for Hugging Face hosted chat models.

Running the script shows a banner, detects a Hugging Face token, lets the
user pick a model, and then streams chat completions to the terminal while
keeping the conversation history in memory.
"""

import os
import sys
import subprocess

# Ensure huggingface_hub is installed
try:
    from huggingface_hub import InferenceClient, get_token
except ImportError:
    print("[*] Installing required library 'huggingface_hub'...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
    from huggingface_hub import InferenceClient, get_token

# Terminal color codes (ANSI escape sequences)
RESET = "\033[0m"
BOLD = "\033[1m"
BLUE = "\033[94m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
CYAN = "\033[96m"

# System prompt that seeds every conversation (initial start and after '/clear').
SYSTEM_PROMPT = "You are a helpful and concise assistant."

# Menu key -> (display name, model ID). A model ID of None means "ask the user".
MODELS = {
    "1": ("DeepSeek-R1 (Reasoning)", "deepseek-ai/DeepSeek-R1"),
    "2": ("Llama-3.3-70B-Instruct", "meta-llama/Llama-3.3-70B-Instruct"),
    "3": ("Qwen2.5-Coder-32B", "Qwen/Qwen2.5-Coder-32B-Instruct"),
    "4": ("Phi-3-Mini-Instruct", "microsoft/Phi-3-mini-4k-instruct"),
    "5": ("Enter Custom Model ID", None),
}
DEFAULT_CHOICE = "3"
CUSTOM_CHOICE = "5"


def clear_screen():
    """Clear the terminal using the platform's shell command."""
    os.system('cls' if os.name == 'nt' else 'clear')


def _new_history():
    """Return a fresh conversation history containing only the system prompt."""
    return [{"role": "system", "content": SYSTEM_PROMPT}]


def _select_model():
    """Show the model menu and return the user's selection.

    Returns:
        A ``(display_name, model_id)`` tuple, or ``None`` when the custom
        option was chosen but no model ID was typed.

    Raises:
        KeyboardInterrupt, EOFError: propagated from ``input()`` so the
            caller can decide how to exit.
    """
    print(f"\n{BOLD}Select a model to chat with:{RESET}")
    for key, (name, path) in MODELS.items():
        if path:
            print(f" [{key}] {name} ({path})")
        else:
            print(f" [{key}] {name}")

    choice = input("\nChoose model number (1-5, default: 3): ").strip()
    if not choice:
        choice = DEFAULT_CHOICE
    if choice not in MODELS:
        print(f"{RED}[!] Invalid choice. Defaulting to Qwen2.5-Coder-32B.{RESET}")
        choice = DEFAULT_CHOICE

    model_name, model_path = MODELS[choice]
    if choice == CUSTOM_CHOICE:
        model_path = input(
            "\nEnter Hugging Face Model ID (e.g. meta-llama/Llama-3.2-3B-Instruct): "
        ).strip()
        if not model_path:
            return None
        # Use the part after the last '/' as the short display name.
        model_name = model_path.split("/")[-1]
    return model_name, model_path


def _stream_reply(client, messages, max_tokens=1024):
    """Stream one chat completion to stdout and return the full reply text.

    Args:
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=True)`` that yields chunks with ``choices[0].delta.content``.
        messages: Conversation history sent with the request.
        max_tokens: Generation limit forwarded to the API.

    Returns:
        The concatenation of every non-empty content fragment received.

    Raises:
        Exception: whatever the client raises while requesting or iterating
            the stream; the caller is responsible for reporting it.
    """
    parts = []
    stream = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
    )
    for chunk in stream:
        # A chunk with an empty `choices` list would make `choices[0]` raise
        # IndexError and throw away the reply gathered so far, so skip it.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            # Flush per fragment so text appears as it is generated.
            print(content, end="", flush=True)
            parts.append(content)
    print()  # New line after streaming ends
    return "".join(parts)


def main():
    """Run the interactive chat session until the user quits.

    Supports the commands '/exit' and '/quit' (leave) and '/clear' (reset the
    history). Ctrl-C or end-of-input at any prompt ends the session with a
    goodbye message instead of a traceback.
    """
    clear_screen()
    rule = "=" * 60
    print(f"{BOLD}{BLUE}{rule}{RESET}")
    print(f"{BOLD}{BLUE} HUGGING FACE REAL-TIME TERMINAL CLI CHAT{RESET}")
    print(f"{BOLD}{BLUE}{rule}{RESET}")

    # 1. Automatic Token Detection
    token = get_token()
    if token:
        print(f"\n{GREEN}[+] Found cached/environment Hugging Face Token! Running in Authenticated Mode.{RESET}")
    else:
        print(f"\n{YELLOW}[!] No cached token found. Running in Anonymous Mode (Rate-limited).{RESET}")
        print(f"{YELLOW} Tip: Add your 'HF_TOKEN' as a Secret in Space Settings to remove limits.{RESET}")

    # 2. Select Model
    try:
        selection = _select_model()
    except (KeyboardInterrupt, EOFError):
        # Leaving at the menu should be as quiet as leaving the chat itself.
        print(f"\n{YELLOW}Goodbye!{RESET}")
        return
    if selection is None:
        print(f"{RED}[!] No model ID entered. Exiting.{RESET}")
        return
    model_name, model_path = selection

    print(f"\n{GREEN}[*] Initializing client for model: {model_path}...{RESET}")

    # Initialize Inference Client
    try:
        client = InferenceClient(model=model_path, token=token)
    except Exception as e:
        print(f"{RED}[!] Error initializing client: {e}{RESET}")
        return

    print(f"\n{BOLD}{CYAN}Chat initialized! Type '/exit' or '/quit' to close the chat.{RESET}")
    print(f"{CYAN}Type '/clear' to clear conversation history.{RESET}")
    print(f"{BLUE}" + "-" * 60 + f"{RESET}")

    messages = _new_history()

    while True:
        try:
            user_input = input(f"\n{BOLD}{GREEN}You > {RESET}").strip()
            if not user_input:
                continue

            command = user_input.lower()
            if command in ('/exit', '/quit'):
                print(f"\n{YELLOW}Goodbye! (Refresh browser page to restart chat){RESET}")
                break
            if command == '/clear':
                messages = _new_history()
                print(f"{YELLOW}Conversation history cleared!{RESET}")
                continue

            messages.append({"role": "user", "content": user_input})

            # Print assistant response prefix
            print(f"{BOLD}{BLUE}{model_name} > {RESET}", end="", flush=True)

            # Call API with streaming response
            try:
                full_response = _stream_reply(client, messages)
            except Exception as e:
                print(f"\n{RED}[!] Error during generation: {e}{RESET}")
                # Remove the last user message since it failed to get a response
                messages.pop()
            else:
                # Append assistant response to history
                messages.append({"role": "assistant", "content": full_response})

        except (KeyboardInterrupt, EOFError):
            # Ctrl-C, Ctrl-D, or a closed stdin all end the session cleanly.
            print(f"\n{YELLOW}Goodbye!{RESET}")
            break


if __name__ == "__main__":
    main()