Spaces:
Paused
Paused
import os
import sys
import subprocess

# Ensure huggingface_hub is installed: if the import fails, install the
# package with pip under the interpreter running this script (sys.executable)
# and then retry the import. subprocess.check_call raises CalledProcessError
# when pip exits non-zero, so the retry is only reached after a successful
# install.
try:
    from huggingface_hub import InferenceClient, get_token
except ImportError:
    print("[*] Installing required library 'huggingface_hub'...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
    from huggingface_hub import InferenceClient, get_token
# ANSI SGR escape sequences used to style terminal output. Each constant is
# the Control Sequence Introducer (ESC + "[") followed by an SGR code and "m".
_CSI = "\x1b["

RESET = f"{_CSI}0m"    # reset all attributes
BOLD = f"{_CSI}1m"     # bold / increased intensity
BLUE = f"{_CSI}94m"    # bright blue foreground
GREEN = f"{_CSI}92m"   # bright green foreground
YELLOW = f"{_CSI}93m"  # bright yellow foreground
RED = f"{_CSI}91m"     # bright red foreground
CYAN = f"{_CSI}96m"    # bright cyan foreground
def clear_screen():
    """Clear the terminal by running the platform's clear command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise. The
    exit status of the command is ignored.
    """
    if os.name == 'nt':
        command = 'cls'
    else:
        command = 'clear'
    os.system(command)
# Single source of truth for the system prompt: used for the initial history
# and again whenever '/clear' resets the conversation.
_SYSTEM_PROMPT = "You are a helpful and concise assistant."

# Model menu: key -> (display name, Hub model ID). A model ID of None marks
# the "enter a custom ID" option.
_MODELS = {
    "1": ("DeepSeek-R1 (Reasoning)", "deepseek-ai/DeepSeek-R1"),
    "2": ("Llama-3.3-70B-Instruct", "meta-llama/Llama-3.3-70B-Instruct"),
    "3": ("Qwen2.5-Coder-32B", "Qwen/Qwen2.5-Coder-32B-Instruct"),
    "4": ("Phi-3-Mini-Instruct", "microsoft/Phi-3-mini-4k-instruct"),
    "5": ("Enter Custom Model ID", None),
}
_DEFAULT_CHOICE = "3"


def _new_history():
    """Return a fresh conversation history holding only the system prompt."""
    return [{"role": "system", "content": _SYSTEM_PROMPT}]


def _print_banner():
    """Print the application title framed by two rules."""
    rule = f"{BOLD}{BLUE}" + "=" * 60 + f"{RESET}"
    print(rule)
    print(f"{BOLD}{BLUE} HUGGING FACE REAL-TIME TERMINAL CLI CHAT{RESET}")
    print(rule)


def _select_model():
    """Show the model menu and return ``(display_name, model_id)``.

    An empty answer selects the default entry; an unknown answer prints a
    warning and also falls back to the default. Returns ``None`` when the
    custom option is chosen but no model ID is entered.
    """
    print(f"\n{BOLD}Select a model to chat with:{RESET}")
    for key, (name, path) in _MODELS.items():
        if path:
            print(f" [{key}] {name} ({path})")
        else:
            print(f" [{key}] {name}")

    choice = input("\nChoose model number (1-5, default: 3): ").strip()
    if not choice:
        choice = _DEFAULT_CHOICE
    if choice not in _MODELS:
        print(f"{RED}[!] Invalid choice. Defaulting to Qwen2.5-Coder-32B.{RESET}")
        choice = _DEFAULT_CHOICE

    model_name, model_path = _MODELS[choice]
    if model_path is None:
        model_path = input(
            "\nEnter Hugging Face Model ID (e.g. meta-llama/Llama-3.2-3B-Instruct): "
        ).strip()
        if not model_path:
            print(f"{RED}[!] No model ID entered. Exiting.{RESET}")
            return None
        # Show only the repository name (after the owner prefix) in the prompt.
        model_name = model_path.split("/")[-1]
    return model_name, model_path


def _stream_reply(client, messages):
    """Stream one assistant reply to stdout and return its full text.

    Exceptions raised by the client propagate to the caller.
    """
    parts = []
    response_stream = client.chat_completion(
        messages=messages,
        max_tokens=1024,
        stream=True,
    )
    for chunk in response_stream:
        # A chunk may carry no choices (e.g. a trailing metadata-only chunk);
        # indexing [0] on it would raise IndexError and discard the reply.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="", flush=True)
            parts.append(content)
    print()  # New line after streaming ends
    return "".join(parts)


def main():
    """Run the interactive Hugging Face chat CLI.

    Steps: clear the screen and print the banner, look up a token with
    ``get_token()``, let the user pick a model, create an ``InferenceClient``
    and then loop reading user messages and streaming replies.

    Chat commands: ``/exit`` or ``/quit`` end the session, ``/clear`` resets
    the history to just the system prompt. Ctrl-C or end-of-input (EOF) at
    any prompt ends the program with a goodbye message instead of a traceback.
    """
    clear_screen()
    _print_banner()

    # 1. Automatic Token Detection
    token = get_token()
    if token:
        print(f"\n{GREEN}[+] Found cached/environment Hugging Face Token! Running in Authenticated Mode.{RESET}")
    else:
        print(f"\n{YELLOW}[!] No cached token found. Running in Anonymous Mode (Rate-limited).{RESET}")
        print(f"{YELLOW} Tip: Add your 'HF_TOKEN' as a Secret in Space Settings to remove limits.{RESET}")

    # 2. Select Model
    try:
        selection = _select_model()
    except (KeyboardInterrupt, EOFError):
        # The selection prompts run before the chat loop; exit cleanly here too.
        print(f"\n{YELLOW}Goodbye!{RESET}")
        return
    if selection is None:
        return
    model_name, model_path = selection

    print(f"\n{GREEN}[*] Initializing client for model: {model_path}...{RESET}")
    # Initialize Inference Client
    try:
        client = InferenceClient(model=model_path, token=token)
    except Exception as e:
        print(f"{RED}[!] Error initializing client: {e}{RESET}")
        return

    print(f"\n{BOLD}{CYAN}Chat initialized! Type '/exit' or '/quit' to close the chat.{RESET}")
    print(f"{CYAN}Type '/clear' to clear conversation history.{RESET}")
    print(f"{BLUE}" + "-" * 60 + f"{RESET}")

    messages = _new_history()
    while True:
        try:
            user_input = input(f"\n{BOLD}{GREEN}You > {RESET}").strip()
            if not user_input:
                continue

            command = user_input.lower()
            if command in ('/exit', '/quit'):
                print(f"\n{YELLOW}Goodbye! (Refresh browser page to restart chat){RESET}")
                break
            if command == '/clear':
                messages = _new_history()
                print(f"{YELLOW}Conversation history cleared!{RESET}")
                continue

            messages.append({"role": "user", "content": user_input})
            # Print assistant response prefix
            print(f"{BOLD}{BLUE}{model_name} > {RESET}", end="", flush=True)

            try:
                reply = _stream_reply(client, messages)
            except Exception as e:
                print(f"\n{RED}[!] Error during generation: {e}{RESET}")
                # Remove the last user message since it failed to get a response
                messages.pop()
            else:
                # Append assistant response to history
                messages.append({"role": "assistant", "content": reply})
        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed (no interactive terminal or Ctrl-D).
            print(f"\n{YELLOW}Goodbye!{RESET}")
            break
# Start the chat only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()