# test / hf_cli_chat.py
# Asmita2682hshs's picture
# Upload 3 files
# 1f44356 verified
import os
import sys
import subprocess
# Ensure huggingface_hub is installed and exports the names this script needs.
try:
    from huggingface_hub import InferenceClient, get_token
except ImportError:
    import importlib

    print("[*] Installing required library 'huggingface_hub'...")
    # ImportError is raised both when the package is missing and when an
    # installed copy does not export one of the requested names. Without
    # --upgrade, pip treats an already-installed package as satisfied and the
    # retry below would fail the same way.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--upgrade", "huggingface_hub"]
    )
    # Forget any copy of the package that was (partially) imported above so the
    # retry loads the freshly installed files instead of the cached modules.
    for _stale in [m for m in sys.modules if m.partition(".")[0] == "huggingface_hub"]:
        sys.modules.pop(_stale, None)
    # Packages installed after interpreter start-up may be invisible to the
    # import system's finder caches until they are invalidated.
    importlib.invalidate_caches()
    from huggingface_hub import InferenceClient, get_token
# Terminal color codes (ANSI escape sequences).
# Each constant is written verbatim into printed strings; RESET is emitted
# after colored text to return the terminal to its default attributes.
RESET = "\033[0m"  # clear all attributes
BOLD = "\033[1m"  # bold / increased intensity
BLUE = "\033[94m"  # bright blue foreground
GREEN = "\033[92m"  # bright green foreground
YELLOW = "\033[93m"  # bright yellow foreground
RED = "\033[91m"  # bright red foreground
CYAN = "\033[96m"  # bright cyan foreground
def clear_screen():
    """Clear the terminal by running the platform's shell clear command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.
    """
    shell_commands = {True: 'cls', False: 'clear'}
    running_on_windows = os.name == 'nt'
    os.system(shell_commands[running_on_windows])
# System prompt that seeds every (new or cleared) conversation.
_SYSTEM_PROMPT = "You are a helpful and concise assistant."

# Menu key used when the user presses Enter or types an unknown option.
_DEFAULT_MODEL_KEY = "3"

# Menu key -> (display name, Hub model ID). A model ID of None means the user
# is asked to type a custom model ID.
_MODELS = {
    "1": ("DeepSeek-R1 (Reasoning)", "deepseek-ai/DeepSeek-R1"),
    "2": ("Llama-3.3-70B-Instruct", "meta-llama/Llama-3.3-70B-Instruct"),
    "3": ("Qwen2.5-Coder-32B", "Qwen/Qwen2.5-Coder-32B-Instruct"),
    "4": ("Phi-3-Mini-Instruct", "microsoft/Phi-3-mini-4k-instruct"),
    "5": ("Enter Custom Model ID", None),
}


def _new_history():
    """Return a fresh conversation containing only the system prompt."""
    return [{"role": "system", "content": _SYSTEM_PROMPT}]


def _choose_model():
    """Print the model menu and return ``(display_name, model_id)``.

    Returns ``None`` when the custom-model option is chosen but no model ID is
    typed. ``EOFError`` / ``KeyboardInterrupt`` raised by ``input`` propagate
    to the caller.
    """
    print(f"\n{BOLD}Select a model to chat with:{RESET}")
    for key, (name, path) in _MODELS.items():
        if path:
            print(f" [{key}] {name} ({path})")
        else:
            print(f" [{key}] {name}")

    choice = input("\nChoose model number (1-5, default: 3): ").strip()
    if not choice:
        choice = _DEFAULT_MODEL_KEY
    if choice not in _MODELS:
        default_name = _MODELS[_DEFAULT_MODEL_KEY][0]
        print(f"{RED}[!] Invalid choice. Defaulting to {default_name}.{RESET}")
        choice = _DEFAULT_MODEL_KEY

    name, path = _MODELS[choice]
    if path is not None:
        return name, path

    # Custom entry: the display name is the last component of the typed ID.
    path = input(
        "\nEnter Hugging Face Model ID (e.g. meta-llama/Llama-3.2-3B-Instruct): "
    ).strip()
    if not path:
        return None
    return path.split("/")[-1], path


def _stream_reply(client, messages):
    """Stream one assistant reply to stdout and return its full text.

    Exceptions raised by the client while requesting or iterating the stream
    propagate to the caller.
    """
    pieces = []
    response_stream = client.chat_completion(
        messages=messages,
        max_tokens=1024,
        stream=True,
    )
    for chunk in response_stream:
        # Defensive: a chunk with an empty `choices` list would make
        # `choices[0]` raise IndexError and discard the partial reply.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="", flush=True)
            pieces.append(content)
    print()  # New line after streaming ends
    return "".join(pieces)


def main():
    """Run the interactive terminal chat against a Hugging Face model.

    Prints a banner, reports whether a Hugging Face token was found, lets the
    user pick a model, then loops reading prompts and streaming replies.
    ``/exit`` and ``/quit`` leave the loop, ``/clear`` resets the conversation
    history, and Ctrl-C or end-of-input ends the session with a goodbye
    message instead of a traceback.
    """
    clear_screen()
    print(f"{BOLD}{BLUE}" + "=" * 60 + f"{RESET}")
    print(f"{BOLD}{BLUE} HUGGING FACE REAL-TIME TERMINAL CLI CHAT{RESET}")
    print(f"{BOLD}{BLUE}" + "=" * 60 + f"{RESET}")

    # 1. Automatic Token Detection
    token = get_token()
    if token:
        print(f"\n{GREEN}[+] Found cached/environment Hugging Face Token! Running in Authenticated Mode.{RESET}")
    else:
        print(f"\n{YELLOW}[!] No cached token found. Running in Anonymous Mode (Rate-limited).{RESET}")
        print(f"{YELLOW} Tip: Add your 'HF_TOKEN' as a Secret in Space Settings to remove limits.{RESET}")

    # 2. Select Model
    try:
        selection = _choose_model()
    except (EOFError, KeyboardInterrupt):
        # stdin closed or Ctrl-C while the menu was waiting for input.
        print(f"\n{YELLOW}Goodbye!{RESET}")
        return
    if selection is None:
        print(f"{RED}[!] No model ID entered. Exiting.{RESET}")
        return
    model_name, model_path = selection

    print(f"\n{GREEN}[*] Initializing client for model: {model_path}...{RESET}")

    # Initialize Inference Client
    try:
        client = InferenceClient(model=model_path, token=token)
    except Exception as e:
        print(f"{RED}[!] Error initializing client: {e}{RESET}")
        return

    print(f"\n{BOLD}{CYAN}Chat initialized! Type '/exit' or '/quit' to close the chat.{RESET}")
    print(f"{CYAN}Type '/clear' to clear conversation history.{RESET}")
    print(f"{BLUE}" + "-" * 60 + f"{RESET}")

    messages = _new_history()

    while True:
        try:
            user_input = input(f"\n{BOLD}{GREEN}You > {RESET}").strip()
            if not user_input:
                continue

            command = user_input.lower()
            if command in ('/exit', '/quit'):
                print(f"\n{YELLOW}Goodbye! (Refresh browser page to restart chat){RESET}")
                break
            if command == '/clear':
                messages = _new_history()
                print(f"{YELLOW}Conversation history cleared!{RESET}")
                continue

            messages.append({"role": "user", "content": user_input})

            # Print assistant response prefix
            print(f"{BOLD}{BLUE}{model_name} > {RESET}", end="", flush=True)

            # Call API with streaming response
            try:
                full_response = _stream_reply(client, messages)
            except Exception as e:
                print(f"\n{RED}[!] Error during generation: {e}{RESET}")
                # Remove the last user message since it failed to get a response
                messages.pop()
            else:
                # Append assistant response to history
                messages.append({"role": "assistant", "content": full_response})
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C (at the prompt or mid-stream) or end-of-input on stdin.
            print(f"\n{YELLOW}Goodbye!{RESET}")
            break
# Start the chat only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()