Rajan Sharma committed
Commit 2d3153a · verified · 1 Parent(s): 13d0234

Update app.py

Files changed (1):
  1. app.py  +61 -72

app.py CHANGED
@@ -1,90 +1,46 @@
-
-
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from datetime import datetime, timezone
 import os
 from huggingface_hub import login, HfApi
 from huggingface_hub.utils import RepositoryNotFoundError, HfHubHTTPError
+import time
 import requests
+from tenacity import retry, stop_after_attempt, wait_exponential
 
-def get_timestamp():
-    """Get current UTC datetime in specified format"""
-    return datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%SS')
-
-def format_system_info():
-    """Format system information header"""
-    return (
-        f"Current Date and Time (UTC - YYYY-MM-DD HH:MM:SS formatted): {get_timestamp()}\n"
-        f"Current User's Login: Raj-VedAI\n"
-    )
-
-def verify_model_access():
-    system_info = format_system_info()
-    try:
-        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
-        if not token:
-            return False, f"{system_info}Status: No token found"
-
-        # Method 1: Direct API check
-        api = HfApi(token=token)
-        try:
-            model_info = api.model_info("CohereLabs/c4ai-command-a-03-2025")
-            return True, f"{system_info}Status: ✅ Access granted\nModel: CohereLabs/c4ai-command-a-03-2025"
-        except Exception as e:
-            if "403" in str(e):
-                return False, f"{system_info}Status: ❌ Access denied\nPlease request access at https://huggingface.co/CohereLabs/c4ai-command-a-03-2025"
-            return False, f"{system_info}Status: ❌ Error\nDetails: {str(e)}"
-
-    except Exception as e:
-        return False, f"{system_info}Status: ❌ Unexpected error\nDetails: {str(e)}"
-
+# Add retry decorator for connection attempts
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def initialize_model():
     try:
         token = os.getenv("HUGGING_FACE_HUB_TOKEN")
         if not token:
             return False, "No token found. Please set HUGGING_FACE_HUB_TOKEN in Space secrets.", None
 
-        login(token=token)
+        # Force re-login to refresh connection
+        login(token=token, add_to_git_credential=False)
 
-        # Initialize the model and tokenizer with token
+        # Initialize with device mapping and low memory settings
         model_id = "CohereLabs/c4ai-command-a-03-2025"
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
-            token=token
+            token=token,
+            use_fast=True
         )
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
-            token=token
+            token=token,
+            device_map="auto",
+            low_cpu_mem_usage=True,
+            torch_dtype="auto"
         )
         return True, model, tokenizer
-    except RepositoryNotFoundError:
-        return False, "Model repository not found. Please check the model ID.", None
-    except HfHubHTTPError as e:
-        if e.response.status_code == 401:
-            return False, "Authentication failed. Please check your token permissions.", None
-        elif e.response.status_code == 403:
-            return False, "Access denied. Please request access at https://huggingface.co/CohereLabs/c4ai-command-a-03-2025", None
-        else:
-            return False, f"An error occurred: {str(e)}", None
     except Exception as e:
-        return False, f"Unexpected error: {str(e)}", None
-
-def check_access_status():
-    access_granted, message = verify_model_access()
-    return message
+        return False, f"Error during initialization: {str(e)}", None
 
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def chat(message, history):
     system_info = format_system_info()
 
-    # Verify access before proceeding
-    access_granted, status_message = verify_model_access()
-    if not access_granted:
-        return [(message, f"{system_info}Error: {status_message}")]
-
-    if history is None:
-        history = []
-
     try:
         # Initialize model if not already done
         success, result, tokenizer = initialize_model()
@@ -92,35 +48,59 @@ def chat(message, history):
             return [(message, f"{system_info}Error: {result}")]
         model = result
 
-        # Format messages with the c4ai-command-a-03-2025 chat template
+        if history is None:
+            history = []
+
+        # Format messages with the chat template
         messages = [{"role": "user", "content": message}]
         input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True)
 
-        # Generate response
+        # Generate response with safety settings
         gen_tokens = model.generate(
             input_ids,
             max_new_tokens=100,
             do_sample=True,
             temperature=0.3,
+            pad_token_id=tokenizer.eos_token_id,
+            attention_mask=input_ids.new_ones(input_ids.shape)
         )
 
         # Decode response
-        gen_text = tokenizer.decode(gen_tokens[0])
+        gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
 
         # Format the full response with system info
         formatted_response = f"{system_info}{gen_text}"
         history.append((message, formatted_response))
        return history
     except Exception as e:
-        return [(message, f"{system_info}Error during chat: {str(e)}")]
+        error_msg = f"{system_info}Error during chat: {str(e)}\nAttempting reconnection..."
+        if history is None:
+            history = []
+        history.append((message, error_msg))
+        return history
+
+def check_connection():
+    timestamp = get_timestamp()
+    try:
+        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+        api = HfApi(token=token)
+        model_info = api.model_info("CohereLabs/c4ai-command-a-03-2025")
+        return f"""
+{format_system_info()}
+Connection Status: ✅ Connected
+Model: {model_info.modelId}
+Last Modified: {model_info.lastModified}
+"""
+    except Exception as e:
+        return f"{format_system_info()}Connection Status: ❌ Error\nDetails: {str(e)}"
 
-# Create the Gradio interface with both chat and status check
+# Create the Gradio interface with connection monitoring
 with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown(f"# Medical Decision Support AI\n{format_system_info()}")
+    gr.Markdown(f"# Medical Decision Support AI")
 
     with gr.Row():
-        status_btn = gr.Button("Check Access Status")
-        status_output = gr.Textbox(label="Access Status", lines=6)
+        connection_btn = gr.Button("Check Connection Status")
+        connection_status = gr.Textbox(label="Connection Status", lines=6)
 
     chat_interface = gr.ChatInterface(
         fn=chat,
@@ -132,11 +112,20 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         ]
     )
 
-    status_btn.click(check_access_status, outputs=status_output)
+    connection_btn.click(check_connection, outputs=connection_status)
 
-    # Perform initial access check
-    access_granted, status_message = verify_model_access()
-    if not access_granted:
-        gr.Warning(status_message)
+    # Check connection on startup
+    connection_status.value = check_connection()
+
+    # Add requirements to requirements.txt
+    requirements = """
+    gradio>=3.50.2
+    transformers
+    torch
+    accelerate
+    huggingface_hub
+    requests
+    tenacity
+    """
 
 demo.launch()
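
Note on the retry policy introduced above: both initialize_model and chat are now wrapped in tenacity's retry decorator with stop_after_attempt(3) and wait_exponential(multiplier=1, min=4, max=10), i.e. up to three attempts with an exponential backoff clamped between 4 and 10 seconds. A minimal, standalone sketch of that behavior (flaky_call and its attempts counter are hypothetical, for illustration only, and are not part of this commit):

# Sketch of the retry policy added in this commit (tenacity).
# flaky_call and the attempts counter are illustrative, not from app.py.
from tenacity import retry, stop_after_attempt, wait_exponential

attempts = {"count": 0}

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def flaky_call():
    attempts["count"] += 1
    if attempts["count"] < 3:
        # Simulate a transient connection error on the first two attempts
        raise ConnectionError("temporary failure")
    return "ok"

print(flaky_call())       # returns "ok" on the third attempt, after two backoff waits
print(attempts["count"])  # 3

By default tenacity raises a RetryError once all attempts fail; passing reraise=True to the decorator would surface the original exception instead.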