likhonhfai commited on
Commit
5bad6b6
·
verified ·
1 Parent(s): 40ec775

Refactor app.py: lazy imports and a fallback response path so the app degrades gracefully when dependencies (transformers/torch) are missing

Browse files
Files changed (1) hide show
  1. app.py +23 -29
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
- import torch
4
 
5
  MODEL_NAME = "likhonhfai/mysterious-coding-model"
6
 
7
-
8
  def load_model():
9
- """Attempt to load the CodeAI model. Returns (model, tokenizer) or (None, None) on failure."""
 
 
 
10
  try:
 
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  MODEL_NAME,
13
  torch_dtype=torch.float16,
@@ -17,24 +19,24 @@ def load_model():
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
18
  return model, tokenizer
19
  except Exception:
20
- # Fallback when the large model cannot be loaded due to resource constraints.
21
  return None, None
22
 
23
 
24
- # Load the model at module import time
25
  model, tokenizer = load_model()
26
 
27
 
28
  def respond(message, history):
29
- """Respond to user messages using the loaded model or a fallback."""
30
- # If the model is loaded successfully, generate a response from it.
 
 
31
  if model is not None and tokenizer is not None:
32
- # Build conversation prompt from history and the current message
33
  prompt = ""
34
  for user_msg, bot_msg in history:
35
  prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
36
  prompt += f"User: {message}\nAssistant:"
37
- # Encode and generate output
38
  inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
39
  with torch.no_grad():
40
  output_ids = model.generate(
@@ -45,41 +47,33 @@ def respond(message, history):
45
  pad_token_id=tokenizer.eos_token_id,
46
  )
47
  output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
48
- # Extract the assistant's response after the last 'Assistant:' marker
49
  if "Assistant:" in output_text:
50
- response = output_text.split("Assistant:")[-1].strip()
51
  else:
52
- response = output_text.strip()
53
- return response
54
 
55
  # Fallback responses when the model is unavailable
56
  lower = message.lower()
57
  if "hello" in lower:
58
  return (
59
- "Hello! I'm a placeholder chatbot while the full CodeAI model loads. "
60
- "Ask me about our capabilities like long-context processing, multimodal understanding, "
61
- "and advanced code generation."
62
  )
63
  if "code" in lower:
64
  return (
65
- "Our model specializes in coding tasks such as code generation, completion, bug fixing, "
66
- "refactoring, and documentation. For example, try asking: 'write a python function to add two numbers'."
67
  )
68
  if "image" in lower:
69
- return (
70
- "The CodeAI model supports image understanding tasks like visual question answering and image captioning."
71
- )
72
  if "audio" in lower or "speech" in lower:
73
- return (
74
- "Our model can process audio for speech recognition and audio understanding tasks."
75
- )
76
  if "thanks" in lower or "thank you" in lower:
77
- return "You're welcome! Let me know if you have any more questions."
78
- # Default fallback summary
79
  return (
80
  "This is a demo placeholder response. The CodeAI model uses safetensors storage, supports 8-bit and mxfp4 "
81
- "mixed-precision variants, is compatible with the vLLM inference engine, and is trained using Hugging Face AutoTrain. "
82
- "It can handle long contexts (up to 200,000 tokens) and perform text, image, audio, and multimodal reasoning tasks."
83
  )
84
 
85
 
 
1
  import gradio as gr
 
 
2
 
# Hugging Face Hub repo id of the model this Space serves.
MODEL_NAME = "likhonhfai/mysterious-coding-model"
4
 
 
5
  def load_model():
6
+ """
7
+ Attempt to lazily import transformers and torch and load the CodeAI model.
8
+ Returns (model, tokenizer) if loaded successfully, otherwise (None, None).
9
+ """
10
  try:
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer
12
+ import torch
13
  model = AutoModelForCausalLM.from_pretrained(
14
  MODEL_NAME,
15
  torch_dtype=torch.float16,
 
19
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
20
  return model, tokenizer
21
  except Exception:
 
22
  return None, None
23
 
24
 
# Load the model once at import time. load_model() returns (None, None)
# on any failure, which switches respond() into its canned-reply fallback
# mode instead of crashing the Space.
model, tokenizer = load_model()
27
 
28
 
29
  def respond(message, history):
30
+ """
31
+ Generate a response using the loaded model or provide a placeholder message.
32
+ """
33
+ # If the model is available, generate a response using it
34
  if model is not None and tokenizer is not None:
35
+ import torch # Safe to import since it was available during model loading
36
  prompt = ""
37
  for user_msg, bot_msg in history:
38
  prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
39
  prompt += f"User: {message}\nAssistant:"
 
40
  inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
41
  with torch.no_grad():
42
  output_ids = model.generate(
 
47
  pad_token_id=tokenizer.eos_token_id,
48
  )
49
  output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
50
  if "Assistant:" in output_text:
51
+ return output_text.split("Assistant:")[-1].strip()
52
  else:
53
+ return output_text.strip()
 
54
 
55
  # Fallback responses when the model is unavailable
56
  lower = message.lower()
57
  if "hello" in lower:
58
  return (
59
+ "Hello! I'm a placeholder chatbot while the full CodeAI model loads. Ask me about long-context processing, "
60
+ "multimodal understanding, or code generation."
 
61
  )
62
  if "code" in lower:
63
  return (
64
+ "Our model excels at code generation, completion, bug fixing, refactoring and documentation. "
65
+ "Try asking: 'write a python function to add two numbers'."
66
  )
67
  if "image" in lower:
68
+ return "The CodeAI model supports image understanding tasks like visual question answering and image captioning."
 
 
69
  if "audio" in lower or "speech" in lower:
70
+ return "Our model can process audio for speech recognition and audio understanding."
 
 
71
  if "thanks" in lower or "thank you" in lower:
72
+ return "You're welcome! Let me know if you have more questions."
 
73
  return (
74
  "This is a demo placeholder response. The CodeAI model uses safetensors storage, supports 8-bit and mxfp4 "
75
+ "mixed-precision variants, is compatible with the vLLM engine, and is trained using Hugging Face AutoTrain. "
76
+ "It handles long contexts (up to 200,000 tokens) and performs text, image, audio, and multimodal reasoning tasks."
77
  )
78
 
79