VincentGOURBIN committed
Commit d061785 · verified · 1 Parent(s): c29050e

Upload folder using huggingface_hub

Files changed (1)
  1. src/ai/qwen_zerogpu_analyzer.py +52 -40
src/ai/qwen_zerogpu_analyzer.py CHANGED
@@ -1,22 +1,22 @@
 """
-Qwen model with ZeroGPU support for Hugging Face Spaces.
+Qwen3-VL model with ZeroGPU support for Hugging Face Spaces.
 Uses transformers with @spaces.GPU decorator.
 """
 import torch
 from typing import List, Dict
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
 import spaces
 
 
 class QwenZeroGPUAnalyzer:
     """
     Qwen3 model analyzer with ZeroGPU support.
-    Uses Qwen3-4B-Instruct for diagram generation.
+    Uses Qwen3-VL-4B-Instruct for diagram generation.
     """
 
     def __init__(
         self,
-        model_name: str = "Qwen/Qwen3-4B-Instruct"
+        model_name: str = "Qwen/Qwen3-VL-4B-Instruct"
     ):
         """
         Initialize the Qwen ZeroGPU analyzer.
@@ -26,30 +26,28 @@ class QwenZeroGPUAnalyzer:
         """
         self.model_name = model_name
         self.model = None
-        self.tokenizer = None
+        self.processor = None
 
         print(f"✓ Qwen ZeroGPU analyzer initialized (model will load on first inference)")
         print(f"  Model: {self.model_name}")
 
     def _load_model(self):
-        """Load model and tokenizer (called on first inference)."""
+        """Load model and processor (called on first inference)."""
         if self.model is not None:
             return
 
         print(f"Loading model: {self.model_name}...")
 
-        # Load tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_name,
-            trust_remote_code=True
+        # Load processor (for Qwen3-VL)
+        self.processor = AutoProcessor.from_pretrained(
+            self.model_name
         )
 
-        # Load model (will be moved to GPU by @spaces.GPU decorator)
-        self.model = AutoModelForCausalLM.from_pretrained(
+        # Load model (Qwen3-VL model)
+        self.model = Qwen3VLForConditionalGeneration.from_pretrained(
             self.model_name,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            trust_remote_code=True
+            torch_dtype="auto",  # Use auto dtype like in official example
+            device_map="auto"
         )
 
         print(f"✓ Model loaded: {self.model_name}")
@@ -70,35 +68,49 @@
         if self.model is None:
             self._load_model()
 
-        # Apply chat template
-        prompt = self.tokenizer.apply_chat_template(
-            conversation,
-            tokenize=False,
-            add_generation_prompt=True
+        # Format conversation for Qwen3-VL (text-only usage)
+        # Build prompt from conversation history
+        messages = []
+        for msg in conversation:
+            role = msg["role"]
+            content = msg["content"]
+
+            # Qwen3-VL expects specific format
+            messages.append({
+                "role": role,
+                "content": [{"type": "text", "text": content}]
+            })
+
+        # Apply chat template (following official example)
+        inputs = self.processor.apply_chat_template(
+            messages,
+            tokenize=True,
+            add_generation_prompt=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+        inputs = inputs.to(self.model.device)
+
+        # Generate with ZeroGPU (following official example)
+        generated_ids = self.model.generate(
+            **inputs,
+            max_new_tokens=max_tokens
         )
 
-        # Tokenize
-        inputs = self.tokenizer(prompt, return_tensors="pt")
-        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
-
-        # Generate with ZeroGPU
-        with torch.no_grad():
-            outputs = self.model.generate(
-                **inputs,
-                max_new_tokens=max_tokens,
-                temperature=0.2,  # Low temperature for consistent diagrams
-                do_sample=False,  # Greedy decoding for deterministic output
-                pad_token_id=self.tokenizer.eos_token_id
-            )
-
-        # Decode response (skip input tokens)
-        input_length = inputs["input_ids"].shape[1]
-        response = self.tokenizer.decode(
-            outputs[0][input_length:],
-            skip_special_tokens=True
+        # Trim generated ids (remove input tokens)
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):]
+            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode response
+        output_text = self.processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
        )
 
-        return response.strip()
+        return output_text[0].strip()
 
     def cleanup_model(self):
         """Cleanup (managed by ZeroGPU)."""