Karim0111 committed on
Commit
4134e11
·
verified ·
1 Parent(s): a1c3e17

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +21 -2
tools.py CHANGED
@@ -59,7 +59,26 @@ def use_vision_model(question: str, images: List[Image.Image]) -> str:
59
  }
60
  ]
61
 
62
- output = image_model(messages).content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  print(f'Model returned: {output}')
64
  return output
65
 
@@ -327,5 +346,5 @@ def transcribe_audio(audio_file_path: str) -> str:
327
  """
328
  model_size: str = "small"
329
  model = whisper.load_model(model_size)
330
- result = model.transcribe(audio_file_path)
331
  return result['text']
 
59
  }
60
  ]
61
 
62
+ # Get the response and properly extract the content as a string
63
+ response = image_model(messages)
64
+
65
+ # Handle different response formats
66
+ if hasattr(response, 'content'):
67
+ output = response.content
68
+ # If content is a list, extract text from it
69
+ if isinstance(output, list):
70
+ text_parts = []
71
+ for item in output:
72
+ if isinstance(item, dict) and 'text' in item:
73
+ text_parts.append(item['text'])
74
+ elif isinstance(item, str):
75
+ text_parts.append(item)
76
+ output = ' '.join(text_parts) if text_parts else str(output)
77
+ elif not isinstance(output, str):
78
+ output = str(output)
79
+ else:
80
+ output = str(response)
81
+
82
  print(f'Model returned: {output}')
83
  return output
84
 
 
346
  """
347
  model_size: str = "small"
348
  model = whisper.load_model(model_size)
349
+ result = model.transcribe(audio_path)
350
  return result['text']