import os

from dotenv import load_dotenv

# Step 1: pull environment variables from a local .env file (if one exists)
# so the API key never has to be hardcoded in source.
load_dotenv()

# May be None when the variable is unset; downstream callers should handle that.
GOOGLE_AI_STUDIO_API_KEY = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
| import base64 | |
def encode_image(image_path):
    """Read an image file and return its contents as a base64 string.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The file's raw bytes encoded as a base64 ASCII string (utf-8 decoded).
    """
    # Use a context manager so the file handle is always closed, even on
    # error — the original opened the file and never closed it (leak).
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
| #Step3: Setup Multimodal LLM | |
| import google.generativeai as genai | |
def analyze_image_with_query(query, model, encoded_image, mime_type="image/jpeg"):
    """Send a text query plus a base64-encoded image to a Gemini model.

    Args:
        query: Natural-language question or instruction about the image.
        model: Gemini model name (e.g. "gemini-1.5-flash").
        encoded_image: Base64-encoded image data (see ``encode_image``).
        mime_type: MIME type of the encoded image. Defaults to
            "image/jpeg" for backward compatibility with existing callers;
            pass e.g. "image/png" for other formats.

    Returns:
        The model's text response.
    """
    # Configure on every call so the key read at import time is applied
    # even if this module was imported before the environment was loaded.
    genai.configure(api_key=GOOGLE_AI_STUDIO_API_KEY)
    gemini_model = genai.GenerativeModel(model)
    # Gemini accepts a heterogeneous parts list: plain text plus an
    # inline-data dict (the original built a one-element list only to
    # index it with [0]).
    image_part = {
        "mime_type": mime_type,
        "data": encoded_image,
    }
    response = gemini_model.generate_content(contents=[query, image_part])
    return response.text