import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, asyncio
import litellm
from litellm import embedding, completion, completion_cost, Timeout, acompletion
from litellm import RateLimitError
import json
import tempfile

litellm.num_retries = 3
litellm.cache = None

user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
def load_vertex_ai_credentials():
    # Define the path to the vertex_key.json file
    print("loading vertex ai credentials")
    filepath = os.path.dirname(os.path.abspath(__file__))
    vertex_key_path = filepath + "/vertex_key.json"

    # Read the existing content of the file or create an empty dictionary
    try:
        with open(vertex_key_path, "r") as file:
            # Read the file content
            print("Read vertexai file path")
            content = file.read()

            # If the file is empty or not valid JSON, create an empty dictionary
            if not content or not content.strip():
                service_account_key_data = {}
            else:
                # Attempt to load the existing JSON content
                file.seek(0)
                service_account_key_data = json.load(file)
    except FileNotFoundError:
        # If the file doesn't exist, create an empty dictionary
        service_account_key_data = {}

    # Update the service_account_key_data with environment variables
    private_key_id = os.environ.get("VERTEX_AI_PRIVATE_KEY_ID", "")
    private_key = os.environ.get("VERTEX_AI_PRIVATE_KEY", "")
    private_key = private_key.replace("\\n", "\n")
    service_account_key_data["private_key_id"] = private_key_id
    service_account_key_data["private_key"] = private_key

    # Create a temporary file
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
        # Write the updated content to the temporary file
        json.dump(service_account_key_data, temp_file, indent=2)

    # Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)


async def get_response():
    load_vertex_ai_credentials()
    prompt = '\ndef count_nums(arr):\n    """\n    Write a function count_nums which takes an array of integers and returns\n    the number of elements which has a sum of digits > 0.\n    If a number is negative, then its first signed digit will be negative:\n    e.g. -123 has signed digits -1, 2, and 3.\n    >>> count_nums([]) == 0\n    >>> count_nums([-1, 11, -11]) == 1\n    >>> count_nums([1, 1, 2]) == 3\n    """\n'
    try:
        response = await acompletion(
            model="gemini-pro",
            messages=[
                {
                    "role": "system",
                    "content": "Complete the given code with no more explanation. Remember that there is a 4-space indent before the first line of your generated code.",
                },
                {"role": "user", "content": prompt},
            ],
        )
        return response
    except litellm.UnprocessableEntityError as e:
        pass
    except Exception as e:
        pytest.fail(f"An error occurred - {str(e)}")


def test_vertex_ai():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"

    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.7,
            )
            print("\nModel Response", response)
            assert isinstance(response.choices[0].message.content, str)
            assert len(response.choices[0].message.content) > 1
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai()


def test_vertex_ai_stream():
    load_vertex_ai_credentials()
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"
    import random

    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[
                    {"role": "user", "content": "write 10 lines of code for saying hi"}
                ],
                stream=True,
            )
            completed_str = ""
            for chunk in response:
                print(chunk)
                content = chunk.choices[0].delta.content or ""
                print("\n content", content)
                completed_str += content
                assert isinstance(content, str)
            assert len(completed_str) > 4
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai_stream()


# pytest-asyncio marker so pytest awaits this coroutine test directly
# (assumption: pytest-asyncio is installed in this test environment).
@pytest.mark.asyncio
async def test_async_vertexai_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        print(f"model being tested in async call: {model}")
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model, messages=messages, temperature=0.7, timeout=5
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")


# asyncio.run(test_async_vertexai_response())


# pytest-asyncio marker, same assumption as above.
@pytest.mark.asyncio
async def test_async_vertexai_streaming_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model,  # use the sampled model; was hardcoded to "gemini-pro", defeating the loop
                messages=messages,
                temperature=0.7,
                timeout=5,
                stream=True,
            )
            print(f"response: {response}")
            complete_response = ""
            async for chunk in response:
                print(f"chunk: {chunk}")
                # guard against None deltas in the final streamed chunk
                complete_response += chunk.choices[0].delta.content or ""
            print(f"complete_response: {complete_response}")
            assert len(complete_response) > 0
        except litellm.Timeout as e:
            pass
        except Exception as e:
            print(e)
            pytest.fail(f"An exception occurred: {e}")


# asyncio.run(test_async_vertexai_streaming_response())
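

# Vision: the next test sends a multimodal message that mixes text with an
# image referenced by a Cloud Storage (gs://) URL.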
def test_gemini_pro_vision():
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 0
        resp = litellm.completion(
            model="vertex_ai/gemini-pro-vision",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Whats in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
                            },
                        },
                    ],
                }
            ],
        )
        print(resp)

        prompt_tokens = resp.usage.prompt_tokens

        # DO NOT DELETE this ASSERT
        # Google counts the prompt tokens for us; we should ensure we use the tokens from the original response
        assert prompt_tokens == 263  # the gemini api returns 263 to us
    except Exception as e:
        traceback.print_exc()
        raise e


# test_gemini_pro_vision()
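

# The tool definitions below follow the OpenAI function-calling schema, which
# litellm maps to Gemini's function-calling format under the hood.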
def gemini_pro_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    # use a local name that doesn't shadow the imported `completion`
    response = litellm.completion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"response: {response}")


# gemini_pro_function_calling()
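
# Hedged sketch (commented out): inspecting a returned tool call, reusing the
# `tools` list defined above. Assumes the model actually chose to call the
# function; `tool_calls` may be None or empty otherwise.
# response = litellm.completion(model="gemini-pro", messages=messages, tools=tools)
# tool_call = response.choices[0].message.tool_calls[0]
# print(tool_call.function.name, tool_call.function.arguments)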


async def gemini_pro_async_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    response = await litellm.acompletion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"response: {response}")


# Commented out like the other ad-hoc invocations in this file; leaving it
# live would fire a network call at import/collection time.
# asyncio.run(gemini_pro_async_function_calling())


# Extra gemini Vision tests for completion + stream, async, async + stream
# if we run into issues with gemini, we will also add these to our ci/cd pipeline
# def test_gemini_pro_vision_stream():
#     try:
#         litellm.set_verbose = False
#         litellm.num_retries = 0
#         print("streaming response from gemini-pro-vision")
#         resp = litellm.completion(
#             model="vertex_ai/gemini-pro-vision",
#             messages=[
#                 {
#                     "role": "user",
#                     "content": [
#                         {
#                             "type": "text",
#                             "text": "Whats in this image?"
#                         },
#                         {
#                             "type": "image_url",
#                             "image_url": {
#                                 "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                             }
#                         }
#                     ]
#                 }
#             ],
#             stream=True
#         )
#         print(resp)
#         for chunk in resp:
#             print(chunk)
#     except Exception as e:
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_stream()


# def test_gemini_pro_vision_async():
#     try:
#         litellm.set_verbose = True
#         litellm.num_retries = 0
#         async def test():
#             resp = await litellm.acompletion(
#                 model="vertex_ai/gemini-pro-vision",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": [
#                             {
#                                 "type": "text",
#                                 "text": "Whats in this image?"
#                             },
#                             {
#                                 "type": "image_url",
#                                 "image_url": {
#                                     "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                                 }
#                             }
#                         ]
#                     }
#                 ],
#             )
#             print("async response gemini pro vision")
#             print(resp)
#         asyncio.run(test())
#     except Exception as e:
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_async()


# def test_gemini_pro_vision_async_stream():
#     try:
#         litellm.set_verbose = True
#         litellm.num_retries = 0
#         async def test():
#             resp = await litellm.acompletion(
#                 model="vertex_ai/gemini-pro-vision",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": [
#                             {
#                                 "type": "text",
#                                 "text": "Whats in this image?"
#                             },
#                             {
#                                 "type": "image_url",
#                                 "image_url": {
#                                     "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                                 }
#                             }
#                         ]
#                     }
#                 ],
#                 stream=True
#             )
#             print("async response gemini pro vision")
#             print(resp)
#             async for chunk in resp:  # a streaming acompletion response is an async iterator
#                 print(chunk)
#         asyncio.run(test())
#     except Exception as e:
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_async_stream()