SynthScope

Sleeping

App Files Files Community

SynthScope / synthscope.py

Ifeanyi

Update synthscope.py

5971ceb verified 4 months ago

raw

history blame contribute delete

7.99 kB

	from google import genai
	from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
	from google.genai import types
	from PIL import Image
	from io import BytesIO
	import wave
	import os

	# Read the Gemini API key from the environment and build the client that
	# every generate_content call in this module goes through.
	api_key = os.environ.get("GEMINI_API_KEY")
	client = genai.Client(api_key=api_key)

	# Tool passed to generate_content calls that should use Google Search.
	google_search_tool = Tool(google_search=GoogleSearch())

	# Text model used for the search, translation, and image-prompt steps.
	model_id = "gemini-2.0-flash"

	def GoogleSearchImageGen(prompt, image_style, voices, language):
	    """
	    Answer a query with Google Search and return it as text, image, and audio.

	    The query is answered by the search-enabled text model, the answer is
	    translated into the requested language (English goes through a
	    "Read out" prompt instead), the translated text is synthesized to
	    speech and written to ``out.wav``, and an image in the requested style
	    is generated from the answer.

	    Args:
	        prompt: A natural language query for Google Search.
	        image_style: Name of the image style; must be a key of
	            ``style_phrases`` below (e.g. "Comic", "Star Wars").
	        voices: Name of the prebuilt voice for the generated audio; must be
	            one of ``supported_voices`` below (e.g. "Kore", "Zephyr").
	        language: Name of the output language; must be one of
	            ``supported_languages`` below (e.g. "English", "Thai").

	    Returns:
	        A ``(text, image, audio_path)`` tuple. On success ``text`` is the
	        translated answer, ``image`` is a PIL image (or None when the
	        response carried no image data) and ``audio_path`` is the WAV file
	        that was written. For an unsupported style, voice, or language the
	        tuple is ``("Invalid ...", None, None)`` and no API call is made.
	        If any API step fails, the error is printed and the tuple is
	        ``(translated_text or "No search result available", None, None)``.
	    """

	    def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
	        """Write raw PCM bytes to ``filename`` as a WAV file."""
	        with wave.open(filename, "wb") as wf:
	            wf.setnchannels(channels)
	            wf.setsampwidth(sample_width)
	            wf.setframerate(rate)
	            wf.writeframes(pcm)

	    def collect_text(response):
	        """Concatenate every non-empty text part of the first candidate."""
	        return "".join(
	            part.text
	            for part in response.candidates[0].content.parts
	            if part.text
	        )

	    # Voices selectable for the audio output.
	    supported_voices = (
	        "Kore", "Zephyr", "Orus", "Fenrir", "Charon",
	        "Umbriel", "Schedar", "Iapetus", "Puck", "Gacrux",
	    )

	    # Style name -> tail of the image-prompt request. The values keep their
	    # punctuation exactly as it is sent to the model.
	    style_phrases = {
	        "Comic": "a comic book-style image.",
	        "Cartoon": "a cartoon-style image.",
	        "Disney": "a Disney-style image.",
	        "Anime": "an Anime-style image.",
	        "Ghibli": "a Ghibli-style image.",
	        "Victorian": "a Victorian-era image.",
	        "Movie": "a Movie-style image.",
	        "Star Wars": "a Star Wars-style image.",
	        "Marvel": "a Marvel-style image.",
	        "Van Gogh": "a Van Gogh-style image.",
	        "Picasso": "a Picasso-style image",
	    }

	    # Output languages; "English" is special-cased when the prompt is built.
	    supported_languages = (
	        "English", "Spanish", "French", "German", "Italian",
	        "Japanese", "Tamil", "Arabic", "Russian", "Portuguese",
	        "Dutch", "Thai", "Turkish", "Romanian", "Polish",
	    )

	    # Validate every selection before spending any API call on the request.
	    if image_style not in style_phrases:
	        return "Invalid image style", None, None
	    if voices not in supported_voices:
	        return "Invalid voice", None, None
	    if language not in supported_languages:
	        return "Invalid language", None, None

	    # Bound before the try block so the error path below can always read it,
	    # no matter which step raised.
	    translated_text = ""
	    image = None
	    audio_output_file = "out.wav"

	    try:
	        # Get search result to be displayed to the user
	        search_resp = client.models.generate_content(
	            model=model_id,
	            contents=prompt,
	            config=GenerateContentConfig(
	                tools=[google_search_tool],
	                response_modalities=["TEXT"],
	            ),
	        )
	        search_result = collect_text(search_resp)

	        # Build only the prompt that is needed. Deriving the target language
	        # from the selection keeps prompt and selection from drifting apart.
	        if language == "English":
	            translation_prompt = f"Read out the {search_result}"
	        else:
	            translation_prompt = (
	                f"Translate {search_result} into {language}. "
	                "Return only the translated text."
	            )

	        trans_resp = client.models.generate_content(
	            model=model_id,
	            contents=translation_prompt,
	        )
	        translated_text = trans_resp.text or ""

	        # Generate audio from the translated search result
	        audio_resp = client.models.generate_content(
	            model="gemini-2.5-flash-preview-tts",
	            contents=translated_text,
	            config=types.GenerateContentConfig(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name=voices,
	                        )
	                    )
	                ),
	            ),
	        )
	        pcm_data = audio_resp.candidates[0].content.parts[0].inline_data.data
	        wave_file(audio_output_file, pcm_data)

	        # Generate image prompt from search result
	        image_gen_prompt = (
	            f"Convert the {search_result} into a well-crafted text-to-image "
	            f"prompt that generates {style_phrases[image_style]}"
	        )
	        prompt_resp = client.models.generate_content(
	            model=model_id,
	            contents=image_gen_prompt,
	            config=GenerateContentConfig(
	                tools=[google_search_tool],
	                response_modalities=["TEXT"],
	            ),
	        )
	        prompt_image = collect_text(prompt_resp)

	        # Only generate an image if the model produced a prompt for it
	        if prompt_image:
	            image_resp = client.models.generate_content(
	                model="gemini-2.0-flash-preview-image-generation",
	                contents=prompt_image,
	                config=types.GenerateContentConfig(
	                    response_modalities=['TEXT', 'IMAGE']
	                ),
	            )

	            # Take the first part that carries inline data and no text
	            for part in image_resp.candidates[0].content.parts:
	                if part.text is None and part.inline_data is not None:
	                    image = Image.open(BytesIO(part.inline_data.data))
	                    break

	    except Exception as e:
	        print(f"Error occurred: {e}")
	        # Return whatever text was obtained, without image or audio
	        return translated_text or "No search result available", None, None

	    return translated_text, image, audio_output_file

	if __name__ == "__main__":
	    # Manual smoke test. The keyword names must match the function
	    # signature: the voice parameter is ``voices``, so passing ``voice=``
	    # raises TypeError.
	    GoogleSearchImageGen(
	        prompt="What is the current population of China?",
	        image_style="Star Wars",
	        voices="Zephyr",
	        language="Thai",
	    )