Spaces:

sravan837
/

Personality-Engine

Sleeping

App Files Files Community

Personality-Engine / app.py

sravan837

Update app.py

dfbb592 verified 24 days ago

raw

history blame contribute delete

8.25 kB

	import gradio as gr
	import json
	import asyncio
	import edge_tts
	import re
	import os
	from huggingface_hub import InferenceClient

	# --- SETTINGS ---
	# Hugging Face Hub model identifiers used by the functions in this file.
	# 1. BRAIN: Llama-3 (Text Generation)
	# EXTRACTOR_MODEL is the model passed to chat_completion() in extract_memory();
	# PERSONALITY_MODEL is the model passed to chat_completion() in
	# generate_response_and_audio(). Both currently name the same model.
	EXTRACTOR_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
	PERSONALITY_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"

	# 2. EARS: Whisper (Speech-to-Text)
	# STT_MODEL is the model passed to automatic_speech_recognition() in
	# transcribe_audio().
	STT_MODEL = "openai/whisper-large-v3-turbo"

	# Default Chat History
	# Used as the initial value of the "History" textbox in the UI. Each numbered
	# line is one sample user statement that extract_memory() can summarise.
	DEFAULT_LOGS = """
	1. User: I feel tired after big parties. I need to be alone to recharge.
	2. User: I like ideas more than real-world details.
	3. User: My desk is messy, but I know where my stuff is.
	4. User: I worry that I said the wrong thing.
	5. User: I like to plan ahead. Surprises stress me out.
	6. User: It is hard for me to understand why people cry over small things.
	7. User: I start many hobbies but do not finish them.
	8. User: I feel bad when someone criticizes me.
	9. User: I take charge in groups to make sure work is done right.
	10. User: Logic is more important than feelings.
	11. User: I daydream a lot.
	12. User: I hate fighting. I want everyone to get along.
	13. User: I help others even if it hurts me.
	14. User: Boring tasks make me sleepy.
	15. User: I need proof before I believe something.
	16. User: I love being the center of attention.
	17. User: I am bad at talking about my feelings.
	18. User: I wait until the last minute to do work.
	19. User: Music makes me feel strong emotions.
	20. User: I prefer 2 close friends over 20 acquaintances.
	21. User: I cannot say "no" to people.
	22. User: I always analyze why people act the way they do.
	23. User: I like following rules and traditions.
	24. User: People say I am too serious.
	25. User: I have lots of energy when debating.
	26. User: I am scared of the future.
	27. User: I trust my gut feeling more than numbers.
	28. User: I work better alone.
	29. User: I hate losing games.
	30. User: I want to know my purpose in life.
	"""

	# --- HELPER: CLEAN TEXT ---
	def clean_text_for_audio(text):
	    """Strip stage directions so the text-to-speech voice does not read them.

	    Removes spans wrapped in parentheses, square brackets, or a pair of
	    asterisks -- e.g. ``(pause)``, ``[sighs]``, ``*laughs*`` -- and then
	    collapses the runs of spaces those removals leave behind.

	    Args:
	        text: Reply text to be spoken; may be empty or None.

	    Returns:
	        The cleaned text with surrounding whitespace stripped, or ``""``
	        when ``text`` is empty or None.
	    """
	    if not text:
	        return ""
	    # The previous pattern was one big character class, so it deleted single
	    # "(", ")", ".", "?" ... characters (dropping sentence punctuation) and
	    # left the cue words themselves in place. Match whole delimited spans
	    # instead; spans never cross a newline, so a lone "*" bullet marker
	    # cannot swallow the rest of a paragraph.
	    without_cues = re.sub(r'\([^()\n]*\)|\[[^\[\]\n]*\]|\*[^*\n]*\*', '', text)
	    # Removing a cue from the middle of a sentence leaves a double space.
	    return re.sub(r'[ \t]{2,}', ' ', without_cues).strip()

	# --- PART 1: MEMORY EXTRACTOR ---
	def _parse_profile_json(raw_reply):
	    """Parse the outermost ``{...}`` object found in a model reply."""
	    # Models often wrap JSON in a Markdown code fence; drop the fence markers.
	    cleaned = raw_reply.replace("```json", "").replace("```", "")
	    start = cleaned.find("{")
	    end = cleaned.rfind("}")
	    if start == -1 or end < start:
	        # Without this check the slice would silently be "" and json.loads
	        # would report an unhelpful "Expecting value" error.
	        raise ValueError("Model reply did not contain a JSON object.")
	    return json.loads(cleaned[start:end + 1])


	def extract_memory(chat_logs, hf_token):
	    """Summarise chat logs into a JSON user profile via the extractor model.

	    Sends ``chat_logs`` to ``EXTRACTOR_MODEL`` with a system prompt asking for
	    the keys "traits", "values" and "struggles", then re-serialises the JSON
	    object found in the reply.

	    Args:
	        chat_logs: Text of the user's chat history.
	        hf_token: Hugging Face access token used to build the InferenceClient.

	    Returns:
	        A string for the result widget: pretty-printed JSON on success, a JSON
	        object ``{"error": ...}`` when the request or parsing fails or the
	        history is empty, or a plain error message when no token was given.
	    """
	    if not hf_token:
	        return "Error: Please paste your Hugging Face Token."
	    if not chat_logs or not chat_logs.strip():
	        # Nothing to analyse; skip the remote call entirely.
	        return json.dumps({"error": "Chat history is empty."}, indent=2)

	    client = InferenceClient(token=hf_token)

	    system_prompt = """
	Read the chat logs. Create a simple User Profile in JSON format.
	Find these 3 things:
	1. "traits": Is the user Introverted? Organized? Anxious?
	2. "values": Do they care about Logic? Peace? Winning?
	3. "struggles": Do they procrastinate? Have social anxiety?
	Return ONLY valid JSON.
	"""

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": chat_logs},
	    ]

	    # This is the UI boundary: any failure (network, auth, malformed reply)
	    # is reported to the user as JSON instead of raising.
	    try:
	        response = client.chat_completion(
	            model=EXTRACTOR_MODEL,
	            messages=messages,
	            max_tokens=500,
	            temperature=0.1,
	        )
	        reply = (response.choices[0].message.content or "").strip()
	        profile = _parse_profile_json(reply)
	    except Exception as e:
	        return json.dumps({"error": str(e)}, indent=2)
	    return json.dumps(profile, indent=2)

	# --- PART 2: THE EARS (Speech-to-Text) ---
	def transcribe_audio(audio_filepath, hf_token):
	    """Turn a recorded audio file into text using the speech-to-text model.

	    Args:
	        audio_filepath: Path of the recording; a falsy value means no audio.
	        hf_token: Hugging Face access token used to build the InferenceClient.

	    Returns:
	        The transcribed text, ``""`` when no recording was supplied, or a
	        string beginning with "Error listening: " if the request fails.
	    """
	    if audio_filepath:
	        stt_client = InferenceClient(token=hf_token)
	        try:
	            # The file path is handed to the API as-is.
	            result = stt_client.automatic_speech_recognition(
	                audio=audio_filepath,
	                model=STT_MODEL,
	            )
	            return result.text
	        except Exception as err:
	            return f"Error listening: {str(err)}"
	    return ""

	# --- PART 3: PERSONALITY & VOICE ---
	async def generate_response_and_audio(text_input, audio_input, memory_json, persona, hf_token):
	    """Generate the persona's reply to the user and a spoken version of it.

	    The user's message comes from ``audio_input`` (transcribed with
	    ``transcribe_audio``) when a recording is present, otherwise from
	    ``text_input``. The reply is produced by ``PERSONALITY_MODEL`` using a
	    persona prompt plus the profile in ``memory_json``, then synthesised to
	    "response.mp3" with edge-tts.

	    Args:
	        text_input: Typed message; used only when ``audio_input`` is None.
	        audio_input: Path of a recorded audio file, or None.
	        memory_json: JSON text of the user profile; anything that is not a
	            JSON object is treated as an empty profile.
	        persona: One of "Calm Mentor", "Witty Friend", "Therapist"; an
	            unrecognised value falls back to the "Calm Mentor" prompt.
	        hf_token: Hugging Face access token.

	    Returns:
	        A 2-tuple ``(conversation_text, audio_path)``. On any failure the
	        first item is an error message and the second is None.
	    """
	    if not hf_token:
	        return "Error: Please paste your Hugging Face Token.", None

	    # LOGIC: Did the user Type or Speak? A recording takes priority.
	    if audio_input is not None:
	        user_message = transcribe_audio(audio_input, hf_token)
	        # transcribe_audio reports failures as an "Error listening: ..."
	        # string. Show it to the user rather than sending the error text to
	        # the model as though the user had said it.
	        if user_message and user_message.startswith("Error listening:"):
	            return user_message, None
	    else:
	        user_message = text_input

	    if not user_message:
	        return "Error: Please type something or record your voice.", None

	    client = InferenceClient(token=hf_token)

	    # The profile box may be empty, hold an error message, or hold JSON that
	    # is not an object; all of those mean "no profile available".
	    try:
	        memory = json.loads(memory_json)
	    except (TypeError, ValueError):
	        memory = {}
	    if not isinstance(memory, dict):
	        memory = {}

	    prompts = {
	        "Calm Mentor": "Role: Wise Teacher. Tone: Calm, slow, patient. Advice: Focus on long-term growth.",
	        "Witty Friend": "Role: Best Friend. Tone: Funny, fast, sarcastic. Advice: Make jokes and be relatable.",
	        "Therapist": "Role: Counselor. Tone: Soft, kind, gentle. Advice: Validate their feelings.",
	    }
	    # Tolerate an unknown/None persona the same way the voice lookup does,
	    # instead of raising KeyError outside the error handler.
	    persona_prompt = prompts.get(persona, prompts["Calm Mentor"])

	    context = f"""
	ABOUT THE USER:
	- Personality: {memory.get('traits', 'Unknown')}
	- Values: {memory.get('values', 'Unknown')}
	- Problems: {memory.get('struggles', 'Unknown')}
	"""

	    messages = [
	        {"role": "system", "content": f"{persona_prompt}\n\n{context}"},
	        {"role": "user", "content": user_message},
	    ]

	    try:
	        # A. Generate Text Response
	        res = client.chat_completion(
	            model=PERSONALITY_MODEL,
	            messages=messages,
	            max_tokens=250,
	            temperature=0.7,
	        )
	        text_reply = res.choices[0].message.content

	        # B. Generate Audio Response
	        spoken_text = clean_text_for_audio(text_reply)
	        voice_map = {
	            "Calm Mentor": "en-US-ChristopherNeural",
	            "Witty Friend": "en-US-EricNeural",
	            "Therapist": "en-US-AvaNeural",
	        }

	        output_file = "response.mp3"
	        communicate = edge_tts.Communicate(spoken_text, voice_map.get(persona, "en-US-AriaNeural"))
	        await communicate.save(output_file)

	        # Return: (conversation log text, path of the audio file)
	        return f" You said: {user_message}\n\n AI: {text_reply}", output_file

	    except Exception as e:
	        # UI boundary: report model/TTS failures in the log box.
	        return f"Error: {str(e)}", None

	# Wrapper for Gradio
	def process_interaction(text, audio, memory, persona, token):
	    """Synchronous click handler: run the async reply pipeline to completion.

	    Forwards its arguments unchanged to ``generate_response_and_audio`` and
	    returns that coroutine's result.
	    """
	    pending_reply = generate_response_and_audio(text, audio, memory, persona, token)
	    return asyncio.run(pending_reply)

	# --- UI LAYOUT ---
	# Builds the Gradio interface: a token field, a profile-extraction column and
	# a chat column, then launches the app when the file is run as a script.
	with gr.Blocks(title="Multimodal Personality Engine") as demo:
	    # Raw string: "\|" is not a valid escape sequence in a normal literal
	    # (SyntaxWarning on recent Python versions). The runtime text is unchanged.
	    gr.Markdown(r"Input: Text or Voice \| Output: Text + Voice")

	    with gr.Row():
	        # Shared by both columns; masked because it is a credential.
	        token_input = gr.Textbox(label="Hugging Face Token (Required)", type="password")

	    with gr.Row():
	        # Column 1: Analyze
	        with gr.Column():
	            gr.Markdown("### 1. Memory Analysis")
	            logs_input = gr.Textbox(label="History", value=DEFAULT_LOGS, lines=5)
	            extract_btn = gr.Button("Create Profile")
	            memory_output = gr.Code(label="Result (JSON)", language="json")
	            extract_btn.click(extract_memory, inputs=[logs_input, token_input], outputs=memory_output)

	        # Column 2: Chat
	        with gr.Column():
	            gr.Markdown("### 2. Chat with Agent")

	            # INPUTS
	            with gr.Tab("Type"):
	                text_in = gr.Textbox(label="Type here...")
	            with gr.Tab("Speak"):
	                audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Record here")

	            persona_select = gr.Radio(["Calm Mentor", "Witty Friend", "Therapist"], label="Tone", value="Calm Mentor")
	            send_btn = gr.Button("Send Message")

	            # OUTPUTS
	            text_out = gr.Textbox(label="Conversation Log", lines=4)
	            audio_out = gr.Audio(label="AI Voice Response")

	            # The profile shown in column 1 is passed along as the memory
	            # argument of the chat handler.
	            send_btn.click(
	                process_interaction,
	                inputs=[text_in, audio_in, memory_output, persona_select, token_input],
	                outputs=[text_out, audio_out],
	            )

	if __name__ == "__main__":
	    demo.queue().launch()