Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import asyncio | |
| import edge_tts | |
| import re | |
| import os | |
| from huggingface_hub import InferenceClient | |
# --- SETTINGS ---
# 1. BRAIN: Llama-3 (Text Generation)
# Model used to distill the raw chat logs into a structured JSON profile.
EXTRACTOR_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
# Model used to generate the persona-styled chat replies (same checkpoint,
# but kept as a separate constant so the two roles can diverge later).
PERSONALITY_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
# 2. EARS: Whisper (Speech-to-Text)
STT_MODEL = "openai/whisper-large-v3-turbo"
# Default Chat History
# Sample conversation used to pre-fill the "History" textbox so the demo
# produces a meaningful profile without the user typing anything.
DEFAULT_LOGS = """
1. User: I feel tired after big parties. I need to be alone to recharge.
2. User: I like ideas more than real-world details.
3. User: My desk is messy, but I know where my stuff is.
4. User: I worry that I said the wrong thing.
5. User: I like to plan ahead. Surprises stress me out.
6. User: It is hard for me to understand why people cry over small things.
7. User: I start many hobbies but do not finish them.
8. User: I feel bad when someone criticizes me.
9. User: I take charge in groups to make sure work is done right.
10. User: Logic is more important than feelings.
11. User: I daydream a lot.
12. User: I hate fighting. I want everyone to get along.
13. User: I help others even if it hurts me.
14. User: Boring tasks make me sleepy.
15. User: I need proof before I believe something.
16. User: I love being the center of attention.
17. User: I am bad at talking about my feelings.
18. User: I wait until the last minute to do work.
19. User: Music makes me feel strong emotions.
20. User: I prefer 2 close friends over 20 acquaintances.
21. User: I cannot say "no" to people.
22. User: I always analyze why people act the way they do.
23. User: I like following rules and traditions.
24. User: People say I am too serious.
25. User: I have lots of energy when debating.
26. User: I am scared of the future.
27. User: I trust my gut feeling more than numbers.
28. User: I work better alone.
29. User: I hate losing games.
30. User: I want to know my purpose in life.
"""
# --- HELPER: CLEAN TEXT ---
def clean_text_for_audio(text):
    """Strip stage directions like (pause), *laughs*, [sigh] before TTS.

    Removes any span wrapped in parentheses, square brackets, or asterisks,
    then collapses the double spaces left behind so the spoken audio does
    not carry odd gaps.

    Args:
        text: Raw reply text produced by the language model.

    Returns:
        Cleaned text with surrounding whitespace stripped.
    """
    # Drop (...), [...] and *...* spans; non-greedy so adjacent markers
    # don't swallow the text between them.
    clean = re.sub(r'[\(\[\*].*?[\)\]\*]', '', text)
    # Removing a mid-sentence span leaves "a  b" — squeeze runs of
    # whitespace down to a single space.
    clean = re.sub(r'\s{2,}', ' ', clean)
    return clean.strip()
# --- PART 1: MEMORY EXTRACTOR ---
def extract_memory(chat_logs, hf_token):
    """Distill chat logs into a JSON user profile via the extractor LLM.

    Args:
        chat_logs: Raw chat history text to analyze.
        hf_token: Hugging Face API token used to authenticate the call.

    Returns:
        A pretty-printed JSON string with "traits", "values" and
        "struggles" keys, a plain error string when the token is missing,
        or a JSON object with an "error" key if the call/parse fails.
    """
    if not hf_token:
        return "Error: Please paste your Hugging Face Token."
    client = InferenceClient(token=hf_token)
    system_prompt = """
    Read the chat logs. Create a simple User Profile in JSON format.
    Find these 3 things:
    1. "traits": Is the user Introverted? Organized? Anxious?
    2. "values": Do they care about Logic? Peace? Winning?
    3. "struggles": Do they procrastinate? Have social anxiety?
    Return ONLY valid JSON.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": chat_logs}
    ]
    try:
        # Low temperature: we want deterministic, parseable JSON, not prose.
        response = client.chat_completion(
            model=EXTRACTOR_MODEL,
            messages=messages,
            max_tokens=500,
            temperature=0.1
        )
        text = response.choices[0].message.content.strip()
        # Models often wrap JSON in markdown fences despite instructions.
        if "```" in text:
            text = text.replace("```json", "").replace("```", "")
        # Slice out the outermost {...} in case the model added prose.
        start = text.find("{")
        end = text.rfind("}") + 1
        if start == -1 or end == 0:
            # Without this guard, text[-1:0] is "" and json.loads raises a
            # cryptic "Expecting value" error instead of a useful message.
            raise ValueError("Model response contained no JSON object.")
        # Round-trip through json to validate and normalize formatting.
        return json.dumps(json.loads(text[start:end]), indent=2)
    except Exception as e:
        # Surface failures as JSON so the gr.Code output stays renderable.
        return json.dumps({"error": str(e)}, indent=2)
# --- PART 2: THE EARS (Speech-to-Text) ---
def transcribe_audio(audio_filepath, hf_token):
    """Turn a recorded audio file into text using the Whisper STT model.

    Args:
        audio_filepath: Path to the user's recording; falsy when absent.
        hf_token: Hugging Face API token.

    Returns:
        The transcribed text, an empty string when no audio was provided,
        or an error message string if the API call fails.
    """
    # Guard clause: nothing recorded means nothing to transcribe.
    if not audio_filepath:
        return ""
    stt_client = InferenceClient(token=hf_token)
    try:
        # The client accepts a filesystem path directly.
        result = stt_client.automatic_speech_recognition(
            audio=audio_filepath,
            model=STT_MODEL,
        )
        return result.text
    except Exception as e:
        return f"Error listening: {str(e)}"
# --- PART 3: PERSONALITY & VOICE ---
async def generate_response_and_audio(text_input, audio_input, memory_json, persona, hf_token):
    """Generate a persona-styled text reply plus a spoken MP3 of it.

    Voice input wins over typed text: if a recording exists it is
    transcribed first, otherwise the typed message is used.

    Args:
        text_input: Message typed by the user (may be empty).
        audio_input: Filepath of a recorded message, or None.
        memory_json: JSON string of the extracted user profile (may be
            empty or invalid; treated as an empty profile then).
        persona: One of "Calm Mentor", "Witty Friend", "Therapist".
        hf_token: Hugging Face API token.

    Returns:
        Tuple of (conversation log text, audio filepath). The audio path
        is None whenever an error occurred.
    """
    if not hf_token:
        return "Error: Please paste your Hugging Face Token.", None
    # LOGIC: Did the user Type or Speak?
    if audio_input is not None:
        # If audio exists, convert it to text first
        user_message = transcribe_audio(audio_input, hf_token)
    else:
        # Otherwise use the typed text
        user_message = text_input
    if not user_message:
        return "Error: Please type something or record your voice.", None
    client = InferenceClient(token=hf_token)
    # The profile box may hold an error message or non-dict JSON; fall back
    # to an empty profile instead of crashing on memory.get below.
    # (Narrowed from a bare `except:` that also swallowed KeyboardInterrupt.)
    try:
        memory = json.loads(memory_json)
    except (TypeError, ValueError):
        memory = {}
    if not isinstance(memory, dict):
        memory = {}
    prompts = {
        "Calm Mentor": "Role: Wise Teacher. Tone: Calm, slow, patient. Advice: Focus on long-term growth.",
        "Witty Friend": "Role: Best Friend. Tone: Funny, fast, sarcastic. Advice: Make jokes and be relatable.",
        "Therapist": "Role: Counselor. Tone: Soft, kind, gentle. Advice: Validate their feelings."
    }
    context = f"""
    ABOUT THE USER:
    - Personality: {memory.get('traits', 'Unknown')}
    - Values: {memory.get('values', 'Unknown')}
    - Problems: {memory.get('struggles', 'Unknown')}
    """
    # Unknown personas fall back to the default tone (matches voice_map's
    # .get below) instead of raising KeyError.
    system_content = prompts.get(persona, prompts["Calm Mentor"])
    messages = [
        {"role": "system", "content": f"{system_content}\n\n{context}"},
        {"role": "user", "content": user_message}
    ]
    try:
        # A. Generate Text Response
        res = client.chat_completion(
            model=PERSONALITY_MODEL,
            messages=messages,
            max_tokens=250,
            temperature=0.7
        )
        text_reply = res.choices[0].message.content
        # B. Generate Audio Response — strip stage directions so the TTS
        # voice doesn't read "(pause)" aloud.
        spoken_text = clean_text_for_audio(text_reply)
        voice_map = {
            "Calm Mentor": "en-US-ChristopherNeural",
            "Witty Friend": "en-US-EricNeural",
            "Therapist": "en-US-AvaNeural"
        }
        output_file = "response.mp3"
        communicate = edge_tts.Communicate(spoken_text, voice_map.get(persona, "en-US-AriaNeural"))
        await communicate.save(output_file)
        # Return: (User's Transcribed Text, AI Response, Audio File)
        return f" You said: {user_message}\n\n AI: {text_reply}", output_file
    except Exception as e:
        return f"Error: {str(e)}", None
# Wrapper for Gradio
def process_interaction(text, audio, memory, persona, token):
    """Synchronous Gradio entry point: drive the async pipeline to completion."""
    pipeline = generate_response_and_audio(text, audio, memory, persona, token)
    return asyncio.run(pipeline)
# --- UI LAYOUT ---
with gr.Blocks(title="Multimodal Personality Engine") as demo:
    gr.Markdown("Input: **Text or Voice** | Output: **Text + Voice**")
    with gr.Row():
        # Token feeds every backend call; type="password" masks it in the UI.
        token_input = gr.Textbox(label="Hugging Face Token (Required)", type="password")
    with gr.Row():
        # Column 1: Analyze — build the JSON profile from chat history.
        with gr.Column():
            gr.Markdown("### 1. Memory Analysis")
            logs_input = gr.Textbox(label="History", value=DEFAULT_LOGS, lines=5)
            extract_btn = gr.Button("Create Profile")
            memory_output = gr.Code(label="Result (JSON)", language="json")
            extract_btn.click(extract_memory, inputs=[logs_input, token_input], outputs=memory_output)
        # Column 2: Chat — talk to the agent using the profile above.
        with gr.Column():
            gr.Markdown("### 2. Chat with Agent")
            # INPUTS: two tabs, typed text or a microphone recording.
            with gr.Tab("Type"):
                text_in = gr.Textbox(label="Type here...")
            with gr.Tab("Speak"):
                # type="filepath" hands the handler a path on disk,
                # which is what transcribe_audio expects.
                audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Record here")
            persona_select = gr.Radio(["Calm Mentor", "Witty Friend", "Therapist"], label="Tone", value="Calm Mentor")
            send_btn = gr.Button("Send Message")
            # OUTPUTS: conversation transcript plus the synthesized voice.
            text_out = gr.Textbox(label="Conversation Log", lines=4)
            audio_out = gr.Audio(label="AI Voice Response")
            # memory_output is wired in as an input so the chat handler
            # sees whatever profile the analysis column produced.
            send_btn.click(
                process_interaction,
                inputs=[text_in, audio_in, memory_output, persona_select, token_input],
                outputs=[text_out, audio_out]
            )
if __name__ == "__main__":
    demo.queue().launch()