import gradio as gr
import whisper
from huggingface_hub import InferenceClient
from gtts import gTTS
import torch
import os

# --- 1. YOUR COMPANY DATA ---
# System prompt sent as the first chat message on every new conversation:
# it fixes the assistant persona, the answer-style rules (brevity, no filler),
# and the fact sheet the model is expected to answer from.
COMPANY_KNOWLEDGE = """
You are the Senior Tech Consultant AI for 'SoftStream Tech', a software development agency.
Your goal is to answer client questions professionally, technically, and concisely.

RULES FOR ANSWERING:
1. Answer in 20 WORDS OR LESS.
2. Be direct. Do not use filler words like "Thank you for asking" or "I would be happy to help".
3. If the answer is a list, pick only the top 2 items.
4. If asked about price, give ranges, not exact quotes.
5. If you don't know the answer, say: "I'll need to check with a senior engineer on that."

DATA SHEET:
[Services]
- Custom Web Development: React, Vue, Next.js, Python (Django/FastAPI), Node.js.
- Mobile App Development: Flutter (Cross-platform), Swift (iOS), Kotlin (Android).
- AI & Machine Learning: Chatbots, Predictive Analytics, Computer Vision.
- Cloud DevOps: AWS, Google Cloud, Azure setup and CI/CD pipelines.

[Pricing Models]
- Time & Material (Hourly): $40 - $80 per hour depending on developer seniority.
- Fixed Price: Minimum project size is $5,000. Requires detailed scope.
- Retainer: Dedicated team for $4,000/month per developer.

[Process]
- Methodology: Agile/Scrum with 2-week sprints.
- Tools: We use Jira for tracking and Slack for communication.
- Timeline: MVP (Minimum Viable Product) usually takes 4-8 weeks.

[Support & Maintenance]
- Standard Support: Bug fixing for 3 months after launch (Free).
- Premium SLA: 24/7 server monitoring and priority support ($500/month).

[Contact]
- Email: projects@softstream.tech
- Phone: +1-555-CODE-NOW
- Location: San Francisco, CA (but we work remote globally).
"""
# --- 2. SETUP ---
# 🔒 SECURITY: We read the token from Settings > Secrets
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)

# Whisper runs on GPU when available; "tiny" keeps load/transcribe latency low.
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model("tiny", device=device)


def voice_chat(audio, history):
    """One full voice round-trip: transcribe -> chat completion -> TTS.

    Args:
        audio: Filepath of the recorded clip from ``gr.Audio`` (``None`` when
            the widget is cleared).
        history: Chat-message list held in ``gr.State``; seeded with the
            system prompt on the first turn.

    Returns:
        A 3-tuple ``(audio_path, text, history)``: path of the spoken MP3
        reply (or ``None``), the assistant text (or an error message), and
        the updated history.
    """
    # Nothing recorded (e.g. the widget was cleared) — keep state unchanged.
    if audio is None:
        return None, "", history

    # A. Initialize History — first turn starts with the company system prompt.
    if not history:
        history = [{"role": "system", "content": COMPANY_KNOWLEDGE}]

    try:
        # B. Hear (Whisper). fp16=False avoids half-precision on CPU.
        transcription = whisper_model.transcribe(audio, fp16=False)["text"]

        # C. Think (AI with Context).
        # We enforce the brevity rule here again to be safe.
        history.append({"role": "user", "content": transcription + " (Answer in 20 words max)"})

        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",  # Currently the best free/stable model
            messages=history,
            max_tokens=60,     # Strict limit to prevent long monologues
            temperature=0.4,   # Low temp = more robotic and precise
        )
        ai_text = response.choices[0].message.content
        history.append({"role": "assistant", "content": ai_text})

        # D. Speak (gTTS).
        # FIX: write to a unique temp file instead of a shared "response.mp3" —
        # with the fixed name, concurrent users overwrote each other's reply.
        import tempfile
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS opens the path itself; release the raw descriptor.
        tts = gTTS(text=ai_text, lang='en')
        tts.save(audio_path)

        return audio_path, ai_text, history
    except Exception as e:
        # FIX: roll back the unanswered user turn so a failed request does not
        # leave a dangling user message (with the brevity suffix) in history.
        if history and history[-1].get("role") == "user":
            history.pop()
        return None, f"Error: {str(e)}", history


# --- 3. INTERFACE ---
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("## 🏢 SoftStream Tech AI Consultant")

    # Per-session conversation state (list of chat messages).
    conversation_history = gr.State([])

    with gr.Row():
        input_audio = gr.Audio(sources=["microphone"], type="filepath", label="Ask about our services")
    with gr.Row():
        output_audio = gr.Audio(label="AI Response (Under 25s)", autoplay=True)
        output_text = gr.Textbox(label="Transcript")

    clear_btn = gr.Button("Reset Conversation")

    # Fires whenever a new recording lands in the mic widget.
    input_audio.change(
        voice_chat,
        inputs=[input_audio, conversation_history],
        outputs=[output_audio, output_text, conversation_history]
    )
    # Reset: empty history, no audio, blank transcript.
    clear_btn.click(lambda: ([], None, ""), outputs=[conversation_history, output_audio, output_text])

demo.launch()