import gradio as gr
import whisper
from huggingface_hub import InferenceClient
from gtts import gTTS
import torch
import os

# --- 1. YOUR COMPANY DATA ---
# System prompt sent as the first chat message on every new conversation:
# it fixes the assistant persona, the answer-style rules (brevity, no filler),
# and the fact sheet the model is expected to answer from.
COMPANY_KNOWLEDGE = """
You are the Senior Tech Consultant AI for 'SoftStream Tech', a software development agency.
Your goal is to answer client questions professionally, technically, and concisely.

RULES FOR ANSWERING:
1. Answer in 20 WORDS OR LESS.
2. Be direct. Do not use filler words like "Thank you for asking" or "I would be happy to help".
3. If the answer is a list, pick only the top 2 items.
4. If asked about price, give ranges, not exact quotes.
5. If you don't know the answer, say: "I'll need to check with a senior engineer on that."

DATA SHEET:
[Services]
- Custom Web Development: React, Vue, Next.js, Python (Django/FastAPI), Node.js.
- Mobile App Development: Flutter (Cross-platform), Swift (iOS), Kotlin (Android).
- AI & Machine Learning: Chatbots, Predictive Analytics, Computer Vision.
- Cloud DevOps: AWS, Google Cloud, Azure setup and CI/CD pipelines.

[Pricing Models]
- Time & Material (Hourly): $40 - $80 per hour depending on developer seniority.
- Fixed Price: Minimum project size is $5,000. Requires detailed scope.
- Retainer: Dedicated team for $4,000/month per developer.

[Process]
- Methodology: Agile/Scrum with 2-week sprints.
- Tools: We use Jira for tracking and Slack for communication.
- Timeline: MVP (Minimum Viable Product) usually takes 4-8 weeks.

[Support & Maintenance]
- Standard Support: Bug fixing for 3 months after launch (Free).
- Premium SLA: 24/7 server monitoring and priority support ($500/month).

[Contact]
- Email: projects@softstream.tech
- Phone: +1-555-CODE-NOW
- Location: San Francisco, CA (but we work remote globally).
"""
# --- 2. SETUP ---
# 🔒 SECURITY: We read the token from Settings > Secrets
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)

# Whisper runs on GPU when available; "tiny" keeps load/transcribe latency low.
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model("tiny", device=device)


def voice_chat(audio, history):
    """One full voice round-trip: transcribe -> chat completion -> TTS.

    Args:
        audio: Filepath of the recorded clip from ``gr.Audio`` (``None`` when
            the widget is cleared).
        history: Chat-message list held in ``gr.State``; seeded with the
            system prompt on the first turn.

    Returns:
        A 3-tuple ``(audio_path, text, history)``: path of the spoken MP3
        reply (or ``None``), the assistant text (or an error message), and
        the updated history.
    """
    # Nothing recorded (e.g. the widget was cleared) — keep state unchanged.
    if audio is None:
        return None, "", history

    # A. Initialize History — first turn starts with the company system prompt.
    if not history:
        history = [{"role": "system", "content": COMPANY_KNOWLEDGE}]

    try:
        # B. Hear (Whisper). fp16=False avoids half-precision on CPU.
        transcription = whisper_model.transcribe(audio, fp16=False)["text"]

        # C. Think (AI with Context).
        # We enforce the brevity rule here again to be safe.
        history.append({"role": "user", "content": transcription + " (Answer in 20 words max)"})

        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",  # Currently the best free/stable model
            messages=history,
            max_tokens=60,     # Strict limit to prevent long monologues
            temperature=0.4,   # Low temp = more robotic and precise
        )
        ai_text = response.choices[0].message.content
        history.append({"role": "assistant", "content": ai_text})

        # D. Speak (gTTS).
        # FIX: write to a unique temp file instead of a shared "response.mp3" —
        # with the fixed name, concurrent users overwrote each other's reply.
        import tempfile
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS opens the path itself; release the raw descriptor.
        tts = gTTS(text=ai_text, lang='en')
        tts.save(audio_path)

        return audio_path, ai_text, history
    except Exception as e:
        # FIX: roll back the unanswered user turn so a failed request does not
        # leave a dangling user message (with the brevity suffix) in history.
        if history and history[-1].get("role") == "user":
            history.pop()
        return None, f"Error: {str(e)}", history


# --- 3. INTERFACE ---
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("## 🏢 SoftStream Tech AI Consultant")

    # Per-session conversation state (list of chat messages).
    conversation_history = gr.State([])

    with gr.Row():
        input_audio = gr.Audio(sources=["microphone"], type="filepath", label="Ask about our services")
    with gr.Row():
        output_audio = gr.Audio(label="AI Response (Under 25s)", autoplay=True)
        output_text = gr.Textbox(label="Transcript")

    clear_btn = gr.Button("Reset Conversation")

    # Fires whenever a new recording lands in the mic widget.
    input_audio.change(
        voice_chat,
        inputs=[input_audio, conversation_history],
        outputs=[output_audio, output_text, conversation_history]
    )
    # Reset: empty history, no audio, blank transcript.
    clear_btn.click(lambda: ([], None, ""), outputs=[conversation_history, output_audio, output_text])

demo.launch()