Build error
Upload 8 files
- services/qwen.py +18 -16
- services/search.py +1 -1
- services/whisper.py +20 -44
services/qwen.py
CHANGED
@@ -2,10 +2,10 @@ import logging
 from typing import List, Dict, Optional, Tuple
 
 import torch
-from transformers import pipeline
-from
+# from transformers import pipeline
+from huggingface_hub import InferenceClient
 
-from config.config import token,
+from config.config import token, SYSTEM_PROMPT
 from services.whisper import generate_speech, transcribe
 from services.search import WebSearcher
 
@@ -19,13 +19,12 @@ model_kwargs = {
     "torch_dtype": torch.float32,
     'use_cache': True
 }
-client =
-    "text-generation",
+client = InferenceClient(
     model="Qwen/Qwen2.5-0.5B-Instruct",
-    token=token
-    trust_remote_code=True,
-    device=device,
-    model_kwargs=model_kwargs
+    token=token
+    # trust_remote_code=True,
+    # device=device,
+    # model_kwargs=model_kwargs
 )
 
 async def respond(
@@ -65,24 +64,27 @@ async def respond(
         if results:
             search_context = "Based on search results:\n"
             for result in results:
-                snippet = result['content'][:
+                snippet = result['content'][:5000].strip()
                 search_context += f"{snippet}\n"
             prompt = prompt.replace(SYSTEM_PROMPT, f"{SYSTEM_PROMPT}\n{search_context}")
 
         # Generate response
-        reply = client(
+        reply = client.text_generation(
             prompt,
-            max_new_tokens=
+            max_new_tokens=300,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-
+            return_full_text=False
         )
 
         # Extract and clean assistant response
-        assistant_response = reply
-
-
+        assistant_response = reply  # Reply is already the generated text string
+        if "<|im_start|>assistant\n" in assistant_response:
+            assistant_response = assistant_response.split("<|im_start|>assistant\n")[-1]
+        if "<|im_end|>" in assistant_response:
+            assistant_response = assistant_response.split("<|im_end|>")[0]
+        assistant_response = assistant_response.strip()
 
         # Convert response to speech
         audio_path = await generate_speech(assistant_response)
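
For reference, a minimal standalone sketch of the new call path in this diff. The ChatML-style prompt, the HF_TOKEN placeholder, and the example question are illustrative assumptions; in the Space the real token and SYSTEM_PROMPT come from config.config.

# Sketch: hosted text generation via huggingface_hub's InferenceClient.
from huggingface_hub import InferenceClient

client = InferenceClient(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    token="HF_TOKEN",  # placeholder; the Space reads this from config
)

# Qwen instruct models expect ChatML-style turns like this.
prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nWhat is the capital of France?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

reply = client.text_generation(
    prompt,
    max_new_tokens=300,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    return_full_text=False,  # return only the completion, not the echoed prompt
)

# With return_full_text=False the prompt is not echoed back, so the
# <|im_start|>assistant split in the diff is mostly defensive; stripping
# a trailing <|im_end|> is still worthwhile.
print(reply.split("<|im_end|>")[0].strip())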
services/search.py
CHANGED
@@ -40,7 +40,7 @@ class WebSearcher:
             search_url,
             headers=self.headers,
             params=params,
-            timeout=
+            timeout=3,
             verify=False
         )
         response.raise_for_status()
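
As a side note, a small sketch of what the added timeout means at runtime; the URL and params are placeholders, not the WebSearcher's real values. Note that verify=False keeps the existing behavior of skipping TLS certificate checks, a deliberate trade-off in this Space.

# Sketch: bounded search request with explicit timeout handling.
import requests

try:
    response = requests.get(
        "https://example.com/search",   # placeholder search_url
        params={"q": "example query"},  # placeholder params
        timeout=3,     # fail fast instead of hanging the respond() loop
        verify=False,  # as in the diff: TLS verification disabled
    )
    response.raise_for_status()
except requests.exceptions.Timeout:
    # Raised when no response arrives within 3 seconds.
    print("search timed out")
except requests.exceptions.RequestException as exc:
    print(f"search failed: {exc}")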
services/whisper.py
CHANGED
@@ -1,29 +1,19 @@
 import os
 import tempfile
 import logging
+import requests
 from typing import Optional
 
-import torch
-import librosa
 import edge_tts
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
-from config.config import VOICE, FALLBACK_VOICES
+from config.config import VOICE, FALLBACK_VOICES, token
 
 
 logger = logging.getLogger(__name__)
 
 # Whisper model for speech to text
-
-
-    local_files_only=False
-)
-model = WhisperForConditionalGeneration.from_pretrained(
-    "openai/whisper-tiny",
-    local_files_only=False,
-    low_cpu_mem_usage=True,
-    torch_dtype=torch.float32,
-).to("cpu")
+API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
+headers = {"Authorization": f"Bearer {token}"}
 
 # Voice selection handling
 async def get_valid_voice() -> str:
@@ -59,34 +49,20 @@ async def generate_speech(text: str) -> Optional[str]:
 
 # Speech-to-text using Whisper
 async def transcribe(audio_file: str) -> str:
-
-        audio_file,
-
-        mono=True,
-        duration=30
-    )
-
-    inputs = processor(
-        audio,
-        sampling_rate=sr,
-        return_tensors="pt",
-        return_attention_mask=True
-    ).to(model.device)
-
-    with torch.no_grad():
-        generated_ids = model.generate(
-            input_features=inputs.input_features,
-            attention_mask=inputs.attention_mask,
-            language="en",
-            task="transcribe",
-            max_length=448,
-            temperature=0.0
-        )
+    try:
+        with open(audio_file, "rb") as f:
+            data = f.read()
 
-
-
-
-
-
-
-
+        response = requests.post(API_URL, headers=headers, data=data)
+        result = response.json()
+
+        if "text" in result:
+            transcription = result["text"].strip()
+            logger.info(f"Transcribed text: {transcription}")
+            return transcription
+        else:
+            raise ValueError("No transcription in response")
+
+    except Exception as e:
+        logger.error(f"Transcription error: {str(e)}")
+        raise RuntimeError(f"Failed to transcribe audio: {str(e)}")
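
For context, a minimal sketch of the serverless Inference API exchange the new transcribe() depends on. The retry on HTTP 503 (which the API returns while the model cold-starts) is an assumption about typical client handling, not something this commit adds; HF_TOKEN is a placeholder.

# Sketch: Whisper ASR over the HF serverless Inference API, with cold-start retry.
import time
import requests

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
HEADERS = {"Authorization": "Bearer HF_TOKEN"}  # placeholder token

def transcribe_once(audio_file: str, retries: int = 3) -> str:
    with open(audio_file, "rb") as f:
        data = f.read()  # raw audio bytes; the API detects the format
    for _ in range(retries):
        response = requests.post(API_URL, headers=HEADERS, data=data, timeout=30)
        if response.status_code == 503:
            # The API returns 503 with an estimated_time while loading the model.
            time.sleep(response.json().get("estimated_time", 10))
            continue
        response.raise_for_status()
        return response.json()["text"].strip()
    raise RuntimeError("whisper-tiny did not load in time")

Swapping local WhisperProcessor/WhisperForConditionalGeneration inference for this API call removes torch, librosa, and transformers from whisper.py's import path, which matches the dependency trimming visible across the rest of this commit.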