kathirog committed on
Commit
8a3153b
·
verified ·
1 Parent(s): f404bdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -77
app.py CHANGED
@@ -4,9 +4,42 @@ from threading import Thread
4
 
5
  import gradio as gr
6
  import torch
7
- import pyttsx3
8
- import speech_recognition as sr
9
  from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Convert voice input (audio) to text
12
  def voice_to_text(audio):
@@ -21,81 +54,62 @@ def voice_to_text(audio):
21
  text = "Could not request results from Google Speech Recognition service."
22
  return text
23
 
 
24
  # Convert text to speech (voice output)
25
  def text_to_voice(text):
26
- engine = pyttsx3.init()
27
- engine.save_to_file(text, 'response.mp3')
28
  engine.runAndWait()
29
- return 'response.mp3'
30
-
31
- # Model loading and configuration
32
- if __name__ == '__main__':
33
- parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")
34
-
35
- parser.add_argument("--load-in-4bit",
36
- action="store_true",
37
- help="Load base model with 4bit quantization (requires GPU)")
38
-
39
- parser.add_argument("--server-port",
40
- type=int,
41
- default=2121,
42
- help="The port the chatbot server listens to")
43
-
44
- args = parser.parse_args()
45
-
46
- with gr.Blocks() as demo:
47
- chatbot = gr.Chatbot()
48
- msg = gr.Textbox()
49
- audio_input = gr.Audio(type="filepath", label="Audio Input (or leave blank to use text input)")
50
- clear = gr.Button("Clear")
51
-
52
- with urllib.request.urlopen(
53
- "https://raw.githubusercontent.com/GiovanniGatti/socratic-llm/kdd-2024/templates/inference.txt"
54
- ) as f:
55
- inference_prompt_template = f.read().decode('utf-8')
56
-
57
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
58
-
59
- model = AutoModelForCausalLM.from_pretrained(
60
- "eurecom-ds/Phi-3-mini-4k-socratic",
61
- torch_dtype=torch.bfloat16,
62
- load_in_4bit=args.load_in_4bit,
63
- trust_remote_code=True,
64
- device_map=device,
65
- )
66
-
67
- tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")
68
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
69
-
70
- def user(user_message, history):
71
- return "", history + [[user_message, ""]]
72
-
73
- def bot(history, audio=None):
74
- user_query = ""
75
- if audio:
76
- # Convert audio to text
77
- user_query = voice_to_text(audio)
78
- else:
79
- user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
80
- last_query: str = history[-1][0]
81
- user_query += f"Student: {last_query}"
82
-
83
- content = inference_prompt_template.format(input=user_query)
84
- formatted = tokenizer.apply_chat_template(
85
- [{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True
86
- )
87
-
88
- encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)
89
-
90
- thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
91
- thread.start()
92
-
93
- for word in streamer:
94
- history[-1][1] += word
95
- yield history, text_to_voice(history[-1][1])
96
-
97
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot, audio_input], [chatbot, gr.Audio()])
98
- clear.click(lambda: None, None, chatbot, queue=False)
99
-
100
- demo.queue()
101
- demo.launch(server_name="0.0.0.0", server_port=args.server_port)
 
4
 
5
  import gradio as gr
6
  import torch
 
 
7
  from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
8
+ import speech_recognition as sr
9
+ import pyttsx3
10
+ from huggingface_hub import InferenceClient
11
+
12
import os

# Hugging Face API token.
# Read from the HF_TOKEN environment variable instead of being hard-coded, so
# a real secret never has to be written into (and committed with) this file.
# When the variable is unset the value is None and no explicit token is passed.
API_KEY = os.environ.get("HF_TOKEN")

# Initialize InferenceClient with the API key
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=API_KEY)

# Initialize text-to-speech engine (shared by text_to_voice)
engine = pyttsx3.init()
20
+
21
# Load the model for Socratic chatbot
# The model, tokenizer and device are kept here after the first load so that
# later calls to load_model() do not reload the weights.
_MODEL_CACHE = {}


def load_model():
    """Return the Socratic model together with its tokenizer, a streamer and the device.

    The first call parses the command-line options, picks CUDA when available
    (CPU otherwise) and loads "eurecom-ds/Phi-3-mini-4k-socratic". Subsequent
    calls reuse the cached model, tokenizer and device.

    A new ``TextIteratorStreamer`` is created on every call: the streamer is
    consumed by iterating over it, so each generation gets its own instance.

    Returns:
        tuple: ``(model, tokenizer, streamer, device)``.
    """
    if "model" not in _MODEL_CACHE:
        parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")
        parser.add_argument("--load-in-4bit", action="store_true", help="Load base model with 4bit quantization (requires GPU)")
        parser.add_argument("--server-port", type=int, default=2121, help="The port the chatbot server listens to")
        # parse_known_args: this runs inside a request handler, so an argument
        # meant for whatever launched the app must not terminate the process.
        args, _unknown = parser.parse_known_args()

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        model = AutoModelForCausalLM.from_pretrained(
            "eurecom-ds/Phi-3-mini-4k-socratic",
            torch_dtype=torch.bfloat16,
            load_in_4bit=args.load_in_4bit,
            trust_remote_code=True,
            device_map=device,
        )
        tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")

        _MODEL_CACHE.update(model=model, tokenizer=tokenizer, device=device)

    tokenizer = _MODEL_CACHE["tokenizer"]
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    return _MODEL_CACHE["model"], tokenizer, streamer, _MODEL_CACHE["device"]
42
+
43
 
44
  # Convert voice input (audio) to text
45
  def voice_to_text(audio):
 
54
  text = "Could not request results from Google Speech Recognition service."
55
  return text
56
 
57
# Convert text to speech (voice output)
def text_to_voice(text):
    """Synthesize ``text`` with the shared pyttsx3 engine and return the audio file path.

    Each call writes to its own temporary file, so two requests handled at the
    same time cannot overwrite each other's audio (the previous fixed name
    'response.mp3' was shared by every request).

    Args:
        text: The text to speak.

    Returns:
        str: Path of the file the speech was saved to. The file is not deleted
        here; the caller owns it.
    """
    import os
    import tempfile

    # The ".mp3" suffix is kept from the original file name.
    # NOTE(review): the actual audio encoding is decided by the pyttsx3
    # driver -- confirm it matches the extension.
    fd, audio_file = tempfile.mkstemp(prefix="response-", suffix=".mp3")
    os.close(fd)  # only the path is needed; the engine opens the file itself

    engine.save_to_file(text, audio_file)
    engine.runAndWait()
    return audio_file
64
+
65
+
66
# Respond with Socratic Chatbot logic and text-to-speech
def respond(message, history, audio_input=None):
    """Generate the teacher's reply to a student message as text and as speech.

    Args:
        message: The student's typed message. May be empty when audio is used.
        history: Previous ``[student, teacher]`` turns, or ``None``. A trailing
            turn with an empty teacher reply is treated as the pending
            question (chatbot-style callers append it before calling).
        audio_input: Optional path to a recorded audio file. When given, its
            transcription replaces ``message``.

    Returns:
        tuple: ``(response_text, audio_file_path)``. When there is nothing to
        answer (no text, no audio, no pending question) ``("", None)`` is
        returned without running the model.
    """
    # A two-input gr.Interface (text, audio) passes its values positionally,
    # so the audio file path lands in `history`. Recognise that case instead
    # of indexing into a string.
    if audio_input is None and isinstance(history, str):
        audio_input, history = history, None

    if audio_input:
        message = voice_to_text(audio_input)  # Convert audio input to text if available
    message = (message or "").strip()

    turns = [tuple(turn) for turn in (history or [])]
    # Pending question already appended to the history with no reply yet:
    # it is the current question, not a completed turn.
    if turns and not turns[-1][1]:
        pending = turns.pop()[0]
        message = message or pending

    if not message:
        return "", None

    # Prepare the prompt for the Socratic model
    user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in turns)
    user_query += f"Student: {message}"

    content = f"Teacher: {user_query}"

    # Get the model's response
    model, tokenizer, streamer, device = load_model()

    formatted = tokenizer.apply_chat_template([{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True)
    encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)

    # generate() runs in a worker thread and feeds the streamer, which is
    # drained here until generation finishes.
    thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
    thread.start()
    response = "".join(streamer)
    thread.join()

    # Convert response text to speech (audio output)
    audio_output = text_to_voice(response)

    return response, audio_output
95
+
96
+
97
# Gradio UI with text and audio input/output
def create_interface():
    """Build the Gradio interface around ``respond`` and launch it.

    The interface has two inputs (a text box and an audio file path) and two
    outputs (the reply as text and as audio).
    """
    text_in = gr.Textbox(
        label="Text Input (or leave blank to use audio input)",
        placeholder="Enter your message here...",
    )
    audio_in = gr.Audio(
        type="filepath",
        label="Audio Input (or leave blank to use text input)",
    )
    text_out = gr.Textbox(label="Text Output")
    voice_out = gr.Audio(label="Voice Output")

    demo = gr.Interface(
        fn=respond,
        inputs=[text_in, audio_in],
        outputs=[text_out, voice_out],
    )
    demo.launch()


# Script entry point: start the UI only when run directly.
if __name__ == "__main__":
    create_interface()