NaderAfshar committed on
Commit
d1cf1d1
·
1 Parent(s): 55b7d0c

updated code and implemented a new test: test_workflow

Browse files
Files changed (4) hide show
  1. requirements.txt +19 -16
  2. step4.py +10 -14
  3. test_audio.py +52 -0
  4. test_audio_delay.py +40 -0
requirements.txt CHANGED
@@ -1,20 +1,23 @@
1
- pytorch
2
- gradio ==5.20.1
3
- gradio_client ==1.7.2
4
  llama-parse
5
- llama-index ==0.12.23
6
- llama-index-cli ==0.4.1
7
- llama-index-core ==0.12.23.post2
8
- llama-index-embeddings-huggingface ==0.5.2
9
- llama-index-indices-managed-llama-cloud ==0.6.8
10
- llama-index-llms-cohere ==0.4.0
11
- llama-index-llms-openai
12
  llama-index-llms-groq
13
- llama-index-readers-file ==0.4.6
14
- llama-index-readers-llama-parse ==0.4.0
15
- llama-index-utils-workflow ==0.3.0
16
  #openai-whisper ==20240930
17
- llama-index-readers-whisper
18
- pydantic ==2.10.6
19
- pydantic_core ==2.27.2
20
  dotenv
 
 
 
 
1
+ torch
2
+ gradio
3
+ gradio_client
4
  llama-parse
5
+ llama-index
6
+ llama-index-cli
7
+ llama-index-core
8
+ llama-index-embeddings-huggingface
9
+ llama-index-indices-managed-llama-cloud
10
+ llama-index-llms-cohere
11
+ #llama-index-llms-openai
12
  llama-index-llms-groq
13
+ llama-index-readers-file
14
+ llama-index-readers-llama-parse
15
+ llama-index-utils-workflow
16
  #openai-whisper ==20240930
17
+ llama-index-readers-whisper
18
+ pydantic
19
+ pydantic_core
20
  dotenv
21
+ faster-whisper
22
+ openai-whisper
23
+ ffmpeg-python
step4.py CHANGED
@@ -144,12 +144,21 @@ class RAGWorkflow(Workflow):
144
  # generate one query for each of the fields, and fire them off
145
  for field in fields:
146
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
 
 
 
 
 
 
 
 
 
147
  ctx.send_event(QueryEvent(
148
  field=field,
149
  query=question
150
  ))
151
 
152
- # store the number of fields so we know how many to wait for later
153
  await ctx.set("total_fields", len(fields))
154
  print(f"\n DEBUG: total fields from Context : {len(fields)}")
155
 
@@ -228,19 +237,6 @@ async def main():
228
  application_form="data/fake_application_form.pdf"
229
  )
230
 
231
- '''
232
- print("DEBUG: Awaiting next event manually...")
233
- event = await handler.next_event()
234
- print(f"DEBUG: Received event - {event}")
235
-
236
- # Handle the first event if it's InputRequiredEvent
237
- if isinstance(event, InputRequiredEvent):
238
- print("We've filled in your form! Here are the results:\n")
239
- print(event.result)
240
- response = input(event.prefix)
241
- handler.ctx.send_event(HumanResponseEvent(response=response))
242
- '''
243
-
244
  print("DEBUG: Starting event stream...")
245
  async for event in handler.stream_events():
246
  print(f"DEBUG: Received event type {type(event).__name__}")
 
144
  # generate one query for each of the fields, and fire them off
145
  for field in fields:
146
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
147
+ # Is there feedback? If so, add it to the query:
148
+ if hasattr(ev, "feedback"):
149
+ question += f"""
150
+ \nWe previously got feedback about how we answered the questions.
151
+ It might not be relevant to this particular field, but here it is:
152
+ <feedback>{ev.feedback}</feedback>
153
+ """
154
+ print("\n question : ", question)
155
+
156
  ctx.send_event(QueryEvent(
157
  field=field,
158
  query=question
159
  ))
160
 
161
+ # store the number of fields, so we know how many to wait for later
162
  await ctx.set("total_fields", len(fields))
163
  print(f"\n DEBUG: total fields from Context : {len(fields)}")
164
 
 
237
  application_form="data/fake_application_form.pdf"
238
  )
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  print("DEBUG: Starting event stream...")
241
  async for event in handler.stream_events():
242
  print(f"DEBUG: Received event type {type(event).__name__}")
test_audio.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.readers.whisper import WhisperReader
2
+ from faster_whisper import WhisperModel
3
+ import gradio as gr
4
+ from pathlib import Path
5
+ from dotenv import load_dotenv
6
+ import os
7
+
8
# Load environment variables from a local .env file so the OpenAI key is
# available before any client library reads it.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Module-level holder for the most recent transcription produced by the UI;
# written by store_transcription() and printed after the UI exits.
transcription_value = ""
14
def transcribe_speech(filepath):
    """Transcribe the audio file at *filepath* using faster-whisper.

    Parameters
    ----------
    filepath : str | None
        Path to the recorded audio; gr.Audio passes None when nothing
        was recorded.

    Returns
    -------
    str
        The transcription, or an empty string when there was no audio.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # Bug fix: without this early return, model.transcribe(None)
        # below would raise instead of letting the user retry.
        return ""

    model = WhisperModel("base", compute_type="float32")
    segments, _ = model.transcribe(filepath)

    # Each segment carries a .text fragment; join them into one string.
    return " ".join(segment.text for segment in segments)
22
+
23
+
24
def store_transcription(output):
    """Remember *output* in the module-level ``transcription_value``.

    Returns the stored value unchanged so the function composes cleanly
    inside a gr.Interface callback pipeline.
    """
    global transcription_value
    transcription_value = output
    return transcription_value
28
+
29
+
30
# Microphone tab: record -> transcribe with faster-whisper -> stash the text
# in the module-level transcription_value while also displaying it.
mic_transcribe = gr.Interface(
    fn=lambda x: store_transcription(transcribe_speech(x)),
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Transcription")
)


test_interface = gr.Blocks()
with test_interface:
    gr.TabbedInterface(
        [mic_transcribe],
        ["Transcribe Microphone"]
    )

# Blocks here until the UI is closed (public share link, port 8000).
test_interface.launch(
    share=True,
    server_port=8000,
    #prevent_thread_lock=True
)

# Runs only after launch() returns; prints the last captured transcription.
print(transcription_value)

#test_interface.close()
test_audio_delay.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import os
4
+
5
+
6
def transcribe_audio(audio_file):
    """Transcribe *audio_file* with the Whisper "base" model.

    Parameters
    ----------
    audio_file : str | None
        Path to the recording; gr.Audio passes None when nothing was
        recorded.

    Returns
    -------
    str
        The transcribed text, or an error message when the file is
        missing.
    """
    # Bug fix: also guard None/"" — os.path.exists(None) raises TypeError,
    # which would crash the callback when no audio was recorded.
    if not audio_file or not os.path.exists(audio_file):
        print(f"Cannot locate file: {audio_file}")
        return "Error: Audio file not found!"

    print(f"Processing file: {audio_file}")

    # NOTE(review): the model is reloaded on every call; fine for a test
    # app, but worth caching if this becomes a long-running service.
    model = whisper.load_model("base")
    result = model.transcribe(audio_file, fp16=False)
    return result["text"]
17
+
18
+
19
def main():
    """Build and launch the microphone-transcription Gradio app."""
    # gradio hands the recording to the callback as a temp-file path
    # because of type="filepath".
    audio_input = gr.Audio(sources=["microphone"], type="filepath")
    output_text = gr.Textbox(label="Transcription")

    iface = gr.Interface(fn=transcribe_audio,
                         inputs=audio_input,
                         outputs=output_text,
                         title="Audio Transcription App",
                         description="Record an audio file and hit the 'Submit' button"
                         )

    # Blocks here until the UI is closed (public share link, port 7860).
    iface.launch(
        share=True,
        debug=True,
        ssr_mode=False,
        server_port=7860,
        #prevent_thread_lock=True
    )


if __name__ == '__main__':
    main()