NaderAfshar commited on
Commit
c2cddf6
·
1 Parent(s): d1cf1d1

Updated app.py with functioning code. Voice input is also fixed.

Browse files
Files changed (4) hide show
  1. app.py +118 -186
  2. app2.py +0 -228
  3. requirements.txt +1 -0
  4. test_audio.py +4 -8
app.py CHANGED
@@ -1,178 +1,178 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # # Lesson 6: Use your voice
5
-
6
- # **Lesson objective**: Get voice feedback
7
- #
8
- # So far we've set up a moderately complex workflows with a human feedback loop. Let's run it through the visualizer to see what it looks like.
9
-
10
- # <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the notebook cell by cell. Please try to avoid running all cells at once.</div>
11
-
12
- # In[1]:
13
-
14
-
15
- # Warning control
16
- import warnings
17
- import os, json
18
- from llama_cloud_services import LlamaParse
19
- from llama_index.llms.cohere import Cohere
20
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
21
  from llama_index.core import (
22
  VectorStoreIndex,
23
  StorageContext,
24
  load_index_from_storage
25
  )
 
 
26
  from llama_index.core.workflow import (
27
  StartEvent,
28
  StopEvent,
29
  Workflow,
30
  step,
31
  Event,
32
- Context,
33
- InputRequiredEvent,
34
- HumanResponseEvent
35
  )
36
- from llama_index.utils.workflow import draw_all_possible_flows
37
- #import whisper
38
- from llama_index.readers.whisper import WhisperReader
39
- import gradio as gr
40
- import asyncio
41
- import nest_asyncio
42
  from queue import Queue
 
 
43
  from dotenv import load_dotenv
 
 
 
 
 
 
44
 
45
- # Load environment variables
46
  load_dotenv()
47
- CO_API_KEY = os.getenv("COHERE_API_KEY")
48
  llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
 
49
  LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
50
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
51
-
52
- warnings.filterwarnings('ignore')
53
 
54
- nest_asyncio.apply() # Accommodate nested events.
55
-
56
- transcription_value = None
57
 
58
 
59
  class ParseFormEvent(Event):
60
  application_form: str
61
 
 
62
  class QueryEvent(Event):
63
  query: str
 
 
64
 
65
  class ResponseEvent(Event):
66
  response: str
67
 
 
 
68
  class FeedbackEvent(Event):
69
  feedback: str
70
 
 
71
  class GenerateQuestionsEvent(Event):
72
  pass
73
 
 
74
  class RAGWorkflow(Workflow):
75
  storage_dir = "./storage"
76
- llm: Cohere
77
  query_engine: VectorStoreIndex
78
 
79
  @step
80
  async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
81
-
 
82
  if not ev.resume_file:
83
  raise ValueError("No resume file provided")
84
 
85
  if not ev.application_form:
86
  raise ValueError("No application form provided")
87
 
88
- # give ourselves an LLM to work with
89
- self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
90
-
91
- # ingest our data and set up the query engine
92
  if os.path.exists(self.storage_dir):
93
- # we've already ingested our documents
94
  storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
95
  index = load_index_from_storage(storage_context)
96
  else:
97
- # we need to parse and load our documents
98
  documents = LlamaParse(
99
- api_key=llama_cloud_api_key,
100
- base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
101
  result_type="markdown",
102
- content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
 
103
  ).load_data(ev.resume_file)
104
  # embed and index the documents
105
  index = VectorStoreIndex.from_documents(
106
  documents,
107
- embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
108
  )
109
  index.storage_context.persist(persist_dir=self.storage_dir)
110
 
111
- # either way, create a query engine
112
  self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
113
 
114
- # let's pass our application form to a new step where we parse it
 
 
115
  return ParseFormEvent(application_form=ev.application_form)
116
 
117
- # we've separated the form parsing from the question generation
118
  @step
119
  async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
120
  parser = LlamaParse(
121
- api_key=llama_cloud_api_key,
122
- base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
123
  result_type="markdown",
124
- content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
 
125
  formatting_instruction="Return a bulleted list of the fields ONLY."
126
  )
127
 
128
  # get the LLM to convert the parsed form into JSON
129
  result = parser.load_data(ev.application_form)[0]
130
  raw_json = self.llm.complete(
131
- f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown.")
 
 
 
 
 
 
132
  fields = json.loads(raw_json.text)["fields"]
133
 
134
  await ctx.set("fields_to_fill", fields)
 
135
 
136
  return GenerateQuestionsEvent()
137
 
138
- # this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
139
  @step
140
  async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
141
 
142
  # get the list of fields to fill in
143
  fields = await ctx.get("fields_to_fill")
 
144
 
145
  # generate one query for each of the fields, and fire them off
146
  for field in fields:
147
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
148
-
149
- if hasattr(ev,"feedback"):
150
  question += f"""
151
- \nWe previously got feedback about how we answered the questions.
152
- It might not be relevant to this particular field, but here it is:
153
- <feedback>{ev.feedback}</feedback>
154
- """
 
155
 
156
  ctx.send_event(QueryEvent(
157
  field=field,
158
  query=question
159
  ))
160
 
161
- # store the number of fields so we know how many to wait for later
162
  await ctx.set("total_fields", len(fields))
 
 
163
  return
164
 
165
  @step
166
  async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
167
- print(f"Asking question: {ev.query}")
168
-
169
- response = self.query_engine.query(f"This is a question about the specific resume we have in our database: {ev.query}")
170
-
171
- print(f"Answer was: {str(response)}")
172
-
173
  return ResponseEvent(field=ev.field, response=response.response)
174
 
175
- # we now emit an InputRequiredEvent
176
  @step
177
  async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
178
  # get the total number of fields to wait for
@@ -180,10 +180,11 @@ class RAGWorkflow(Workflow):
180
 
181
  responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
182
  if responses is None:
183
- return None # do nothing if there's nothing to do yet
184
 
185
  # we've got all the responses!
186
  responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
 
187
 
188
  result = self.llm.complete(f"""
189
  You are given a list of fields in an application form and responses to
@@ -195,16 +196,20 @@ class RAGWorkflow(Workflow):
195
  </responses>
196
  """)
197
 
198
- # save the result for later
 
 
199
  await ctx.set("filled_form", str(result))
200
 
201
- # Let's get a human in the loop
 
 
202
  return InputRequiredEvent(
203
  prefix="How does this look? Give me any feedback you have on any of the answers.",
204
  result=result
205
  )
206
 
207
- # Accept the feedback.
208
  @step
209
  async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
210
 
@@ -227,103 +232,17 @@ class RAGWorkflow(Workflow):
227
  return FeedbackEvent(feedback=ev.response)
228
 
229
 
230
-
231
- WORKFLOW_FILE = "workflows/RAG-EventDriven.html"
232
- draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)
233
-
234
-
235
- from IPython.display import display, HTML, DisplayHandle
236
- from helper import extract_html_content
237
-
238
- html_content = extract_html_content(WORKFLOW_FILE)
239
- display(HTML(html_content), metadata=dict(isolated=True))
240
-
241
-
242
- # Cool! You can see the path all the way to the end and the feedback loop is clear.
243
-
244
- # <p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
245
- # &nbsp; <b>Different Run Results:</b> The output generated by AI chat models can vary with each execution due to their dynamic, probabilistic nature. Don't be surprised if your results differ from those shown in the video.</p>
246
-
247
- # ## Getting voice feedback
248
-
249
- # Now, just for fun, you'll do one more thing: change the feedback from text feedback to actual words spoken out loud. To do this we'll use a different model from OpenAI called Whisper. LlamaIndex has a built-in way to transcribe audio files into text using Whisper.
250
- #
251
- # Here's a function that takes a file and uses Whisper to return just the text:
252
-
253
-
254
  def transcribe_speech(filepath):
255
  if filepath is None:
256
  gr.Warning("No audio found, please retry.")
257
- audio_file= open(filepath, "rb")
258
- reader = WhisperReader(
259
- model="whisper-1",
260
- api_key=OPENAI_API_KEY,
261
- )
262
- documents = reader.load_data(filepath)
263
- return documents[0].text
264
-
265
 
266
- # But before we can use it, you need to capture some audio from your microphone. That involves some extra steps!
267
- #
268
- # First, create a callback function that saves data to a global variable.
269
 
 
270
 
271
- def store_transcription(output):
272
- global transcription_value
273
- transcription_value = output
274
- return output
275
 
276
-
277
- # Now use Gradio, which has special widgets that can render inside a notebook, to create an interface
278
- # for capturing audio from a microphone. When the audio is captured, it calls `transcribe_speech` on the recorded data,
279
- # and calls `store_transcription` on that.
280
-
281
-
282
- mic_transcribe = gr.Interface(
283
- fn=lambda x: store_transcription(transcribe_speech(x)),
284
- inputs=gr.Audio(sources=["microphone"],
285
- type="filepath"),
286
- outputs=gr.Textbox(label="Transcription"))
287
-
288
-
289
- # In Gradio, define a visual interface containing this microphone input and output, and then launch it:
290
-
291
- # Make sure to wait for the gradio interface to load. A popup window will appear and ask you to allow the use of your
292
- # microphone. To record audio, make sure to click on record -> stop -> submit. Make sure the audio is captured
293
- # before clicking on 'submit'.
294
-
295
-
296
- test_interface = gr.Blocks()
297
- with test_interface:
298
- gr.TabbedInterface(
299
- [mic_transcribe],
300
- ["Transcribe Microphone"]
301
- )
302
-
303
- test_interface.launch(
304
- share=True,
305
- show_error=True,
306
- server_port=8000,
307
- prevent_thread_lock=True
308
- )
309
-
310
-
311
- # You can now print out the transcription, which is stored in that global variable you created earlier:
312
-
313
-
314
- print(transcription_value)
315
-
316
-
317
- # run Gradio again, so it's a good idea to shut down the running Gradio interface.
318
-
319
-
320
-
321
- test_interface.close()
322
-
323
-
324
- # Make sure to run the previous cell to close the Gradio interface before running the next cell
325
-
326
- # Now create an entirely new class, a Transcription Handler.
327
  class TranscriptionHandler:
328
 
329
  # we create a queue to hold transcription values
@@ -358,7 +277,7 @@ class TranscriptionHandler:
358
  self.interface.launch(
359
  share=False,
360
  server_port=8000,
361
- inbrowser=True # open in a browser
362
  )
363
 
364
  # we poll every 1.5 seconds waiting for something to end up in the queue
@@ -371,29 +290,42 @@ class TranscriptionHandler:
371
  await asyncio.sleep(1.5)
372
 
373
 
374
- # Now you have a transcription handler, you can use it instead of the keyboard input interface when you're getting human input when you run your workflows:
 
 
 
 
 
375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
 
 
 
377
 
378
- w = RAGWorkflow(timeout=600, verbose=False)
 
 
 
 
379
 
380
- handler = w.run(
381
- resume_file="./data/fake_resume.pdf",
382
- application_form="./data/fake_application_form.pdf"
383
- )
384
 
385
- async for event in handler.stream_events():
386
- if isinstance(event, InputRequiredEvent):
387
- # Get transcription
388
- transcription_handler = TranscriptionHandler()
389
- response = await transcription_handler.get_transcription()
390
-
391
- handler.ctx.send_event(
392
- HumanResponseEvent(
393
- response=response
394
- )
395
- )
396
-
397
- response = await handler
398
- print("Agent complete! Here's your final result:")
399
- print(str(response))
 
1
+ from helper import extract_html_content
2
+ from IPython.display import display, HTML
3
+ from llama_index.utils.workflow import draw_all_possible_flows
4
+ from llama_index.core.tools import FunctionTool
5
+ from llama_index.core.agent import FunctionCallingAgent
6
+ from llama_index.core import Settings
7
+ from llama_parse import LlamaParse
8
+ from llama_index.llms.groq import Groq
 
 
 
 
 
 
 
 
 
 
 
9
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
10
  from llama_index.core import (
11
  VectorStoreIndex,
12
  StorageContext,
13
  load_index_from_storage
14
  )
15
+ import nest_asyncio
16
+ from llama_index.core.workflow import InputRequiredEvent, HumanResponseEvent
17
  from llama_index.core.workflow import (
18
  StartEvent,
19
  StopEvent,
20
  Workflow,
21
  step,
22
  Event,
23
+ Context
 
 
24
  )
25
+ from pathlib import Path
 
 
 
 
 
26
  from queue import Queue
27
+ import gradio as gr
28
+ import whisper
29
  from dotenv import load_dotenv
30
+ import os, json
31
+ import asyncio
32
+
33
# Workflow-wide configuration shared by the steps below.
storage_dir = "./storage"
application_file = "./data/fake_application_form.pdf"
# Accommodate nested event loops (needed when an asyncio loop is already running).
nest_asyncio.apply()

# Load API keys and endpoints from the environment (.env file).
load_dotenv()
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")

# Shared LLM and local embedding model, reused by every workflow instance.
global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# Make the embedding model the LlamaIndex-wide default.
Settings.embed_model = global_embed_model
45
 
46
 
47
class ParseFormEvent(Event):
    # Path to the application form document that still needs parsing.
    application_form: str


class QueryEvent(Event):
    # One question to ask about the resume, plus the form field it answers.
    query: str
    field: str


class ResponseEvent(Event):
    # The answer produced for a single form field.
    response: str


# new!
class FeedbackEvent(Event):
    # Free-form human feedback on the filled-in form.
    feedback: str


class GenerateQuestionsEvent(Event):
    # Marker event: triggers question generation; carries no payload.
    pass
67
 
68
+
69
  class RAGWorkflow(Workflow):
70
  storage_dir = "./storage"
71
+ llm: Groq
72
  query_engine: VectorStoreIndex
73
 
74
  @step
75
  async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
76
+ self.llm = global_llm
77
+ self.storage_dir = storage_dir
78
  if not ev.resume_file:
79
  raise ValueError("No resume file provided")
80
 
81
  if not ev.application_form:
82
  raise ValueError("No application form provided")
83
 
84
+ # ingest the data and set up the query engine
 
 
 
85
  if os.path.exists(self.storage_dir):
86
+ # you've already ingested the resume document
87
  storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
88
  index = load_index_from_storage(storage_context)
89
  else:
90
+ # parse and load the resume document
91
  documents = LlamaParse(
 
 
92
  result_type="markdown",
93
+ content_guideline_instruction="This is a resume, gather related facts together and format it as "
94
+ "bullet points with headers"
95
  ).load_data(ev.resume_file)
96
  # embed and index the documents
97
  index = VectorStoreIndex.from_documents(
98
  documents,
99
+ embed_model=global_embed_model
100
  )
101
  index.storage_context.persist(persist_dir=self.storage_dir)
102
 
103
+ # create a query engine
104
  self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
105
 
106
+ # you no longer need a query to be passed in,
107
+ # you'll be generating the queries instead
108
+ # let's pass the application form to a new step to parse it
109
  return ParseFormEvent(application_form=ev.application_form)
110
 
111
+ # new - separated the form parsing from the question generation
112
  @step
113
  async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
114
  parser = LlamaParse(
 
 
115
  result_type="markdown",
116
+ content_guideline_instruction="This is a job application form. Create a list of all the fields "
117
+ "that need to be filled in.",
118
  formatting_instruction="Return a bulleted list of the fields ONLY."
119
  )
120
 
121
  # get the LLM to convert the parsed form into JSON
122
  result = parser.load_data(ev.application_form)[0]
123
  raw_json = self.llm.complete(
124
+ f"""
125
+ This is a parsed form.
126
+ Convert it into a JSON object containing only the list
127
+ of fields to be filled in, in the form {{ fields: [...] }}.
128
+ <form>{result.text}</form>.
129
+ Return JSON ONLY, no markdown.
130
+ """)
131
  fields = json.loads(raw_json.text)["fields"]
132
 
133
  await ctx.set("fields_to_fill", fields)
134
+ print("\n DEBUG: all fields written to Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
135
 
136
  return GenerateQuestionsEvent()
137
 
138
+ # new - this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
139
  @step
140
  async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
141
 
142
  # get the list of fields to fill in
143
  fields = await ctx.get("fields_to_fill")
144
+ print("\n DEBUG:all fields Read from Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
145
 
146
  # generate one query for each of the fields, and fire them off
147
  for field in fields:
148
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
149
+ # Is there feedback? If so, add it to the query:
150
+ if hasattr(ev, "feedback"):
151
  question += f"""
152
+ \nWe previously got feedback about how we answered the questions.
153
+ It might not be relevant to this particular field, but here it is:
154
+ <feedback>{ev.feedback}</feedback>
155
+ """
156
+ print("\n question : ", question)
157
 
158
  ctx.send_event(QueryEvent(
159
  field=field,
160
  query=question
161
  ))
162
 
163
+ # store the number of fields, so we know how many to wait for later
164
  await ctx.set("total_fields", len(fields))
165
+ print(f"\n DEBUG: total fields from Context : {len(fields)}")
166
+
167
  return
168
 
169
  @step
170
  async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
171
+ response = self.query_engine.query(
172
+ f"This is a question about the specific resume we have in our database: {ev.query}")
 
 
 
 
173
  return ResponseEvent(field=ev.field, response=response.response)
174
 
175
+ # new - we now emit an InputRequiredEvent
176
  @step
177
  async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
178
  # get the total number of fields to wait for
 
180
 
181
  responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
182
  if responses is None:
183
+ return None # do nothing if there's nothing to do yet
184
 
185
  # we've got all the responses!
186
  responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
187
+ print("\n DEBUG: got all responses :\n")
188
 
189
  result = self.llm.complete(f"""
190
  You are given a list of fields in an application form and responses to
 
196
  </responses>
197
  """)
198
 
199
+ print("\n DEBUG: llm combined the fields and responses from resume")
200
+
201
+ # new! save the result for later
202
  await ctx.set("filled_form", str(result))
203
 
204
+ print("\n DEBUG: Write all form fields to context. Now will emit InputRequiredEvent")
205
+
206
+ # new! Let's get a human in the loop
207
  return InputRequiredEvent(
208
  prefix="How does this look? Give me any feedback you have on any of the answers.",
209
  result=result
210
  )
211
 
212
+ # new! Accept the feedback.
213
  @step
214
  async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
215
 
 
232
  return FeedbackEvent(feedback=ev.response)
233
 
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
def transcribe_speech(filepath):
    """Transcribe a recorded audio file to text with a local Whisper model.

    Args:
        filepath: Path to the audio file captured by the Gradio widget,
            or None when no audio was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    if filepath is None:
        # Warn in the UI and bail out: the original fell through here and
        # crashed calling model.transcribe(None).
        gr.Warning("No audio found, please retry.")
        return ""

    # "base" is small and CPU-friendly; fp16=False avoids half-precision
    # warnings/failures on CPU-only machines.
    model = whisper.load_model("base")
    result = model.transcribe(filepath, fp16=False)
    return result["text"]
243
 
 
 
 
 
244
 
245
+ # New! Transcription handler.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  class TranscriptionHandler:
247
 
248
  # we create a queue to hold transcription values
 
277
  self.interface.launch(
278
  share=False,
279
  server_port=8000,
280
+ prevent_thread_lock=True
281
  )
282
 
283
  # we poll every 1.5 seconds waiting for something to end up in the queue
 
290
  await asyncio.sleep(1.5)
291
 
292
 
293
async def main():
    """Run the RAG form-filling workflow end to end, collecting human
    feedback by voice, then render the workflow diagram."""
    workflow = RAGWorkflow(timeout=600, verbose=True)
    handler = workflow.run(
        resume_file="data/fake_resume.pdf",
        application_form="data/fake_application_form.pdf"
    )

    print("DEBUG: Starting event stream...")
    async for event in handler.stream_events():
        print(f"DEBUG: Received event type {type(event).__name__}")
        if not isinstance(event, InputRequiredEvent):
            print("\n handler received event ", event)
            continue

        print("We've filled in your form! Here are the results:\n")
        print(event.result)

        # Capture spoken feedback through the microphone UI.
        voice_input = TranscriptionHandler()
        transcript = await voice_input.get_transcription()

        handler.ctx.send_event(
            HumanResponseEvent(
                response=transcript
            )
        )

    final_result = await handler
    print("Agent complete! Here's your final result:")
    print(str(final_result))

    # Display of the workflow
    diagram_path = Path(__file__).parent / "workflows" / "form_parsing_workflow.html"
    draw_all_possible_flows(workflow, filename=str(diagram_path))
    html_content = extract_html_content(str(diagram_path))
    display(HTML(html_content), metadata=dict(isolated=True))


if __name__ == "__main__":
    asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
app2.py DELETED
@@ -1,228 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- import warnings
5
- import os
6
- import json
7
- import asyncio
8
- from queue import Queue
9
- from dotenv import load_dotenv
10
- import gradio as gr
11
- from llama_cloud_services import LlamaParse
12
- from llama_index.llms.cohere import Cohere
13
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
14
- from llama_index.core import (
15
- VectorStoreIndex,
16
- StorageContext,
17
- load_index_from_storage
18
- )
19
- from llama_index.core.workflow import (
20
- StartEvent,
21
- StopEvent,
22
- Workflow,
23
- step,
24
- Event,
25
- Context,
26
- InputRequiredEvent,
27
- HumanResponseEvent
28
- )
29
- from llama_index.readers.whisper import WhisperReader
30
- import nest_asyncio
31
-
32
-
33
- # Load environment variables
34
- load_dotenv()
35
- CO_API_KEY = os.getenv("COHERE_API_KEY")
36
- llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
37
- LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
38
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
39
-
40
- warnings.filterwarnings('ignore')
41
-
42
- nest_asyncio.apply()
43
-
44
- # Define Event Classes
45
- class ParseFormEvent(Event):
46
- application_form: str
47
-
48
- class QueryEvent(Event):
49
- query: str
50
-
51
- class ResponseEvent(Event):
52
- response: str
53
-
54
- class FeedbackEvent(Event):
55
- feedback: str
56
-
57
- class GenerateQuestionsEvent(Event):
58
- pass
59
-
60
- # Define Workflow
61
- class RAGWorkflow(Workflow):
62
- storage_dir = "./storage"
63
- llm: Cohere
64
- query_engine: VectorStoreIndex
65
-
66
- @step
67
- async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
68
- if not ev.resume_file:
69
- raise ValueError("No resume file provided")
70
- if not ev.application_form:
71
- raise ValueError("No application form provided")
72
-
73
- self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
74
-
75
- if os.path.exists(self.storage_dir):
76
- storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
77
- index = load_index_from_storage(storage_context)
78
- else:
79
- documents = LlamaParse(
80
- api_key=llama_cloud_api_key,
81
- base_url=LLAMA_CLOUD_BASE_URL,
82
- result_type="markdown",
83
- content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
84
- ).load_data(ev.resume_file)
85
-
86
- index = VectorStoreIndex.from_documents(
87
- documents,
88
- embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
89
- )
90
- index.storage_context.persist(persist_dir=self.storage_dir)
91
-
92
- self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
93
- return ParseFormEvent(application_form=ev.application_form)
94
-
95
- @step
96
- async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
97
- parser = LlamaParse(
98
- api_key=llama_cloud_api_key,
99
- base_url=LLAMA_CLOUD_BASE_URL,
100
- result_type="markdown",
101
- content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
102
- formatting_instruction="Return a bulleted list of the fields ONLY."
103
- )
104
-
105
- result = parser.load_data(ev.application_form)[0]
106
- raw_json = self.llm.complete(
107
- f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown."
108
- )
109
- fields = json.loads(raw_json.text)["fields"]
110
-
111
- await ctx.set("fields_to_fill", fields)
112
- return GenerateQuestionsEvent()
113
-
114
- @step
115
- async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
116
- fields = await ctx.get("fields_to_fill")
117
-
118
- for field in fields:
119
- question = f"How would you answer this question about the candidate? <field>{field}</field>"
120
-
121
- if hasattr(ev, "feedback"):
122
- question += f"\nPrevious feedback: <feedback>{ev.feedback}</feedback>"
123
-
124
- ctx.send_event(QueryEvent(field=field, query=question))
125
-
126
- await ctx.set("total_fields", len(fields))
127
- return
128
-
129
- @step
130
- async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
131
- response = self.query_engine.query(
132
- f"This is a question about the specific resume we have in our database: {ev.query}"
133
- )
134
- return ResponseEvent(field=ev.field, response=response.response)
135
-
136
- @step
137
- async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
138
- total_fields = await ctx.get("total_fields")
139
- responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
140
-
141
- if responses is None:
142
- return None
143
-
144
- responseList = "\n".join(f"Field: {r.field}\nResponse: {r.response}" for r in responses)
145
- result = self.llm.complete(
146
- f"You are given a list of fields in an application form and responses to questions about those fields from a resume. Combine the two into a list of fields and succinct, factual answers.\n<responses>{responseList}</responses>"
147
- )
148
-
149
- await ctx.set("filled_form", str(result))
150
-
151
- return InputRequiredEvent(
152
- prefix="How does this look? Provide feedback.",
153
- result=result
154
- )
155
-
156
- @step
157
- async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
158
- result = self.llm.complete(
159
- f"You have received feedback on the form-filling task.\n<feedback>{ev.response}</feedback>\nIf everything is fine, respond with 'OKAY'. Otherwise, respond with 'FEEDBACK'."
160
- )
161
-
162
- verdict = result.text.strip()
163
- return StopEvent(result=await ctx.get("filled_form")) if verdict == "OKAY" else FeedbackEvent(feedback=ev.response)
164
-
165
-
166
- # Transcription Handler
167
- class TranscriptionHandler:
168
- def __init__(self):
169
- self.transcription_queue = Queue()
170
- self.interface = None
171
-
172
- def store_transcription(self, output):
173
- self.transcription_queue.put(output)
174
- return output
175
-
176
- def create_interface(self):
177
- mic_transcribe = gr.Interface(
178
- fn=lambda x: self.store_transcription(transcribe_speech(x)),
179
- inputs=gr.Audio(sources=["microphone"], type="filepath"),
180
- outputs=gr.Textbox(label="Transcription")
181
- )
182
- self.interface = gr.Blocks()
183
- with self.interface:
184
- gr.TabbedInterface([mic_transcribe], ["Transcribe Microphone"])
185
- return self.interface
186
-
187
- async def get_transcription(self):
188
- self.interface = self.create_interface()
189
- self.interface.launch(share=False, server_port=8000, inbrowser=True)
190
-
191
- while True:
192
- if not self.transcription_queue.empty():
193
- result = self.transcription_queue.get()
194
- self.interface.close()
195
- return result
196
- await asyncio.sleep(1.5)
197
-
198
-
199
- # Transcription function
200
- def transcribe_speech(filepath):
201
- if not filepath:
202
- gr.Warning("No audio found, please retry.")
203
- reader = WhisperReader(model="whisper-1", api_key=OPENAI_API_KEY)
204
- documents = reader.load_data(filepath)
205
- return documents[0].text
206
-
207
-
208
- # Async Wrapper
209
- async def main():
210
- w = RAGWorkflow(timeout=600, verbose=False)
211
-
212
- handler = w.run(
213
- resume_file="./data/fake_resume.pdf",
214
- application_form="./data/fake_application_form.pdf"
215
- )
216
-
217
- async for event in handler.stream_events():
218
- if isinstance(event, InputRequiredEvent):
219
- transcription_handler = TranscriptionHandler()
220
- response = await transcription_handler.get_transcription()
221
- handler.ctx.send_event(HumanResponseEvent(response=response))
222
-
223
- response = await handler
224
- print("Agent complete! Here's your final result:")
225
- print(str(response))
226
-
227
- if __name__ == "__main__":
228
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -15,6 +15,7 @@ llama-index-readers-llama-parse
15
  llama-index-utils-workflow
16
  #openai-whisper ==20240930
17
  #llama-index-readers-whisper
 
18
  pydantic
19
  pydantic_core
20
  dotenv
 
15
  llama-index-utils-workflow
16
  #openai-whisper ==20240930
17
  #llama-index-readers-whisper
18
+ IPython
19
  pydantic
20
  pydantic_core
21
  dotenv
test_audio.py CHANGED
@@ -1,13 +1,9 @@
1
- from llama_index.readers.whisper import WhisperReader
2
- from faster_whisper import WhisperModel
3
  import gradio as gr
4
  from pathlib import Path
5
  from dotenv import load_dotenv
6
  import os
7
 
8
- load_dotenv()
9
- openai_api_key = os.getenv("OPENAI_API_KEY")
10
-
11
  transcription_value = ""
12
 
13
 
@@ -15,10 +11,10 @@ def transcribe_speech(filepath):
15
  if filepath is None:
16
  gr.Warning("No audio found, please retry.")
17
 
18
- model = WhisperModel("base", compute_type="float32")
19
- segments, _ = model.transcribe(filepath)
20
 
21
- return " ".join(segment.text for segment in segments)
22
 
23
 
24
  def store_transcription(output):
 
1
+ import whisper
 
2
  import gradio as gr
3
  from pathlib import Path
4
  from dotenv import load_dotenv
5
  import os
6
 
 
 
 
7
  transcription_value = ""
8
 
9
 
 
11
  if filepath is None:
12
  gr.Warning("No audio found, please retry.")
13
 
14
+ model = whisper.load_model("base")
15
+ result = model.transcribe(filepath, fp16=False)
16
 
17
+ return result["text"]
18
 
19
 
20
  def store_transcription(output):