NaderAfshar commited on
Commit
c2cddf6
·
1 Parent(s): d1cf1d1

Updated app.py with functioning code. Voice input is also fixed.

Browse files
Files changed (4) hide show
  1. app.py +118 -186
  2. app2.py +0 -228
  3. requirements.txt +1 -0
  4. test_audio.py +4 -8
app.py CHANGED
@@ -1,178 +1,178 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # # Lesson 6: Use your voice
5
-
6
- # **Lesson objective**: Get voice feedback
7
- #
8
- # So far we've set up a moderately complex workflows with a human feedback loop. Let's run it through the visualizer to see what it looks like.
9
-
10
- # <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the notebook cell by cell. Please try to avoid running all cells at once.</div>
11
-
12
- # In[1]:
13
-
14
-
15
- # Warning control
16
- import warnings
17
- import os, json
18
- from llama_cloud_services import LlamaParse
19
- from llama_index.llms.cohere import Cohere
20
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
21
  from llama_index.core import (
22
  VectorStoreIndex,
23
  StorageContext,
24
  load_index_from_storage
25
  )
 
 
26
  from llama_index.core.workflow import (
27
  StartEvent,
28
  StopEvent,
29
  Workflow,
30
  step,
31
  Event,
32
- Context,
33
- InputRequiredEvent,
34
- HumanResponseEvent
35
  )
36
- from llama_index.utils.workflow import draw_all_possible_flows
37
- #import whisper
38
- from llama_index.readers.whisper import WhisperReader
39
- import gradio as gr
40
- import asyncio
41
- import nest_asyncio
42
  from queue import Queue
 
 
43
  from dotenv import load_dotenv
 
 
 
 
 
 
44
 
45
- # Load environment variables
46
  load_dotenv()
47
- CO_API_KEY = os.getenv("COHERE_API_KEY")
48
  llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
 
49
  LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
50
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
51
-
52
- warnings.filterwarnings('ignore')
53
 
54
- nest_asyncio.apply() # Accommodate nested events.
55
-
56
- transcription_value = None
57
 
58
 
59
  class ParseFormEvent(Event):
60
  application_form: str
61
 
 
62
  class QueryEvent(Event):
63
  query: str
 
 
64
 
65
  class ResponseEvent(Event):
66
  response: str
67
 
 
 
68
  class FeedbackEvent(Event):
69
  feedback: str
70
 
 
71
  class GenerateQuestionsEvent(Event):
72
  pass
73
 
 
74
  class RAGWorkflow(Workflow):
75
  storage_dir = "./storage"
76
- llm: Cohere
77
  query_engine: VectorStoreIndex
78
 
79
  @step
80
  async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
81
-
 
82
  if not ev.resume_file:
83
  raise ValueError("No resume file provided")
84
 
85
  if not ev.application_form:
86
  raise ValueError("No application form provided")
87
 
88
- # give ourselves an LLM to work with
89
- self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
90
-
91
- # ingest our data and set up the query engine
92
  if os.path.exists(self.storage_dir):
93
- # we've already ingested our documents
94
  storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
95
  index = load_index_from_storage(storage_context)
96
  else:
97
- # we need to parse and load our documents
98
  documents = LlamaParse(
99
- api_key=llama_cloud_api_key,
100
- base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
101
  result_type="markdown",
102
- content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
 
103
  ).load_data(ev.resume_file)
104
  # embed and index the documents
105
  index = VectorStoreIndex.from_documents(
106
  documents,
107
- embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
108
  )
109
  index.storage_context.persist(persist_dir=self.storage_dir)
110
 
111
- # either way, create a query engine
112
  self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
113
 
114
- # let's pass our application form to a new step where we parse it
 
 
115
  return ParseFormEvent(application_form=ev.application_form)
116
 
117
- # we've separated the form parsing from the question generation
118
  @step
119
  async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
120
  parser = LlamaParse(
121
- api_key=llama_cloud_api_key,
122
- base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
123
  result_type="markdown",
124
- content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
 
125
  formatting_instruction="Return a bulleted list of the fields ONLY."
126
  )
127
 
128
  # get the LLM to convert the parsed form into JSON
129
  result = parser.load_data(ev.application_form)[0]
130
  raw_json = self.llm.complete(
131
- f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown.")
 
 
 
 
 
 
132
  fields = json.loads(raw_json.text)["fields"]
133
 
134
  await ctx.set("fields_to_fill", fields)
 
135
 
136
  return GenerateQuestionsEvent()
137
 
138
- # this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
139
  @step
140
  async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
141
 
142
  # get the list of fields to fill in
143
  fields = await ctx.get("fields_to_fill")
 
144
 
145
  # generate one query for each of the fields, and fire them off
146
  for field in fields:
147
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
148
-
149
- if hasattr(ev,"feedback"):
150
  question += f"""
151
- \nWe previously got feedback about how we answered the questions.
152
- It might not be relevant to this particular field, but here it is:
153
- <feedback>{ev.feedback}</feedback>
154
- """
 
155
 
156
  ctx.send_event(QueryEvent(
157
  field=field,
158
  query=question
159
  ))
160
 
161
- # store the number of fields so we know how many to wait for later
162
  await ctx.set("total_fields", len(fields))
 
 
163
  return
164
 
165
  @step
166
  async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
167
- print(f"Asking question: {ev.query}")
168
-
169
- response = self.query_engine.query(f"This is a question about the specific resume we have in our database: {ev.query}")
170
-
171
- print(f"Answer was: {str(response)}")
172
-
173
  return ResponseEvent(field=ev.field, response=response.response)
174
 
175
- # we now emit an InputRequiredEvent
176
  @step
177
  async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
178
  # get the total number of fields to wait for
@@ -180,10 +180,11 @@ class RAGWorkflow(Workflow):
180
 
181
  responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
182
  if responses is None:
183
- return None # do nothing if there's nothing to do yet
184
 
185
  # we've got all the responses!
186
  responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
 
187
 
188
  result = self.llm.complete(f"""
189
  You are given a list of fields in an application form and responses to
@@ -195,16 +196,20 @@ class RAGWorkflow(Workflow):
195
  </responses>
196
  """)
197
 
198
- # save the result for later
 
 
199
  await ctx.set("filled_form", str(result))
200
 
201
- # Let's get a human in the loop
 
 
202
  return InputRequiredEvent(
203
  prefix="How does this look? Give me any feedback you have on any of the answers.",
204
  result=result
205
  )
206
 
207
- # Accept the feedback.
208
  @step
209
  async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
210
 
@@ -227,103 +232,17 @@ class RAGWorkflow(Workflow):
227
  return FeedbackEvent(feedback=ev.response)
228
 
229
 
230
-
231
- WORKFLOW_FILE = "workflows/RAG-EventDriven.html"
232
- draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)
233
-
234
-
235
- from IPython.display import display, HTML, DisplayHandle
236
- from helper import extract_html_content
237
-
238
- html_content = extract_html_content(WORKFLOW_FILE)
239
- display(HTML(html_content), metadata=dict(isolated=True))
240
-
241
-
242
- # Cool! You can see the path all the way to the end and the feedback loop is clear.
243
-
244
- # <p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
245
- # &nbsp; <b>Different Run Results:</b> The output generated by AI chat models can vary with each execution due to their dynamic, probabilistic nature. Don't be surprised if your results differ from those shown in the video.</p>
246
-
247
- # ## Getting voice feedback
248
-
249
- # Now, just for fun, you'll do one more thing: change the feedback from text feedback to actual words spoken out loud. To do this we'll use a different model from OpenAI called Whisper. LlamaIndex has a built-in way to transcribe audio files into text using Whisper.
250
- #
251
- # Here's a function that takes a file and uses Whisper to return just the text:
252
-
253
-
254
  def transcribe_speech(filepath):
255
  if filepath is None:
256
  gr.Warning("No audio found, please retry.")
257
- audio_file= open(filepath, "rb")
258
- reader = WhisperReader(
259
- model="whisper-1",
260
- api_key=OPENAI_API_KEY,
261
- )
262
- documents = reader.load_data(filepath)
263
- return documents[0].text
264
-
265
 
266
- # But before we can use it, you need to capture some audio from your microphone. That involves some extra steps!
267
- #
268
- # First, create a callback function that saves data to a global variable.
269
 
 
270
 
271
- def store_transcription(output):
272
- global transcription_value
273
- transcription_value = output
274
- return output
275
 
276
-
277
- # Now use Gradio, which has special widgets that can render inside a notebook, to create an interface
278
- # for capturing audio from a microphone. When the audio is captured, it calls `transcribe_speech` on the recorded data,
279
- # and calls `store_transcription` on that.
280
-
281
-
282
- mic_transcribe = gr.Interface(
283
- fn=lambda x: store_transcription(transcribe_speech(x)),
284
- inputs=gr.Audio(sources=["microphone"],
285
- type="filepath"),
286
- outputs=gr.Textbox(label="Transcription"))
287
-
288
-
289
- # In Gradio, define a visual interface containing this microphone input and output, and then launch it:
290
-
291
- # Make sure to wait for the gradio interface to load. A popup window will appear and ask you to allow the use of your
292
- # microphone. To record audio, make sure to click on record -> stop -> submit. Make sure the audio is captured
293
- # before clicking on 'submit'.
294
-
295
-
296
- test_interface = gr.Blocks()
297
- with test_interface:
298
- gr.TabbedInterface(
299
- [mic_transcribe],
300
- ["Transcribe Microphone"]
301
- )
302
-
303
- test_interface.launch(
304
- share=True,
305
- show_error=True,
306
- server_port=8000,
307
- prevent_thread_lock=True
308
- )
309
-
310
-
311
- # You can now print out the transcription, which is stored in that global variable you created earlier:
312
-
313
-
314
- print(transcription_value)
315
-
316
-
317
- # run Gradio again, so it's a good idea to shut down the running Gradio interface.
318
-
319
-
320
-
321
- test_interface.close()
322
-
323
-
324
- # Make sure to run the previous cell to close the Gradio interface before running the next cell
325
-
326
- # Now create an entirely new class, a Transcription Handler.
327
  class TranscriptionHandler:
328
 
329
  # we create a queue to hold transcription values
@@ -358,7 +277,7 @@ class TranscriptionHandler:
358
  self.interface.launch(
359
  share=False,
360
  server_port=8000,
361
- inbrowser=True # open in a browser
362
  )
363
 
364
  # we poll every 1.5 seconds waiting for something to end up in the queue
@@ -371,29 +290,42 @@ class TranscriptionHandler:
371
  await asyncio.sleep(1.5)
372
 
373
 
374
- # Now you have a transcription handler, you can use it instead of the keyboard input interface when you're getting human input when you run your workflows:
 
 
 
 
 
375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
 
 
 
377
 
378
- w = RAGWorkflow(timeout=600, verbose=False)
 
 
 
 
379
 
380
- handler = w.run(
381
- resume_file="./data/fake_resume.pdf",
382
- application_form="./data/fake_application_form.pdf"
383
- )
384
 
385
- async for event in handler.stream_events():
386
- if isinstance(event, InputRequiredEvent):
387
- # Get transcription
388
- transcription_handler = TranscriptionHandler()
389
- response = await transcription_handler.get_transcription()
390
-
391
- handler.ctx.send_event(
392
- HumanResponseEvent(
393
- response=response
394
- )
395
- )
396
-
397
- response = await handler
398
- print("Agent complete! Here's your final result:")
399
- print(str(response))
 
1
+ from helper import extract_html_content
2
+ from IPython.display import display, HTML
3
+ from llama_index.utils.workflow import draw_all_possible_flows
4
+ from llama_index.core.tools import FunctionTool
5
+ from llama_index.core.agent import FunctionCallingAgent
6
+ from llama_index.core import Settings
7
+ from llama_parse import LlamaParse
8
+ from llama_index.llms.groq import Groq
 
 
 
 
 
 
 
 
 
 
 
9
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
10
  from llama_index.core import (
11
  VectorStoreIndex,
12
  StorageContext,
13
  load_index_from_storage
14
  )
15
+ import nest_asyncio
16
+ from llama_index.core.workflow import InputRequiredEvent, HumanResponseEvent
17
  from llama_index.core.workflow import (
18
  StartEvent,
19
  StopEvent,
20
  Workflow,
21
  step,
22
  Event,
23
+ Context
 
 
24
  )
25
+ from pathlib import Path
 
 
 
 
 
26
  from queue import Queue
27
+ import gradio as gr
28
+ import whisper
29
  from dotenv import load_dotenv
30
+ import os, json
31
+ import asyncio
32
+
33
# Workflow-wide configuration shared by the steps below.
storage_dir = "./storage"
application_file = "./data/fake_application_form.pdf"
# Accommodate nested event loops (needed when an asyncio loop is already running).
nest_asyncio.apply()

# Load API keys and endpoints from the environment (.env file).
load_dotenv()
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")

# Shared LLM and local embedding model, reused by every workflow instance.
global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# Make the embedding model the LlamaIndex-wide default.
Settings.embed_model = global_embed_model
45
 
46
 
47
class ParseFormEvent(Event):
    # Path to the application form document that still needs parsing.
    application_form: str


class QueryEvent(Event):
    # One question to ask about the resume, plus the form field it answers.
    query: str
    field: str


class ResponseEvent(Event):
    # The answer produced for a single form field.
    response: str


# new!
class FeedbackEvent(Event):
    # Free-form human feedback on the filled-in form.
    feedback: str


class GenerateQuestionsEvent(Event):
    # Marker event: triggers question generation; carries no payload.
    pass
67
 
68
+
69
  class RAGWorkflow(Workflow):
70
  storage_dir = "./storage"
71
+ llm: Groq
72
  query_engine: VectorStoreIndex
73
 
74
  @step
75
  async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
76
+ self.llm = global_llm
77
+ self.storage_dir = storage_dir
78
  if not ev.resume_file:
79
  raise ValueError("No resume file provided")
80
 
81
  if not ev.application_form:
82
  raise ValueError("No application form provided")
83
 
84
+ # ingest the data and set up the query engine
 
 
 
85
  if os.path.exists(self.storage_dir):
86
+ # you've already ingested the resume document
87
  storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
88
  index = load_index_from_storage(storage_context)
89
  else:
90
+ # parse and load the resume document
91
  documents = LlamaParse(
 
 
92
  result_type="markdown",
93
+ content_guideline_instruction="This is a resume, gather related facts together and format it as "
94
+ "bullet points with headers"
95
  ).load_data(ev.resume_file)
96
  # embed and index the documents
97
  index = VectorStoreIndex.from_documents(
98
  documents,
99
+ embed_model=global_embed_model
100
  )
101
  index.storage_context.persist(persist_dir=self.storage_dir)
102
 
103
+ # create a query engine
104
  self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
105
 
106
+ # you no longer need a query to be passed in,
107
+ # you'll be generating the queries instead
108
+ # let's pass the application form to a new step to parse it
109
  return ParseFormEvent(application_form=ev.application_form)
110
 
111
+ # new - separated the form parsing from the question generation
112
  @step
113
  async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
114
  parser = LlamaParse(
 
 
115
  result_type="markdown",
116
+ content_guideline_instruction="This is a job application form. Create a list of all the fields "
117
+ "that need to be filled in.",
118
  formatting_instruction="Return a bulleted list of the fields ONLY."
119
  )
120
 
121
  # get the LLM to convert the parsed form into JSON
122
  result = parser.load_data(ev.application_form)[0]
123
  raw_json = self.llm.complete(
124
+ f"""
125
+ This is a parsed form.
126
+ Convert it into a JSON object containing only the list
127
+ of fields to be filled in, in the form {{ fields: [...] }}.
128
+ <form>{result.text}</form>.
129
+ Return JSON ONLY, no markdown.
130
+ """)
131
  fields = json.loads(raw_json.text)["fields"]
132
 
133
  await ctx.set("fields_to_fill", fields)
134
+ print("\n DEBUG: all fields written to Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
135
 
136
  return GenerateQuestionsEvent()
137
 
138
+ # new - this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
139
  @step
140
  async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
141
 
142
  # get the list of fields to fill in
143
  fields = await ctx.get("fields_to_fill")
144
+ print("\n DEBUG:all fields Read from Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
145
 
146
  # generate one query for each of the fields, and fire them off
147
  for field in fields:
148
  question = f"How would you answer this question about the candidate? <field>{field}</field>"
149
+ # Is there feedback? If so, add it to the query:
150
+ if hasattr(ev, "feedback"):
151
  question += f"""
152
+ \nWe previously got feedback about how we answered the questions.
153
+ It might not be relevant to this particular field, but here it is:
154
+ <feedback>{ev.feedback}</feedback>
155
+ """
156
+ print("\n question : ", question)
157
 
158
  ctx.send_event(QueryEvent(
159
  field=field,
160
  query=question
161
  ))
162
 
163
+ # store the number of fields, so we know how many to wait for later
164
  await ctx.set("total_fields", len(fields))
165
+ print(f"\n DEBUG: total fields from Context : {len(fields)}")
166
+
167
  return
168
 
169
  @step
170
  async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
171
+ response = self.query_engine.query(
172
+ f"This is a question about the specific resume we have in our database: {ev.query}")
 
 
 
 
173
  return ResponseEvent(field=ev.field, response=response.response)
174
 
175
+ # new - we now emit an InputRequiredEvent
176
  @step
177
  async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
178
  # get the total number of fields to wait for
 
180
 
181
  responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
182
  if responses is None:
183
+ return None # do nothing if there's nothing to do yet
184
 
185
  # we've got all the responses!
186
  responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
187
+ print("\n DEBUG: got all responses :\n")
188
 
189
  result = self.llm.complete(f"""
190
  You are given a list of fields in an application form and responses to
 
196
  </responses>
197
  """)
198
 
199
+ print("\n DEBUG: llm combined the fields and responses from resume")
200
+
201
+ # new! save the result for later
202
  await ctx.set("filled_form", str(result))
203
 
204
+ print("\n DEBUG: Write all form fields to context. Now will emit InputRequiredEvent")
205
+
206
+ # new! Let's get a human in the loop
207
  return InputRequiredEvent(
208
  prefix="How does this look? Give me any feedback you have on any of the answers.",
209
  result=result
210
  )
211
 
212
+ # new! Accept the feedback.
213
  @step
214
  async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
215
 
 
232
  return FeedbackEvent(feedback=ev.response)
233
 
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
def transcribe_speech(filepath):
    """Transcribe a recorded audio file to text with a local Whisper model.

    Args:
        filepath: Path to the audio file captured by the Gradio widget,
            or None when no audio was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    if filepath is None:
        # Warn in the UI and bail out: the original fell through here and
        # crashed calling model.transcribe(None).
        gr.Warning("No audio found, please retry.")
        return ""

    # "base" is small and CPU-friendly; fp16=False avoids half-precision
    # warnings/failures on CPU-only machines.
    model = whisper.load_model("base")
    result = model.transcribe(filepath, fp16=False)
    return result["text"]
243
 
 
 
 
 
244
 
245
+ # New! Transcription handler.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  class TranscriptionHandler:
247
 
248
  # we create a queue to hold transcription values
 
277
  self.interface.launch(
278
  share=False,
279
  server_port=8000,
280
+ prevent_thread_lock=True
281
  )
282
 
283
  # we poll every 1.5 seconds waiting for something to end up in the queue
 
290
  await asyncio.sleep(1.5)
291
 
292
 
293
async def main():
    """Run the RAG form-filling workflow end to end, collecting human
    feedback by voice, then render the workflow diagram."""
    workflow = RAGWorkflow(timeout=600, verbose=True)
    handler = workflow.run(
        resume_file="data/fake_resume.pdf",
        application_form="data/fake_application_form.pdf"
    )

    print("DEBUG: Starting event stream...")
    async for event in handler.stream_events():
        print(f"DEBUG: Received event type {type(event).__name__}")
        if not isinstance(event, InputRequiredEvent):
            print("\n handler received event ", event)
            continue

        print("We've filled in your form! Here are the results:\n")
        print(event.result)

        # Capture spoken feedback through the microphone UI.
        voice_input = TranscriptionHandler()
        transcript = await voice_input.get_transcription()

        handler.ctx.send_event(
            HumanResponseEvent(
                response=transcript
            )
        )

    final_result = await handler
    print("Agent complete! Here's your final result:")
    print(str(final_result))

    # Display of the workflow
    diagram_path = Path(__file__).parent / "workflows" / "form_parsing_workflow.html"
    draw_all_possible_flows(workflow, filename=str(diagram_path))
    html_content = extract_html_content(str(diagram_path))
    display(HTML(html_content), metadata=dict(isolated=True))


if __name__ == "__main__":
    asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
app2.py DELETED
@@ -1,228 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- import warnings
5
- import os
6
- import json
7
- import asyncio
8
- from queue import Queue
9
- from dotenv import load_dotenv
10
- import gradio as gr
11
- from llama_cloud_services import LlamaParse
12
- from llama_index.llms.cohere import Cohere
13
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
14
- from llama_index.core import (
15
- VectorStoreIndex,
16
- StorageContext,
17
- load_index_from_storage
18
- )
19
- from llama_index.core.workflow import (
20
- StartEvent,
21
- StopEvent,
22
- Workflow,
23
- step,
24
- Event,
25
- Context,
26
- InputRequiredEvent,
27
- HumanResponseEvent
28
- )
29
- from llama_index.readers.whisper import WhisperReader
30
- import nest_asyncio
31
-
32
-
33
- # Load environment variables
34
- load_dotenv()
35
- CO_API_KEY = os.getenv("COHERE_API_KEY")
36
- llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
37
- LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
38
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
39
-
40
- warnings.filterwarnings('ignore')
41
-
42
- nest_asyncio.apply()
43
-
44
- # Define Event Classes
45
- class ParseFormEvent(Event):
46
- application_form: str
47
-
48
- class QueryEvent(Event):
49
- query: str
50
-
51
- class ResponseEvent(Event):
52
- response: str
53
-
54
- class FeedbackEvent(Event):
55
- feedback: str
56
-
57
- class GenerateQuestionsEvent(Event):
58
- pass
59
-
60
- # Define Workflow
61
- class RAGWorkflow(Workflow):
62
- storage_dir = "./storage"
63
- llm: Cohere
64
- query_engine: VectorStoreIndex
65
-
66
- @step
67
- async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
68
- if not ev.resume_file:
69
- raise ValueError("No resume file provided")
70
- if not ev.application_form:
71
- raise ValueError("No application form provided")
72
-
73
- self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
74
-
75
- if os.path.exists(self.storage_dir):
76
- storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
77
- index = load_index_from_storage(storage_context)
78
- else:
79
- documents = LlamaParse(
80
- api_key=llama_cloud_api_key,
81
- base_url=LLAMA_CLOUD_BASE_URL,
82
- result_type="markdown",
83
- content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
84
- ).load_data(ev.resume_file)
85
-
86
- index = VectorStoreIndex.from_documents(
87
- documents,
88
- embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
89
- )
90
- index.storage_context.persist(persist_dir=self.storage_dir)
91
-
92
- self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
93
- return ParseFormEvent(application_form=ev.application_form)
94
-
95
- @step
96
- async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
97
- parser = LlamaParse(
98
- api_key=llama_cloud_api_key,
99
- base_url=LLAMA_CLOUD_BASE_URL,
100
- result_type="markdown",
101
- content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
102
- formatting_instruction="Return a bulleted list of the fields ONLY."
103
- )
104
-
105
- result = parser.load_data(ev.application_form)[0]
106
- raw_json = self.llm.complete(
107
- f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown."
108
- )
109
- fields = json.loads(raw_json.text)["fields"]
110
-
111
- await ctx.set("fields_to_fill", fields)
112
- return GenerateQuestionsEvent()
113
-
114
- @step
115
- async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
116
- fields = await ctx.get("fields_to_fill")
117
-
118
- for field in fields:
119
- question = f"How would you answer this question about the candidate? <field>{field}</field>"
120
-
121
- if hasattr(ev, "feedback"):
122
- question += f"\nPrevious feedback: <feedback>{ev.feedback}</feedback>"
123
-
124
- ctx.send_event(QueryEvent(field=field, query=question))
125
-
126
- await ctx.set("total_fields", len(fields))
127
- return
128
-
129
- @step
130
- async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
131
- response = self.query_engine.query(
132
- f"This is a question about the specific resume we have in our database: {ev.query}"
133
- )
134
- return ResponseEvent(field=ev.field, response=response.response)
135
-
136
- @step
137
- async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
138
- total_fields = await ctx.get("total_fields")
139
- responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
140
-
141
- if responses is None:
142
- return None
143
-
144
- responseList = "\n".join(f"Field: {r.field}\nResponse: {r.response}" for r in responses)
145
- result = self.llm.complete(
146
- f"You are given a list of fields in an application form and responses to questions about those fields from a resume. Combine the two into a list of fields and succinct, factual answers.\n<responses>{responseList}</responses>"
147
- )
148
-
149
- await ctx.set("filled_form", str(result))
150
-
151
- return InputRequiredEvent(
152
- prefix="How does this look? Provide feedback.",
153
- result=result
154
- )
155
-
156
- @step
157
- async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
158
- result = self.llm.complete(
159
- f"You have received feedback on the form-filling task.\n<feedback>{ev.response}</feedback>\nIf everything is fine, respond with 'OKAY'. Otherwise, respond with 'FEEDBACK'."
160
- )
161
-
162
- verdict = result.text.strip()
163
- return StopEvent(result=await ctx.get("filled_form")) if verdict == "OKAY" else FeedbackEvent(feedback=ev.response)
164
-
165
-
166
- # Transcription Handler
167
- class TranscriptionHandler:
168
- def __init__(self):
169
- self.transcription_queue = Queue()
170
- self.interface = None
171
-
172
- def store_transcription(self, output):
173
- self.transcription_queue.put(output)
174
- return output
175
-
176
- def create_interface(self):
177
- mic_transcribe = gr.Interface(
178
- fn=lambda x: self.store_transcription(transcribe_speech(x)),
179
- inputs=gr.Audio(sources=["microphone"], type="filepath"),
180
- outputs=gr.Textbox(label="Transcription")
181
- )
182
- self.interface = gr.Blocks()
183
- with self.interface:
184
- gr.TabbedInterface([mic_transcribe], ["Transcribe Microphone"])
185
- return self.interface
186
-
187
- async def get_transcription(self):
188
- self.interface = self.create_interface()
189
- self.interface.launch(share=False, server_port=8000, inbrowser=True)
190
-
191
- while True:
192
- if not self.transcription_queue.empty():
193
- result = self.transcription_queue.get()
194
- self.interface.close()
195
- return result
196
- await asyncio.sleep(1.5)
197
-
198
-
199
- # Transcription function
200
- def transcribe_speech(filepath):
201
- if not filepath:
202
- gr.Warning("No audio found, please retry.")
203
- reader = WhisperReader(model="whisper-1", api_key=OPENAI_API_KEY)
204
- documents = reader.load_data(filepath)
205
- return documents[0].text
206
-
207
-
208
- # Async Wrapper
209
- async def main():
210
- w = RAGWorkflow(timeout=600, verbose=False)
211
-
212
- handler = w.run(
213
- resume_file="./data/fake_resume.pdf",
214
- application_form="./data/fake_application_form.pdf"
215
- )
216
-
217
- async for event in handler.stream_events():
218
- if isinstance(event, InputRequiredEvent):
219
- transcription_handler = TranscriptionHandler()
220
- response = await transcription_handler.get_transcription()
221
- handler.ctx.send_event(HumanResponseEvent(response=response))
222
-
223
- response = await handler
224
- print("Agent complete! Here's your final result:")
225
- print(str(response))
226
-
227
- if __name__ == "__main__":
228
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -15,6 +15,7 @@ llama-index-readers-llama-parse
15
  llama-index-utils-workflow
16
  #openai-whisper ==20240930
17
  #llama-index-readers-whisper
 
18
  pydantic
19
  pydantic_core
20
  dotenv
 
15
  llama-index-utils-workflow
16
  #openai-whisper ==20240930
17
  #llama-index-readers-whisper
18
+ IPython
19
  pydantic
20
  pydantic_core
21
  dotenv
test_audio.py CHANGED
@@ -1,13 +1,9 @@
1
- from llama_index.readers.whisper import WhisperReader
2
- from faster_whisper import WhisperModel
3
  import gradio as gr
4
  from pathlib import Path
5
  from dotenv import load_dotenv
6
  import os
7
 
8
- load_dotenv()
9
- openai_api_key = os.getenv("OPENAI_API_KEY")
10
-
11
  transcription_value = ""
12
 
13
 
@@ -15,10 +11,10 @@ def transcribe_speech(filepath):
15
  if filepath is None:
16
  gr.Warning("No audio found, please retry.")
17
 
18
- model = WhisperModel("base", compute_type="float32")
19
- segments, _ = model.transcribe(filepath)
20
 
21
- return " ".join(segment.text for segment in segments)
22
 
23
 
24
  def store_transcription(output):
 
1
+ import whisper
 
2
  import gradio as gr
3
  from pathlib import Path
4
  from dotenv import load_dotenv
5
  import os
6
 
 
 
 
7
  transcription_value = ""
8
 
9
 
 
11
  if filepath is None:
12
  gr.Warning("No audio found, please retry.")
13
 
14
+ model = whisper.load_model("base")
15
+ result = model.transcribe(filepath, fp16=False)
16
 
17
+ return result["text"]
18
 
19
 
20
  def store_transcription(output):