NaderAfshar commited on
Commit ·
c2cddf6
1
Parent(s): d1cf1d1
Updated app.py with functioning code. Voice input is also fixed.
Browse files- app.py +118 -186
- app2.py +0 -228
- requirements.txt +1 -0
- test_audio.py +4 -8
app.py
CHANGED
|
@@ -1,178 +1,178 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the notebook cell by cell. Please try to avoid running all cells at once.</div>
|
| 11 |
-
|
| 12 |
-
# In[1]:
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# Warning control
|
| 16 |
-
import warnings
|
| 17 |
-
import os, json
|
| 18 |
-
from llama_cloud_services import LlamaParse
|
| 19 |
-
from llama_index.llms.cohere import Cohere
|
| 20 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 21 |
from llama_index.core import (
|
| 22 |
VectorStoreIndex,
|
| 23 |
StorageContext,
|
| 24 |
load_index_from_storage
|
| 25 |
)
|
|
|
|
|
|
|
| 26 |
from llama_index.core.workflow import (
|
| 27 |
StartEvent,
|
| 28 |
StopEvent,
|
| 29 |
Workflow,
|
| 30 |
step,
|
| 31 |
Event,
|
| 32 |
-
Context
|
| 33 |
-
InputRequiredEvent,
|
| 34 |
-
HumanResponseEvent
|
| 35 |
)
|
| 36 |
-
from
|
| 37 |
-
#import whisper
|
| 38 |
-
from llama_index.readers.whisper import WhisperReader
|
| 39 |
-
import gradio as gr
|
| 40 |
-
import asyncio
|
| 41 |
-
import nest_asyncio
|
| 42 |
from queue import Queue
|
|
|
|
|
|
|
| 43 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
# Load environment variables
|
| 46 |
load_dotenv()
|
| 47 |
-
CO_API_KEY = os.getenv("COHERE_API_KEY")
|
| 48 |
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
|
|
|
|
| 49 |
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
|
| 50 |
-
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 51 |
-
|
| 52 |
-
warnings.filterwarnings('ignore')
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
|
| 59 |
class ParseFormEvent(Event):
|
| 60 |
application_form: str
|
| 61 |
|
|
|
|
| 62 |
class QueryEvent(Event):
|
| 63 |
query: str
|
|
|
|
|
|
|
| 64 |
|
| 65 |
class ResponseEvent(Event):
|
| 66 |
response: str
|
| 67 |
|
|
|
|
|
|
|
| 68 |
class FeedbackEvent(Event):
|
| 69 |
feedback: str
|
| 70 |
|
|
|
|
| 71 |
class GenerateQuestionsEvent(Event):
|
| 72 |
pass
|
| 73 |
|
|
|
|
| 74 |
class RAGWorkflow(Workflow):
|
| 75 |
storage_dir = "./storage"
|
| 76 |
-
llm:
|
| 77 |
query_engine: VectorStoreIndex
|
| 78 |
|
| 79 |
@step
|
| 80 |
async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
|
| 81 |
-
|
|
|
|
| 82 |
if not ev.resume_file:
|
| 83 |
raise ValueError("No resume file provided")
|
| 84 |
|
| 85 |
if not ev.application_form:
|
| 86 |
raise ValueError("No application form provided")
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
|
| 90 |
-
|
| 91 |
-
# ingest our data and set up the query engine
|
| 92 |
if os.path.exists(self.storage_dir):
|
| 93 |
-
#
|
| 94 |
storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
|
| 95 |
index = load_index_from_storage(storage_context)
|
| 96 |
else:
|
| 97 |
-
#
|
| 98 |
documents = LlamaParse(
|
| 99 |
-
api_key=llama_cloud_api_key,
|
| 100 |
-
base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
|
| 101 |
result_type="markdown",
|
| 102 |
-
content_guideline_instruction="This is a resume, gather related facts together and format it as
|
|
|
|
| 103 |
).load_data(ev.resume_file)
|
| 104 |
# embed and index the documents
|
| 105 |
index = VectorStoreIndex.from_documents(
|
| 106 |
documents,
|
| 107 |
-
embed_model=
|
| 108 |
)
|
| 109 |
index.storage_context.persist(persist_dir=self.storage_dir)
|
| 110 |
|
| 111 |
-
#
|
| 112 |
self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
|
| 113 |
|
| 114 |
-
#
|
|
|
|
|
|
|
| 115 |
return ParseFormEvent(application_form=ev.application_form)
|
| 116 |
|
| 117 |
-
#
|
| 118 |
@step
|
| 119 |
async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
|
| 120 |
parser = LlamaParse(
|
| 121 |
-
api_key=llama_cloud_api_key,
|
| 122 |
-
base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
|
| 123 |
result_type="markdown",
|
| 124 |
-
content_guideline_instruction="This is a job application form. Create a list of all the fields
|
|
|
|
| 125 |
formatting_instruction="Return a bulleted list of the fields ONLY."
|
| 126 |
)
|
| 127 |
|
| 128 |
# get the LLM to convert the parsed form into JSON
|
| 129 |
result = parser.load_data(ev.application_form)[0]
|
| 130 |
raw_json = self.llm.complete(
|
| 131 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
fields = json.loads(raw_json.text)["fields"]
|
| 133 |
|
| 134 |
await ctx.set("fields_to_fill", fields)
|
|
|
|
| 135 |
|
| 136 |
return GenerateQuestionsEvent()
|
| 137 |
|
| 138 |
-
# this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
|
| 139 |
@step
|
| 140 |
async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
|
| 141 |
|
| 142 |
# get the list of fields to fill in
|
| 143 |
fields = await ctx.get("fields_to_fill")
|
|
|
|
| 144 |
|
| 145 |
# generate one query for each of the fields, and fire them off
|
| 146 |
for field in fields:
|
| 147 |
question = f"How would you answer this question about the candidate? <field>{field}</field>"
|
| 148 |
-
|
| 149 |
-
if hasattr(ev,"feedback"):
|
| 150 |
question += f"""
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
| 155 |
|
| 156 |
ctx.send_event(QueryEvent(
|
| 157 |
field=field,
|
| 158 |
query=question
|
| 159 |
))
|
| 160 |
|
| 161 |
-
# store the number of fields so we know how many to wait for later
|
| 162 |
await ctx.set("total_fields", len(fields))
|
|
|
|
|
|
|
| 163 |
return
|
| 164 |
|
| 165 |
@step
|
| 166 |
async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
response = self.query_engine.query(f"This is a question about the specific resume we have in our database: {ev.query}")
|
| 170 |
-
|
| 171 |
-
print(f"Answer was: {str(response)}")
|
| 172 |
-
|
| 173 |
return ResponseEvent(field=ev.field, response=response.response)
|
| 174 |
|
| 175 |
-
# we now emit an InputRequiredEvent
|
| 176 |
@step
|
| 177 |
async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
|
| 178 |
# get the total number of fields to wait for
|
|
@@ -180,10 +180,11 @@ class RAGWorkflow(Workflow):
|
|
| 180 |
|
| 181 |
responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
|
| 182 |
if responses is None:
|
| 183 |
-
return None
|
| 184 |
|
| 185 |
# we've got all the responses!
|
| 186 |
responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
|
|
|
|
| 187 |
|
| 188 |
result = self.llm.complete(f"""
|
| 189 |
You are given a list of fields in an application form and responses to
|
|
@@ -195,16 +196,20 @@ class RAGWorkflow(Workflow):
|
|
| 195 |
</responses>
|
| 196 |
""")
|
| 197 |
|
| 198 |
-
|
|
|
|
|
|
|
| 199 |
await ctx.set("filled_form", str(result))
|
| 200 |
|
| 201 |
-
|
|
|
|
|
|
|
| 202 |
return InputRequiredEvent(
|
| 203 |
prefix="How does this look? Give me any feedback you have on any of the answers.",
|
| 204 |
result=result
|
| 205 |
)
|
| 206 |
|
| 207 |
-
# Accept the feedback.
|
| 208 |
@step
|
| 209 |
async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
|
| 210 |
|
|
@@ -227,103 +232,17 @@ class RAGWorkflow(Workflow):
|
|
| 227 |
return FeedbackEvent(feedback=ev.response)
|
| 228 |
|
| 229 |
|
| 230 |
-
|
| 231 |
-
WORKFLOW_FILE = "workflows/RAG-EventDriven.html"
|
| 232 |
-
draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
from IPython.display import display, HTML, DisplayHandle
|
| 236 |
-
from helper import extract_html_content
|
| 237 |
-
|
| 238 |
-
html_content = extract_html_content(WORKFLOW_FILE)
|
| 239 |
-
display(HTML(html_content), metadata=dict(isolated=True))
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
# Cool! You can see the path all the way to the end and the feedback loop is clear.
|
| 243 |
-
|
| 244 |
-
# <p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
|
| 245 |
-
# <b>Different Run Results:</b> The output generated by AI chat models can vary with each execution due to their dynamic, probabilistic nature. Don't be surprised if your results differ from those shown in the video.</p>
|
| 246 |
-
|
| 247 |
-
# ## Getting voice feedback
|
| 248 |
-
|
| 249 |
-
# Now, just for fun, you'll do one more thing: change the feedback from text feedback to actual words spoken out loud. To do this we'll use a different model from OpenAI called Whisper. LlamaIndex has a built-in way to transcribe audio files into text using Whisper.
|
| 250 |
-
#
|
| 251 |
-
# Here's a function that takes a file and uses Whisper to return just the text:
|
| 252 |
-
|
| 253 |
-
|
| 254 |
def transcribe_speech(filepath):
|
| 255 |
if filepath is None:
|
| 256 |
gr.Warning("No audio found, please retry.")
|
| 257 |
-
audio_file= open(filepath, "rb")
|
| 258 |
-
reader = WhisperReader(
|
| 259 |
-
model="whisper-1",
|
| 260 |
-
api_key=OPENAI_API_KEY,
|
| 261 |
-
)
|
| 262 |
-
documents = reader.load_data(filepath)
|
| 263 |
-
return documents[0].text
|
| 264 |
-
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
# First, create a callback function that saves data to a global variable.
|
| 269 |
|
|
|
|
| 270 |
|
| 271 |
-
def store_transcription(output):
|
| 272 |
-
global transcription_value
|
| 273 |
-
transcription_value = output
|
| 274 |
-
return output
|
| 275 |
|
| 276 |
-
|
| 277 |
-
# Now use Gradio, which has special widgets that can render inside a notebook, to create an interface
|
| 278 |
-
# for capturing audio from a microphone. When the audio is captured, it calls `transcribe_speech` on the recorded data,
|
| 279 |
-
# and calls `store_transcription` on that.
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
mic_transcribe = gr.Interface(
|
| 283 |
-
fn=lambda x: store_transcription(transcribe_speech(x)),
|
| 284 |
-
inputs=gr.Audio(sources=["microphone"],
|
| 285 |
-
type="filepath"),
|
| 286 |
-
outputs=gr.Textbox(label="Transcription"))
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
# In Gradio, define a visual interface containing this microphone input and output, and then launch it:
|
| 290 |
-
|
| 291 |
-
# Make sure to wait for the gradio interface to load. A popup window will appear and ask you to allow the use of your
|
| 292 |
-
# microphone. To record audio, make sure to click on record -> stop -> submit. Make sure the audio is captured
|
| 293 |
-
# before clicking on 'submit'.
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
test_interface = gr.Blocks()
|
| 297 |
-
with test_interface:
|
| 298 |
-
gr.TabbedInterface(
|
| 299 |
-
[mic_transcribe],
|
| 300 |
-
["Transcribe Microphone"]
|
| 301 |
-
)
|
| 302 |
-
|
| 303 |
-
test_interface.launch(
|
| 304 |
-
share=True,
|
| 305 |
-
show_error=True,
|
| 306 |
-
server_port=8000,
|
| 307 |
-
prevent_thread_lock=True
|
| 308 |
-
)
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
# You can now print out the transcription, which is stored in that global variable you created earlier:
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
print(transcription_value)
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
# run Gradio again, so it's a good idea to shut down the running Gradio interface.
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
test_interface.close()
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
# Make sure to run the previous cell to close the Gradio interface before running the next cell
|
| 325 |
-
|
| 326 |
-
# Now create an entirely new class, a Transcription Handler.
|
| 327 |
class TranscriptionHandler:
|
| 328 |
|
| 329 |
# we create a queue to hold transcription values
|
|
@@ -358,7 +277,7 @@ class TranscriptionHandler:
|
|
| 358 |
self.interface.launch(
|
| 359 |
share=False,
|
| 360 |
server_port=8000,
|
| 361 |
-
|
| 362 |
)
|
| 363 |
|
| 364 |
# we poll every 1.5 seconds waiting for something to end up in the queue
|
|
@@ -371,29 +290,42 @@ class TranscriptionHandler:
|
|
| 371 |
await asyncio.sleep(1.5)
|
| 372 |
|
| 373 |
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
-
handler = w.run(
|
| 381 |
-
resume_file="./data/fake_resume.pdf",
|
| 382 |
-
application_form="./data/fake_application_form.pdf"
|
| 383 |
-
)
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
# Get transcription
|
| 388 |
-
transcription_handler = TranscriptionHandler()
|
| 389 |
-
response = await transcription_handler.get_transcription()
|
| 390 |
-
|
| 391 |
-
handler.ctx.send_event(
|
| 392 |
-
HumanResponseEvent(
|
| 393 |
-
response=response
|
| 394 |
-
)
|
| 395 |
-
)
|
| 396 |
-
|
| 397 |
-
response = await handler
|
| 398 |
-
print("Agent complete! Here's your final result:")
|
| 399 |
-
print(str(response))
|
|
|
|
| 1 |
+
from helper import extract_html_content
|
| 2 |
+
from IPython.display import display, HTML
|
| 3 |
+
from llama_index.utils.workflow import draw_all_possible_flows
|
| 4 |
+
from llama_index.core.tools import FunctionTool
|
| 5 |
+
from llama_index.core.agent import FunctionCallingAgent
|
| 6 |
+
from llama_index.core import Settings
|
| 7 |
+
from llama_parse import LlamaParse
|
| 8 |
+
from llama_index.llms.groq import Groq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 10 |
from llama_index.core import (
|
| 11 |
VectorStoreIndex,
|
| 12 |
StorageContext,
|
| 13 |
load_index_from_storage
|
| 14 |
)
|
| 15 |
+
import nest_asyncio
|
| 16 |
+
from llama_index.core.workflow import InputRequiredEvent, HumanResponseEvent
|
| 17 |
from llama_index.core.workflow import (
|
| 18 |
StartEvent,
|
| 19 |
StopEvent,
|
| 20 |
Workflow,
|
| 21 |
step,
|
| 22 |
Event,
|
| 23 |
+
Context
|
|
|
|
|
|
|
| 24 |
)
|
| 25 |
+
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
from queue import Queue
|
| 27 |
+
import gradio as gr
|
| 28 |
+
import whisper
|
| 29 |
from dotenv import load_dotenv
|
| 30 |
+
import os, json
|
| 31 |
+
import asyncio
|
| 32 |
+
|
| 33 |
+
storage_dir = "./storage"
|
| 34 |
+
application_file = "./data/fake_application_form.pdf"
|
| 35 |
+
nest_asyncio.apply()
|
| 36 |
|
|
|
|
| 37 |
load_dotenv()
|
|
|
|
| 38 |
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
|
| 39 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 40 |
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
|
| 43 |
+
global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
| 44 |
+
Settings.embed_model = global_embed_model
|
| 45 |
|
| 46 |
|
| 47 |
class ParseFormEvent(Event):
|
| 48 |
application_form: str
|
| 49 |
|
| 50 |
+
|
| 51 |
class QueryEvent(Event):
|
| 52 |
query: str
|
| 53 |
+
field: str
|
| 54 |
+
|
| 55 |
|
| 56 |
class ResponseEvent(Event):
|
| 57 |
response: str
|
| 58 |
|
| 59 |
+
|
| 60 |
+
# new!
|
| 61 |
class FeedbackEvent(Event):
|
| 62 |
feedback: str
|
| 63 |
|
| 64 |
+
|
| 65 |
class GenerateQuestionsEvent(Event):
|
| 66 |
pass
|
| 67 |
|
| 68 |
+
|
| 69 |
class RAGWorkflow(Workflow):
|
| 70 |
storage_dir = "./storage"
|
| 71 |
+
llm: Groq
|
| 72 |
query_engine: VectorStoreIndex
|
| 73 |
|
| 74 |
@step
|
| 75 |
async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
|
| 76 |
+
self.llm = global_llm
|
| 77 |
+
self.storage_dir = storage_dir
|
| 78 |
if not ev.resume_file:
|
| 79 |
raise ValueError("No resume file provided")
|
| 80 |
|
| 81 |
if not ev.application_form:
|
| 82 |
raise ValueError("No application form provided")
|
| 83 |
|
| 84 |
+
# ingest the data and set up the query engine
|
|
|
|
|
|
|
|
|
|
| 85 |
if os.path.exists(self.storage_dir):
|
| 86 |
+
# you've already ingested the resume document
|
| 87 |
storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
|
| 88 |
index = load_index_from_storage(storage_context)
|
| 89 |
else:
|
| 90 |
+
# parse and load the resume document
|
| 91 |
documents = LlamaParse(
|
|
|
|
|
|
|
| 92 |
result_type="markdown",
|
| 93 |
+
content_guideline_instruction="This is a resume, gather related facts together and format it as "
|
| 94 |
+
"bullet points with headers"
|
| 95 |
).load_data(ev.resume_file)
|
| 96 |
# embed and index the documents
|
| 97 |
index = VectorStoreIndex.from_documents(
|
| 98 |
documents,
|
| 99 |
+
embed_model=global_embed_model
|
| 100 |
)
|
| 101 |
index.storage_context.persist(persist_dir=self.storage_dir)
|
| 102 |
|
| 103 |
+
# create a query engine
|
| 104 |
self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
|
| 105 |
|
| 106 |
+
# you no longer need a query to be passed in,
|
| 107 |
+
# you'll be generating the queries instead
|
| 108 |
+
# let's pass the application form to a new step to parse it
|
| 109 |
return ParseFormEvent(application_form=ev.application_form)
|
| 110 |
|
| 111 |
+
# new - separated the form parsing from the question generation
|
| 112 |
@step
|
| 113 |
async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
|
| 114 |
parser = LlamaParse(
|
|
|
|
|
|
|
| 115 |
result_type="markdown",
|
| 116 |
+
content_guideline_instruction="This is a job application form. Create a list of all the fields "
|
| 117 |
+
"that need to be filled in.",
|
| 118 |
formatting_instruction="Return a bulleted list of the fields ONLY."
|
| 119 |
)
|
| 120 |
|
| 121 |
# get the LLM to convert the parsed form into JSON
|
| 122 |
result = parser.load_data(ev.application_form)[0]
|
| 123 |
raw_json = self.llm.complete(
|
| 124 |
+
f"""
|
| 125 |
+
This is a parsed form.
|
| 126 |
+
Convert it into a JSON object containing only the list
|
| 127 |
+
of fields to be filled in, in the form {{ fields: [...] }}.
|
| 128 |
+
<form>{result.text}</form>.
|
| 129 |
+
Return JSON ONLY, no markdown.
|
| 130 |
+
""")
|
| 131 |
fields = json.loads(raw_json.text)["fields"]
|
| 132 |
|
| 133 |
await ctx.set("fields_to_fill", fields)
|
| 134 |
+
print("\n DEBUG: all fields written to Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
|
| 135 |
|
| 136 |
return GenerateQuestionsEvent()
|
| 137 |
|
| 138 |
+
# new - this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
|
| 139 |
@step
|
| 140 |
async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
|
| 141 |
|
| 142 |
# get the list of fields to fill in
|
| 143 |
fields = await ctx.get("fields_to_fill")
|
| 144 |
+
print("\n DEBUG:all fields Read from Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")
|
| 145 |
|
| 146 |
# generate one query for each of the fields, and fire them off
|
| 147 |
for field in fields:
|
| 148 |
question = f"How would you answer this question about the candidate? <field>{field}</field>"
|
| 149 |
+
# Is there feedback? If so, add it to the query:
|
| 150 |
+
if hasattr(ev, "feedback"):
|
| 151 |
question += f"""
|
| 152 |
+
\nWe previously got feedback about how we answered the questions.
|
| 153 |
+
It might not be relevant to this particular field, but here it is:
|
| 154 |
+
<feedback>{ev.feedback}</feedback>
|
| 155 |
+
"""
|
| 156 |
+
print("\n question : ", question)
|
| 157 |
|
| 158 |
ctx.send_event(QueryEvent(
|
| 159 |
field=field,
|
| 160 |
query=question
|
| 161 |
))
|
| 162 |
|
| 163 |
+
# store the number of fields, so we know how many to wait for later
|
| 164 |
await ctx.set("total_fields", len(fields))
|
| 165 |
+
print(f"\n DEBUG: total fields from Context : {len(fields)}")
|
| 166 |
+
|
| 167 |
return
|
| 168 |
|
| 169 |
@step
|
| 170 |
async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
|
| 171 |
+
response = self.query_engine.query(
|
| 172 |
+
f"This is a question about the specific resume we have in our database: {ev.query}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
return ResponseEvent(field=ev.field, response=response.response)
|
| 174 |
|
| 175 |
+
# new - we now emit an InputRequiredEvent
|
| 176 |
@step
|
| 177 |
async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
|
| 178 |
# get the total number of fields to wait for
|
|
|
|
| 180 |
|
| 181 |
responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
|
| 182 |
if responses is None:
|
| 183 |
+
return None # do nothing if there's nothing to do yet
|
| 184 |
|
| 185 |
# we've got all the responses!
|
| 186 |
responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
|
| 187 |
+
print("\n DEBUG: got all responses :\n")
|
| 188 |
|
| 189 |
result = self.llm.complete(f"""
|
| 190 |
You are given a list of fields in an application form and responses to
|
|
|
|
| 196 |
</responses>
|
| 197 |
""")
|
| 198 |
|
| 199 |
+
print("\n DEBUG: llm combined the fields and responses from resume")
|
| 200 |
+
|
| 201 |
+
# new! save the result for later
|
| 202 |
await ctx.set("filled_form", str(result))
|
| 203 |
|
| 204 |
+
print("\n DEBUG: Write all form fields to context. Now will emit InputRequiredEvent")
|
| 205 |
+
|
| 206 |
+
# new! Let's get a human in the loop
|
| 207 |
return InputRequiredEvent(
|
| 208 |
prefix="How does this look? Give me any feedback you have on any of the answers.",
|
| 209 |
result=result
|
| 210 |
)
|
| 211 |
|
| 212 |
+
# new! Accept the feedback.
|
| 213 |
@step
|
| 214 |
async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
|
| 215 |
|
|
|
|
| 232 |
return FeedbackEvent(feedback=ev.response)
|
| 233 |
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
def transcribe_speech(filepath):
|
| 236 |
if filepath is None:
|
| 237 |
gr.Warning("No audio found, please retry.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
+
model = whisper.load_model("base")
|
| 240 |
+
result = model.transcribe(filepath, fp16=False)
|
|
|
|
| 241 |
|
| 242 |
+
return result["text"]
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
+
# New! Transcription handler.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
class TranscriptionHandler:
|
| 247 |
|
| 248 |
# we create a queue to hold transcription values
|
|
|
|
| 277 |
self.interface.launch(
|
| 278 |
share=False,
|
| 279 |
server_port=8000,
|
| 280 |
+
prevent_thread_lock=True
|
| 281 |
)
|
| 282 |
|
| 283 |
# we poll every 1.5 seconds waiting for something to end up in the queue
|
|
|
|
| 290 |
await asyncio.sleep(1.5)
|
| 291 |
|
| 292 |
|
| 293 |
+
async def main():
|
| 294 |
+
w = RAGWorkflow(timeout=600, verbose=True)
|
| 295 |
+
handler = w.run(
|
| 296 |
+
resume_file="data/fake_resume.pdf",
|
| 297 |
+
application_form="data/fake_application_form.pdf"
|
| 298 |
+
)
|
| 299 |
|
| 300 |
+
print("DEBUG: Starting event stream...")
|
| 301 |
+
async for event in handler.stream_events():
|
| 302 |
+
print(f"DEBUG: Received event type {type(event).__name__}")
|
| 303 |
+
if isinstance(event, InputRequiredEvent):
|
| 304 |
+
print("We've filled in your form! Here are the results:\n")
|
| 305 |
+
print(event.result)
|
| 306 |
+
|
| 307 |
+
# Get transcription
|
| 308 |
+
transcription_handler = TranscriptionHandler()
|
| 309 |
+
response = await transcription_handler.get_transcription()
|
| 310 |
+
|
| 311 |
+
handler.ctx.send_event(
|
| 312 |
+
HumanResponseEvent(
|
| 313 |
+
response=response
|
| 314 |
+
)
|
| 315 |
+
)
|
| 316 |
+
else:
|
| 317 |
+
print("\n handler received event ", event)
|
| 318 |
|
| 319 |
+
response = await handler
|
| 320 |
+
print("Agent complete! Here's your final result:")
|
| 321 |
+
print(str(response))
|
| 322 |
|
| 323 |
+
# Display of the workflow
|
| 324 |
+
workflow_file = Path(__file__).parent / "workflows" / "form_parsing_workflow.html"
|
| 325 |
+
draw_all_possible_flows(w, filename=str(workflow_file))
|
| 326 |
+
html_content = extract_html_content(str(workflow_file))
|
| 327 |
+
display(HTML(html_content), metadata=dict(isolated=True))
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
+
if __name__ == "__main__":
|
| 331 |
+
asyncio.run(main())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app2.py
DELETED
|
@@ -1,228 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python
|
| 2 |
-
# coding: utf-8
|
| 3 |
-
|
| 4 |
-
import warnings
|
| 5 |
-
import os
|
| 6 |
-
import json
|
| 7 |
-
import asyncio
|
| 8 |
-
from queue import Queue
|
| 9 |
-
from dotenv import load_dotenv
|
| 10 |
-
import gradio as gr
|
| 11 |
-
from llama_cloud_services import LlamaParse
|
| 12 |
-
from llama_index.llms.cohere import Cohere
|
| 13 |
-
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 14 |
-
from llama_index.core import (
|
| 15 |
-
VectorStoreIndex,
|
| 16 |
-
StorageContext,
|
| 17 |
-
load_index_from_storage
|
| 18 |
-
)
|
| 19 |
-
from llama_index.core.workflow import (
|
| 20 |
-
StartEvent,
|
| 21 |
-
StopEvent,
|
| 22 |
-
Workflow,
|
| 23 |
-
step,
|
| 24 |
-
Event,
|
| 25 |
-
Context,
|
| 26 |
-
InputRequiredEvent,
|
| 27 |
-
HumanResponseEvent
|
| 28 |
-
)
|
| 29 |
-
from llama_index.readers.whisper import WhisperReader
|
| 30 |
-
import nest_asyncio
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
# Load environment variables
|
| 34 |
-
load_dotenv()
|
| 35 |
-
CO_API_KEY = os.getenv("COHERE_API_KEY")
|
| 36 |
-
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
|
| 37 |
-
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
|
| 38 |
-
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 39 |
-
|
| 40 |
-
warnings.filterwarnings('ignore')
|
| 41 |
-
|
| 42 |
-
nest_asyncio.apply()
|
| 43 |
-
|
| 44 |
-
# Define Event Classes
|
| 45 |
-
class ParseFormEvent(Event):
|
| 46 |
-
application_form: str
|
| 47 |
-
|
| 48 |
-
class QueryEvent(Event):
|
| 49 |
-
query: str
|
| 50 |
-
|
| 51 |
-
class ResponseEvent(Event):
|
| 52 |
-
response: str
|
| 53 |
-
|
| 54 |
-
class FeedbackEvent(Event):
|
| 55 |
-
feedback: str
|
| 56 |
-
|
| 57 |
-
class GenerateQuestionsEvent(Event):
|
| 58 |
-
pass
|
| 59 |
-
|
| 60 |
-
# Define Workflow
|
| 61 |
-
class RAGWorkflow(Workflow):
|
| 62 |
-
storage_dir = "./storage"
|
| 63 |
-
llm: Cohere
|
| 64 |
-
query_engine: VectorStoreIndex
|
| 65 |
-
|
| 66 |
-
@step
|
| 67 |
-
async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
|
| 68 |
-
if not ev.resume_file:
|
| 69 |
-
raise ValueError("No resume file provided")
|
| 70 |
-
if not ev.application_form:
|
| 71 |
-
raise ValueError("No application form provided")
|
| 72 |
-
|
| 73 |
-
self.llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
|
| 74 |
-
|
| 75 |
-
if os.path.exists(self.storage_dir):
|
| 76 |
-
storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
|
| 77 |
-
index = load_index_from_storage(storage_context)
|
| 78 |
-
else:
|
| 79 |
-
documents = LlamaParse(
|
| 80 |
-
api_key=llama_cloud_api_key,
|
| 81 |
-
base_url=LLAMA_CLOUD_BASE_URL,
|
| 82 |
-
result_type="markdown",
|
| 83 |
-
content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
|
| 84 |
-
).load_data(ev.resume_file)
|
| 85 |
-
|
| 86 |
-
index = VectorStoreIndex.from_documents(
|
| 87 |
-
documents,
|
| 88 |
-
embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
| 89 |
-
)
|
| 90 |
-
index.storage_context.persist(persist_dir=self.storage_dir)
|
| 91 |
-
|
| 92 |
-
self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
|
| 93 |
-
return ParseFormEvent(application_form=ev.application_form)
|
| 94 |
-
|
| 95 |
-
@step
|
| 96 |
-
async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
|
| 97 |
-
parser = LlamaParse(
|
| 98 |
-
api_key=llama_cloud_api_key,
|
| 99 |
-
base_url=LLAMA_CLOUD_BASE_URL,
|
| 100 |
-
result_type="markdown",
|
| 101 |
-
content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
|
| 102 |
-
formatting_instruction="Return a bulleted list of the fields ONLY."
|
| 103 |
-
)
|
| 104 |
-
|
| 105 |
-
result = parser.load_data(ev.application_form)[0]
|
| 106 |
-
raw_json = self.llm.complete(
|
| 107 |
-
f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown."
|
| 108 |
-
)
|
| 109 |
-
fields = json.loads(raw_json.text)["fields"]
|
| 110 |
-
|
| 111 |
-
await ctx.set("fields_to_fill", fields)
|
| 112 |
-
return GenerateQuestionsEvent()
|
| 113 |
-
|
| 114 |
-
@step
|
| 115 |
-
async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
|
| 116 |
-
fields = await ctx.get("fields_to_fill")
|
| 117 |
-
|
| 118 |
-
for field in fields:
|
| 119 |
-
question = f"How would you answer this question about the candidate? <field>{field}</field>"
|
| 120 |
-
|
| 121 |
-
if hasattr(ev, "feedback"):
|
| 122 |
-
question += f"\nPrevious feedback: <feedback>{ev.feedback}</feedback>"
|
| 123 |
-
|
| 124 |
-
ctx.send_event(QueryEvent(field=field, query=question))
|
| 125 |
-
|
| 126 |
-
await ctx.set("total_fields", len(fields))
|
| 127 |
-
return
|
| 128 |
-
|
| 129 |
-
@step
|
| 130 |
-
async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
|
| 131 |
-
response = self.query_engine.query(
|
| 132 |
-
f"This is a question about the specific resume we have in our database: {ev.query}"
|
| 133 |
-
)
|
| 134 |
-
return ResponseEvent(field=ev.field, response=response.response)
|
| 135 |
-
|
| 136 |
-
@step
|
| 137 |
-
async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
|
| 138 |
-
total_fields = await ctx.get("total_fields")
|
| 139 |
-
responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
|
| 140 |
-
|
| 141 |
-
if responses is None:
|
| 142 |
-
return None
|
| 143 |
-
|
| 144 |
-
responseList = "\n".join(f"Field: {r.field}\nResponse: {r.response}" for r in responses)
|
| 145 |
-
result = self.llm.complete(
|
| 146 |
-
f"You are given a list of fields in an application form and responses to questions about those fields from a resume. Combine the two into a list of fields and succinct, factual answers.\n<responses>{responseList}</responses>"
|
| 147 |
-
)
|
| 148 |
-
|
| 149 |
-
await ctx.set("filled_form", str(result))
|
| 150 |
-
|
| 151 |
-
return InputRequiredEvent(
|
| 152 |
-
prefix="How does this look? Provide feedback.",
|
| 153 |
-
result=result
|
| 154 |
-
)
|
| 155 |
-
|
| 156 |
-
@step
|
| 157 |
-
async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
|
| 158 |
-
result = self.llm.complete(
|
| 159 |
-
f"You have received feedback on the form-filling task.\n<feedback>{ev.response}</feedback>\nIf everything is fine, respond with 'OKAY'. Otherwise, respond with 'FEEDBACK'."
|
| 160 |
-
)
|
| 161 |
-
|
| 162 |
-
verdict = result.text.strip()
|
| 163 |
-
return StopEvent(result=await ctx.get("filled_form")) if verdict == "OKAY" else FeedbackEvent(feedback=ev.response)
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
# Transcription Handler
class TranscriptionHandler:
    """Captures a single voice transcription through a temporary Gradio UI.

    A thread-safe Queue decouples the Gradio callback (invoked on Gradio's
    worker thread) from the async poller in `get_transcription`, which waits
    until one transcription has been produced, then shuts the UI down.
    """

    def __init__(self):
        # Transcriptions produced by the UI land here; the live Gradio app
        # is tracked in `interface` so it can be closed after one capture.
        self.transcription_queue = Queue()
        self.interface = None

    def store_transcription(self, output):
        """Gradio callback: enqueue the transcription and echo it to the UI."""
        self.transcription_queue.put(output)
        return output

    def create_interface(self):
        """Build a one-tab Gradio app wiring the microphone to the transcriber."""
        mic_tab = gr.Interface(
            fn=lambda audio_path: self.store_transcription(transcribe_speech(audio_path)),
            inputs=gr.Audio(sources=["microphone"], type="filepath"),
            outputs=gr.Textbox(label="Transcription"),
        )
        self.interface = gr.Blocks()
        with self.interface:
            gr.TabbedInterface([mic_tab], ["Transcribe Microphone"])
        return self.interface

    async def get_transcription(self):
        """Launch the UI, poll until one transcription arrives, then tear down."""
        self.interface = self.create_interface()
        self.interface.launch(share=False, server_port=8000, inbrowser=True)

        # Poll instead of blocking on Queue.get() so the event loop stays free.
        while self.transcription_queue.empty():
            await asyncio.sleep(1.5)
        captured = self.transcription_queue.get()
        self.interface.close()
        return captured
# Transcription function
def transcribe_speech(filepath):
    """Transcribe an audio file with OpenAI's Whisper API via WhisperReader.

    Parameters
    ----------
    filepath : str | None
        Path to the recorded audio file from Gradio (None/"" when nothing
        was recorded).

    Returns
    -------
    str
        The transcribed text, or "" when no audio was supplied.
    """
    if not filepath:
        gr.Warning("No audio found, please retry.")
        # Bug fix: execution previously fell through and WhisperReader was
        # called with a missing file path, raising instead of recovering.
        return ""
    reader = WhisperReader(model="whisper-1", api_key=OPENAI_API_KEY)
    documents = reader.load_data(filepath)
    return documents[0].text
# Async Wrapper
async def main():
    """Run the RAG form-filling workflow, pausing for voice feedback on request.

    Streams workflow events; each InputRequiredEvent spins up a one-shot
    voice-capture UI and feeds the transcription back in as a
    HumanResponseEvent. Prints the final filled form when the run completes.
    """
    workflow = RAGWorkflow(timeout=600, verbose=False)

    handler = workflow.run(
        resume_file="./data/fake_resume.pdf",
        application_form="./data/fake_application_form.pdf"
    )

    async for event in handler.stream_events():
        if isinstance(event, InputRequiredEvent):
            recorder = TranscriptionHandler()
            spoken_feedback = await recorder.get_transcription()
            handler.ctx.send_event(HumanResponseEvent(response=spoken_feedback))

    final_result = await handler
    print("Agent complete! Here's your final result:")
    print(str(final_result))

if __name__ == "__main__":
    asyncio.run(main())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -15,6 +15,7 @@ llama-index-readers-llama-parse
|
|
| 15 |
llama-index-utils-workflow
|
| 16 |
#openai-whisper ==20240930
|
| 17 |
#llama-index-readers-whisper
|
|
|
|
| 18 |
pydantic
|
| 19 |
pydantic_core
|
| 20 |
dotenv
|
|
|
|
| 15 |
llama-index-utils-workflow
|
| 16 |
#openai-whisper ==20240930
|
| 17 |
#llama-index-readers-whisper
|
| 18 |
+
IPython
|
| 19 |
pydantic
|
| 20 |
pydantic_core
|
| 21 |
dotenv
|
test_audio.py
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
from faster_whisper import WhisperModel
|
| 3 |
import gradio as gr
|
| 4 |
from pathlib import Path
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
import os
|
| 7 |
|
| 8 |
-
load_dotenv()
|
| 9 |
-
openai_api_key = os.getenv("OPENAI_API_KEY")
|
| 10 |
-
|
| 11 |
transcription_value = ""
|
| 12 |
|
| 13 |
|
|
@@ -15,10 +11,10 @@ def transcribe_speech(filepath):
|
|
| 15 |
if filepath is None:
|
| 16 |
gr.Warning("No audio found, please retry.")
|
| 17 |
|
| 18 |
-
model =
|
| 19 |
-
|
| 20 |
|
| 21 |
-
return "
|
| 22 |
|
| 23 |
|
| 24 |
def store_transcription(output):
|
|
|
|
| 1 |
+
import whisper
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from pathlib import Path
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
import os
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
transcription_value = ""
|
| 8 |
|
| 9 |
|
|
|
|
| 11 |
if filepath is None:
|
| 12 |
gr.Warning("No audio found, please retry.")
|
| 13 |
|
| 14 |
+
model = whisper.load_model("base")
|
| 15 |
+
result = model.transcribe(filepath, fp16=False)
|
| 16 |
|
| 17 |
+
return result["text"]
|
| 18 |
|
| 19 |
|
| 20 |
def store_transcription(output):
|