NaderAfshar committed on
Commit
7e69835
·
1 Parent(s): cf05b38

replaced Groq LLM with Cohere Command-r-plus and it works far better

Browse files
Files changed (3) hide show
  1. app.py +7 -4
  2. gen_package_versions.py +11 -0
  3. moduler_interface.py +350 -0
app.py CHANGED
@@ -5,7 +5,8 @@ from llama_index.core.tools import FunctionTool
5
  from llama_index.core.agent import FunctionCallingAgent
6
  from llama_index.core import Settings
7
  from llama_parse import LlamaParse
8
- from llama_index.llms.groq import Groq
 
9
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
10
  from llama_index.core import (
11
  VectorStoreIndex,
@@ -36,10 +37,12 @@ nest_asyncio.apply()
36
 
37
  load_dotenv()
38
  llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
39
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
40
  LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
 
 
41
 
42
- global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
 
43
  global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
44
  Settings.embed_model = global_embed_model
45
 
@@ -68,7 +71,7 @@ class GenerateQuestionsEvent(Event):
68
 
69
  class RAGWorkflow(Workflow):
70
  storage_dir = "./storage"
71
- llm: Groq
72
  query_engine: VectorStoreIndex
73
 
74
  @step
 
5
  from llama_index.core.agent import FunctionCallingAgent
6
  from llama_index.core import Settings
7
  from llama_parse import LlamaParse
8
+ #from llama_index.llms.groq import Groq
9
+ from llama_index.llms.cohere import Cohere
10
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
11
  from llama_index.core import (
12
  VectorStoreIndex,
 
37
 
38
  load_dotenv()
39
  llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
 
40
  LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
41
+ #GROQ_API_KEY = os.getenv("GROQ_API_KEY")
42
+ CO_API_KEY = os.getenv("COHERE_API_KEY")
43
 
44
+ #global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
45
+ global_llm = Cohere(api_key=CO_API_KEY, model="command-r-plus")
46
  global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
47
  Settings.embed_model = global_embed_model
48
 
 
71
 
72
  class RAGWorkflow(Workflow):
73
  storage_dir = "./storage"
74
+ llm: Cohere
75
  query_engine: VectorStoreIndex
76
 
77
  @step
gen_package_versions.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
from importlib.metadata import version, PackageNotFoundError


def report_versions(requirements_path="requirements.txt"):
    """Print (and return) the installed version of each requirement.

    Reads *requirements_path* line by line, skipping blanks and ``#`` comments,
    and looks each package name up in the installed distribution metadata.

    Args:
        requirements_path: path to a pip-style requirements file.

    Returns:
        list[str]: one line per package, either ``"<pkg>==<version>"`` or
        ``"<pkg> not installed"`` — the same text that is printed.
    """
    report = []
    with open(requirements_path) as f:
        for raw_line in f:
            pkg = raw_line.strip()
            # skip blank lines and comments
            if not pkg or pkg.startswith("#"):
                continue
            try:
                report.append(f"{pkg}=={version(pkg)}")
            except PackageNotFoundError:
                # package is listed but not importable/installed in this env
                report.append(f"{pkg} not installed")
    for line in report:
        print(line)
    return report


if __name__ == "__main__":
    # preserve the original script behavior: run against ./requirements.txt
    report_versions()
moduler_interface.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from helper import extract_html_content
2
+ from IPython.display import display, HTML
3
+ from llama_index.utils.workflow import draw_all_possible_flows
4
+ from llama_index.core.tools import FunctionTool
5
+ from llama_index.core.agent import FunctionCallingAgent
6
+ from llama_index.core import Settings
7
+ from llama_parse import LlamaParse
8
+ from llama_index.llms.groq import Groq
9
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
10
+ from llama_index.core import (
11
+ VectorStoreIndex,
12
+ StorageContext,
13
+ load_index_from_storage
14
+ )
15
+ import nest_asyncio
16
+ from llama_index.core.workflow import InputRequiredEvent, HumanResponseEvent
17
+ from llama_index.core.workflow import (
18
+ StartEvent,
19
+ StopEvent,
20
+ Workflow,
21
+ step,
22
+ Event,
23
+ Context
24
+ )
25
+ from pathlib import Path
26
+ from queue import Queue
27
+ import gradio as gr
28
+ import whisper
29
+ from dotenv import load_dotenv
30
+ import os, json
31
+ import asyncio
32
+
33
# Module-level configuration shared by the workflow steps below.
storage_dir = "./storage"  # persisted vector-index location (reused across runs)
application_file = "./data/fake_application_form.pdf"  # NOTE(review): main() passes its own path; this looks unused here — confirm
nest_asyncio.apply()  # allow nested event loops (needed inside notebooks / Gradio)

load_dotenv()
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")  # presumably read implicitly by LlamaParse — confirm
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")

# NOTE(review): app.py was switched to Cohere command-r-plus in this commit,
# but this module still builds a Groq LLM — confirm whether it should match.
global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = global_embed_model  # make this embedder the llama_index default
45
+
46
+
47
class ParseFormEvent(Event):
    """Carries the application-form path from set_up to the form-parsing step."""
    # path to the application form document (PDF) to be parsed
    application_form: str
49
+
50
+
51
class QueryEvent(Event):
    """One question to ask the resume index about a single form field."""
    query: str  # natural-language question posed to the query engine
    field: str  # the form field this query is meant to fill
54
+
55
+
56
class ResponseEvent(Event):
    """Answer produced by the query engine for one form field.

    NOTE(review): ask_question also passes ``field=`` when constructing this
    event; workflow Events accept extra attributes, but declaring ``field: str``
    here would make that contract explicit — confirm before changing.
    """
    response: str  # the query engine's answer text for the field
58
+
59
+
60
# new!
class FeedbackEvent(Event):
    """Human feedback that re-triggers question generation."""
    feedback: str  # raw feedback text captured from the user
63
+
64
+
65
class GenerateQuestionsEvent(Event):
    """Marker event: form fields are stored in context; generate queries next."""
    pass
67
+
68
+
69
class RAGWorkflow(Workflow):
    """Fill in a job application form from a resume, with a human feedback loop.

    Pipeline: index the resume (set_up) -> parse the form into a field list
    (parse_form) -> fan out one query per field (generate_questions /
    ask_question) -> combine answers with the LLM (fill_in_application) ->
    ask a human for feedback (get_feedback) -> stop, or loop back to
    generate_questions carrying the feedback.
    """
    storage_dir = "./storage"  # default; overwritten in set_up from the module global
    llm: Groq  # assigned in set_up from the module-level global_llm
    query_engine: VectorStoreIndex  # NOTE(review): actually holds a query engine, not an index — annotation looks wrong; confirm

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
        """Validate inputs, build or reload the resume index, create the query engine.

        Expects ``ev.resume_file`` and ``ev.application_form`` on the StartEvent;
        raises ValueError if either is missing.
        """
        self.llm = global_llm
        self.storage_dir = storage_dir
        if not ev.resume_file:
            raise ValueError("No resume file provided")

        if not ev.application_form:
            raise ValueError("No application form provided")

        # ingest the data and set up the query engine
        if os.path.exists(self.storage_dir):
            # you've already ingested the resume document
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # parse and load the resume document
            documents = LlamaParse(
                result_type="markdown",
                content_guideline_instruction="This is a resume, gather related facts together and format it as "
                "bullet points with headers"
            ).load_data(ev.resume_file)
            # embed and index the documents
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=global_embed_model
            )
            # persist so the next run hits the cached branch above
            index.storage_context.persist(persist_dir=self.storage_dir)

        # create a query engine
        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)

        # you no longer need a query to be passed in,
        # you'll be generating the queries instead
        # let's pass the application form to a new step to parse it
        return ParseFormEvent(application_form=ev.application_form)

    # new - separated the form parsing from the question generation
    @step
    async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
        """Parse the application form and store its field list in the context.

        Stores the list under the ``"fields_to_fill"`` context key.
        Raises (via json.loads/KeyError) if the LLM does not return the
        expected ``{ fields: [...] }`` JSON — no fallback handling here.
        """
        parser = LlamaParse(
            result_type="markdown",
            content_guideline_instruction="This is a job application form. Create a list of all the fields "
            "that need to be filled in.",
            formatting_instruction="Return a bulleted list of the fields ONLY."
        )

        # get the LLM to convert the parsed form into JSON
        result = parser.load_data(ev.application_form)[0]
        raw_json = self.llm.complete(
            f"""
            This is a parsed form.
            Convert it into a JSON object containing only the list
            of fields to be filled in, in the form {{ fields: [...] }}.
            <form>{result.text}</form>.
            Return JSON ONLY, no markdown.
            """)
        fields = json.loads(raw_json.text)["fields"]

        await ctx.set("fields_to_fill", fields)
        print("\n DEBUG: all fields written to Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")

        return GenerateQuestionsEvent()

    # new - this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
    @step
    async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
        """Emit one QueryEvent per form field; append human feedback if present.

        Uses ``ctx.send_event`` to fan out, then records ``"total_fields"`` so
        fill_in_application knows how many ResponseEvents to collect. Returns
        None itself — the QueryEvent annotation describes the sent events.
        """

        # get the list of fields to fill in
        fields = await ctx.get("fields_to_fill")
        print("\n DEBUG:all fields Read from Context >>>>>>>>>>>>>>>>>>>>>>>>>>\n")

        # generate one query for each of the fields, and fire them off
        for field in fields:
            question = f"How would you answer this question about the candidate? <field>{field}</field>"
            # Is there feedback? If so, add it to the query:
            # (only FeedbackEvent carries a .feedback attribute)
            if hasattr(ev, "feedback"):
                question += f"""
                \nWe previously got feedback about how we answered the questions.
                It might not be relevant to this particular field, but here it is:
                <feedback>{ev.feedback}</feedback>
                """
            print("\n question : ", question)

            ctx.send_event(QueryEvent(
                field=field,
                query=question
            ))

        # store the number of fields, so we know how many to wait for later
        await ctx.set("total_fields", len(fields))
        print(f"\n DEBUG: total fields from Context : {len(fields)}")

        return

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
        """Answer one field's question against the resume index."""
        response = self.query_engine.query(
            f"This is a question about the specific resume we have in our database: {ev.query}")
        # carry the field name through so fill_in_application can pair them up
        return ResponseEvent(field=ev.field, response=response.response)

    # new - we now emit an InputRequiredEvent
    @step
    async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
        """Collect all field answers, merge them with the LLM, ask a human to review.

        Buffers ResponseEvents via ``ctx.collect_events`` until ``total_fields``
        have arrived (returns None while still waiting), then stores the merged
        form under ``"filled_form"`` and emits an InputRequiredEvent.
        """
        # get the total number of fields to wait for
        total_fields = await ctx.get("total_fields")

        responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
        if responses is None:
            return None  # do nothing if there's nothing to do yet

        # we've got all the responses!
        responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)
        print("\n DEBUG: got all responses :\n")

        result = self.llm.complete(f"""
            You are given a list of fields in an application form and responses to
            questions about those fields from a resume. Combine the two into a list of
            fields and succinct, factual answers to fill in those fields.

            <responses>
            {responseList}
            </responses>
            """)

        print("\n DEBUG: llm combined the fields and responses from resume")

        # new! save the result for later
        await ctx.set("filled_form", str(result))

        print("\n DEBUG: Write all form fields to context. Now will emit InputRequiredEvent")

        # new! Let's get a human in the loop
        return InputRequiredEvent(
            prefix="How does this look? Give me any feedback you have on any of the answers.",
            result=result
        )

    # new! Accept the feedback.
    @step
    async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
        """Classify human feedback: 'OKAY' stops the workflow, anything else loops.

        NOTE(review): the comparison is an exact match on 'OKAY'; any extra
        punctuation/markdown from the LLM falls through to the feedback branch
        (a safe failure mode, but worth confirming with the chosen model).
        """

        result = self.llm.complete(f"""
            You have received some human feedback on the form-filling task you've done.
            Does everything look good, or is there more work to be done?
            <feedback>
            {ev.response}
            </feedback>
            If everything is fine, respond with just the word 'OKAY'.
            If there's any other feedback, respond with just the word 'FEEDBACK'.
            """)

        verdict = result.text.strip()

        print(f"LLM says the verdict was {verdict}")
        if (verdict == "OKAY"):
            # done: surface the saved form as the workflow result
            return StopEvent(result=await ctx.get("filled_form"))
        else:
            # loop: hand the raw human feedback back to generate_questions
            return FeedbackEvent(feedback=ev.response)
233
+
234
+
235
def transcribe_speech(filepath):
    """Transcribe an audio file to text using Whisper's ``base`` model.

    Args:
        filepath: path to the recorded audio file, or None when no audio
            was captured.

    Returns:
        The transcribed text, or ``""`` when *filepath* is None.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # BUGFIX: previously fell through and called model.transcribe(None),
        # which raises; bail out with an empty transcription instead.
        return ""

    model = whisper.load_model("base")
    # fp16=False keeps inference on CPU-friendly float32
    result = model.transcribe(filepath, fp16=False)

    return result["text"]
243
+
244
+
245
+ # New! Transcription handler.
246
class TranscriptionHandler:
    """Record speech via a Gradio UI and hand one transcription back to the caller.

    Launches a Blocks app with a log panel and a microphone tab; finished
    transcriptions are pushed onto an internal queue, which get_transcription
    polls until the first result arrives.
    """

    # we create a queue to hold transcription values
    def __init__(self):
        self.transcription_queue = Queue()  # transcriptions produced by the mic tab
        self.interface = None  # the launched Gradio Blocks app (set in create_interface)
        self.log_display = None  # Textbox used for progress messages

    # every time we record something we put it in the queue
    def store_transcription(self, output):
        """Queue a finished transcription and echo it back to the UI."""
        self.transcription_queue.put(output)
        return output

    # This is the same interface and transcription logic as before
    # except it stores the result in a queue instead of a global
    def create_interface(self):
        """Build the Blocks UI: a log panel plus a microphone-transcription tab."""
        # Initial Log Display (Textbox with logs)
        log_box = gr.Textbox(
            label="Log Output",
            interactive=False,
            value="Waiting for user interaction...\n",
            height=200
        )

        # Transcription area that gets activated after form input
        mic_transcribe = gr.Interface(
            fn=lambda x: self.store_transcription(transcribe_speech(x)),
            inputs=gr.Audio(sources=["microphone"], type="filepath"),
            outputs=gr.Textbox(label="Transcription")
        )

        # Creating a Block interface
        self.interface = gr.Blocks()
        with self.interface:
            with gr.Row():
                self.log_display = log_box  # Display log
            with gr.Row():
                # A Tabbed Interface, initially showing the log, then the microphone input
                # NOTE(review): TabbedInterface normally expects Interface/Blocks
                # children; passing a bare Textbox (log_box) may not render as
                # intended — confirm against the installed Gradio version.
                gr.TabbedInterface([log_box, mic_transcribe], ["Log", "Transcribe Microphone"])

        return self.interface

    # Launches the interface with dynamic transition based on events
    async def get_transcription(self):
        """Launch the UI, poll until one transcription arrives, close it, return the text."""
        self.interface = self.create_interface()
        self.interface.launch(
            share=True,  # Remove when running on Hugging Face Spaces
            ssr_mode=False,
            prevent_thread_lock=True  # keep this coroutine running alongside the server
        )

        # Poll every 1.5 seconds, checking if transcription has been queued
        while True:
            if not self.transcription_queue.empty():
                result = self.transcription_queue.get()
                if self.interface is not None:
                    self.interface.close()
                return result
            await asyncio.sleep(1.5)  # yield to the event loop between polls

    # Update log display dynamically as the workflow progresses
    def update_log(self, message):
        """Replace the log textbox content with *message* (plus a trailing newline)."""
        if self.log_display:
            # NOTE(review): calling .update() on a component instance is
            # deprecated in recent Gradio and may be a no-op outside an
            # event handler — confirm this actually refreshes the UI.
            self.log_display.update(value=f"{message}\n")
310
+
311
+
312
async def main():
    """Run the form-filling workflow end-to-end with voice-based human feedback.

    Streams workflow events; whenever the workflow asks for input
    (InputRequiredEvent), launches the transcription UI, records the user's
    spoken feedback, and sends it back as a HumanResponseEvent. Finally prints
    the filled form and renders a diagram of the workflow.
    """
    w = RAGWorkflow(timeout=600, verbose=True)
    handler = w.run(
        resume_file="data/fake_resume.pdf",
        application_form="data/fake_application_form.pdf"
    )

    print("DEBUG: Starting event stream...")
    async for event in handler.stream_events():
        print(f"DEBUG: Received event type {type(event).__name__}")
        if isinstance(event, InputRequiredEvent):
            print("We've filled in your form! Here are the results:\n")
            print(event.result)

            # Get transcription: block here until the user records feedback
            transcription_handler = TranscriptionHandler()
            response = await transcription_handler.get_transcription()

            # feed the spoken feedback back into the paused workflow
            handler.ctx.send_event(
                HumanResponseEvent(
                    response=response
                )
            )
        else:
            print("\n handler received event ", event)

    # await the workflow's final StopEvent result (the filled form text)
    response = await handler
    print("Agent complete! Here's your final result:")
    print(str(response))

    # Display of the workflow
    # NOTE(review): assumes a ./workflows directory exists next to this file;
    # draw_all_possible_flows will fail otherwise — confirm.
    workflow_file = Path(__file__).parent / "workflows" / "form_parsing_workflow.html"
    draw_all_possible_flows(w, filename=str(workflow_file))
    html_content = extract_html_content(str(workflow_file))
    # display(HTML(...)) is an IPython call — only visible in notebook contexts
    display(HTML(html_content), metadata=dict(isolated=True))


if __name__ == "__main__":
    asyncio.run(main())