from helper import extract_html_content
from IPython.display import display, HTML
from llama_index.utils.workflow import draw_all_possible_flows
from llama_index.core.tools import FunctionTool
from llama_index.core.agent import FunctionCallingAgent
from llama_index.core import Settings
from llama_parse import LlamaParse
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
import nest_asyncio
from llama_index.core.workflow import (
    StartEvent,
    StopEvent,
    Workflow,
    step,
    Event,
    Context
)
import json
from pathlib import Path
from dotenv import load_dotenv
import os
import asyncio


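# configuration: index persistence directory, nest_asyncio for nested event
# loops, and API keys loaded from the local .env file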
storage_dir = "./storage"
nest_asyncio.apply()

load_dotenv()
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")

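# models: Groq-hosted Llama 3 70B as the LLM, a local BGE embedding model
# registered globally so every index uses it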
global_llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")
global_embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = global_embed_model

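# parse the resume PDF into markdown with LlamaParse, steering the parser
# toward headed bullet points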
documents = LlamaParse(
    api_key=llama_cloud_api_key,
    result_type="markdown",
    content_guideline_instruction="This is a resume, gather related facts together and format it as "
                                  "bullet points with headers"
).load_data("data/fake_resume.pdf")

print(documents[0].text)

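# embed the parsed resume and build an in-memory vector index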
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=global_embed_model
)

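# sanity check: query the index directly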
query_engine = index.as_query_engine(llm=global_llm, similarity_top_k=5)
response = query_engine.query("What is this person's name and what was their most recent job?")
print(response)

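# persist the index so later runs can reload it instead of re-parsing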
index.storage_context.persist(persist_dir=storage_dir)

restored_index = None
# Check if the index is stored on disk
if os.path.exists(storage_dir):
    # Load the index from disk
    storage_context = StorageContext.from_defaults(persist_dir=storage_dir)
    restored_index = load_index_from_storage(storage_context)
else:
    print("Index not found on disk.")


print("\n\n Reading back the index \n")
response = restored_index.as_query_engine(llm=global_llm, similarity_top_k=5)\
           .query("What is this person's name and what was their most recent job?")
print(response)

print("\n\n" + "="*50, "\n\n")


def query_resume(q: str) -> str:
    """Answers questions about a specific resume."""
    # we're using the query engine we already created above
    response = query_engine.query(f"This is a question about the specific resume we have in our database: {q}")
    return response.response


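# wrap the query function as a tool and give it to a function-calling agent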
resume_tool = FunctionTool.from_defaults(fn=query_resume)

agent = FunctionCallingAgent.from_tools(
    tools=[resume_tool],
    llm=global_llm,
    verbose=True
)

response = agent.chat("How many years of experience does the applicant have?")
print(response)

print("\n\n" + "="*50, "\n\n")


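# events that drive the form-filling workflow: ParseFormEvent kicks off form
# parsing, QueryEvent carries one form field to answer, and ResponseEvent
# carries the answer for that field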
class ParseFormEvent(Event):
    application_form: str


class QueryEvent(Event):
    field: str
    query: str


class ResponseEvent(Event):
    field: str
    response: str


class RAGWorkflow(Workflow):
    # shared state: where the index is persisted, plus the LLM and query
    # engine that the steps below use
    storage_dir = "./storage"
    llm: Groq
    query_engine: VectorStoreIndex

    # the first step is set-up: ingest the resume and build the query engine
    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
        self.llm = global_llm
        self.storage_dir = storage_dir
        if not ev.resume_file:
            raise ValueError("No resume file provided")

        if not ev.application_form:
            raise ValueError("No application form provided")


        # ingest the data and set up the query engine
        if os.path.exists(self.storage_dir):
            # you've already ingested the resume document
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # parse and load the resume document
            documents = LlamaParse(
                api_key=llama_cloud_api_key,
                result_type="markdown",
                content_guideline_instruction="This is a resume, gather related facts together and format it as "
                                              "bullet points with headers"
            ).load_data(ev.resume_file)
            # embed and index the documents
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=global_embed_model
            )
            index.storage_context.persist(persist_dir=self.storage_dir)

        # create a query engine
        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)

        # you no longer need a query to be passed in,
        # you'll be generating the queries instead
        # let's pass the application form to a new step to parse it
        return ParseFormEvent(application_form=ev.application_form)

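    # parse the application form, extract its fields as JSON, and fan out one
    # QueryEvent per field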
    @step
    async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> QueryEvent:
        parser = LlamaParse(
            api_key=llama_cloud_api_key,
            result_type="markdown",
            content_guideline_instruction="This is a job application form. Create a list of all the fields that "
                                          "need to be filled in.",
            formatting_instruction="Return a bulleted list of the fields ONLY."
        )

        # get the LLM to convert the parsed form into JSON
        result = parser.load_data(ev.application_form)[0]
        raw_json = self.llm.complete(
            f"""
            This is a parsed form. 
            Convert it into a JSON object containing only the list 
            of fields to be filled in, in the form {{ fields: [...] }}. 
            <form>{result.text}</form>. 
            Return JSON ONLY, no markdown.
            """)
        fields = json.loads(raw_json.text)["fields"]

        # new!
        # generate one query for each of the fields, and fire them off
        for field in fields:
            ctx.send_event(QueryEvent(
                field=field,
                query=f"How would you answer this question about the candidate? {field}"
            ))

        # store the number of fields so we know how many to wait for later
        await ctx.set("total_fields", len(fields))
        return

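    # answer a single field's question against the resume query engine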
    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
        response = self.query_engine.query(
            f"This is a question about the specific resume we have in our database: {ev.query}")
        return ResponseEvent(field=ev.field, response=response.response)

    # new!
    @step
    async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> StopEvent:
        # get the total number of fields to wait for
        total_fields = await ctx.get("total_fields")

        responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
        if responses is None:
            return None  # do nothing if there's nothing to do yet

        # we've got all the responses!
        response_list = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)

        result = self.llm.complete(f"""
            You are given a list of fields in an application form and responses to
            questions about those fields from a resume. Combine the two into a list of
            fields and succinct, factual answers to fill in those fields.

            <responses>
            {response_list}
            </responses>
        """)
        return StopEvent(result=result)


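# run the workflow end to end on the sample resume and application form,
# then render the workflow graph as HTML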
async def main():
    w = RAGWorkflow(timeout=120, verbose=False)
    result = await w.run(
        resume_file="data/fake_resume.pdf",
        application_form="data/fake_application_form.pdf"
    )
    print(result)

    # draw the workflow graph and display it as HTML
    workflow_file = Path(__file__).parent / "workflows" / "form_parsing_workflow.html"
    workflow_file.parent.mkdir(parents=True, exist_ok=True)
    draw_all_possible_flows(w, filename=str(workflow_file))
    html_content = extract_html_content(str(workflow_file))
    display(HTML(html_content), metadata=dict(isolated=True))


if __name__ == "__main__":
    asyncio.run(main())