Spaces:

wolf1997
/

receipt_scanner

Running

App Files Files Community

wolf1997 committed on Mar 5, 2025

Commit

264fc54

verified ·

1 Parent(s): 4b77121

Update receipt_gen_agent.py

Browse files

Files changed (1) hide show

receipt_gen_agent.py +259 -259

receipt_gen_agent.py CHANGED Viewed

@@ -1,260 +1,260 @@
-from langchain_core.output_parsers import JsonOutputParser
-from langchain_core.prompts import PromptTemplate
-from dotenv import load_dotenv
-import os
-from typing import List
-from typing_extensions import TypedDict
-from langchain_core.messages import HumanMessage
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.output_parsers import RetryOutputParser
-from langgraph.graph import StateGraph, START, END
-import base64
-from IPython.display import Image as img, display
-from langchain_core.runnables.graph import MermaidDrawMethod
-from langgraph.checkpoint.memory import MemorySaver
-import json
-from pydantic import BaseModel, Field
-from io import BytesIO
-load_dotenv()
-GEMINI_API_KEY=os.getenv('google_api_key')
-GEMINI_MODEL='gemini-2.0-flash'
-llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
-from os import listdir
-from os.path import isfile, join
-class State(TypedDict):
-    prompt: str
-    image_number: int
-    image_data: json
-    image_byte: str
-    eval: dict
-    n_retries:int
-    image_name: str
-    image_data_list: list
-def generate_data_node(state:State):
-    class Items(BaseModel):
-        name: str = Field(description='the name of the item')
-        price : float = Field(description='the price of the item')
-        quantity: int = Field(description='the quantity of the item')
-    class Form(BaseModel):
-        loc_name: str = Field(description='the name of the location if no name put empty str')
-        address: str = Field(description='the address of the location if no location put empty str')
-        date: str = Field(description='the date if no date put empty str')
-        time: str = Field(description='the time if no time put empty str')
-        items: List[Items] = Field(description= 'list of the items if no items put empty list')
-        subtotal: float = Field(description= 'the subtotal if no subtotal put 0')
-        tax: float = Field(description='the tax, if no tax put 0')
-        total: float = Field(description='the total amount if no total amount put 0')
-    parser=JsonOutputParser(pydantic_object=Form)
-    instruction=parser.get_format_instructions()
-    message = HumanMessage(
-    content=[
-        {"type": "text", "text": f"{state.get('prompt')}"+'\n\n'+ instruction},
-        {
-            "type": "image_url",
-            "image_url": {"url": f"data:image/jpeg;base64,{state.get('image_byte')}"},
-        },
-    ],
-)
-    response=llm.invoke([message])
-    try:
-        response=parser.parse(response.content)
-        return {'image_data':response}
-    except:
-        prompt = PromptTemplate(
-        template="Answer the user query.\n{format_instructions}\n{query}\n",
-        input_variables=["query"],
-        partial_variables={"format_instructions": parser.get_format_instructions()},
-        )
-        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
-        prompt_value=prompt.format_prompt(query=f'{state.get('prompt')}')
-        response=retry_parser.parse_with_prompt(response.content, prompt_value)
-        return {'image_data':response}
-def evaluate_node(state:State):
-    class Decision(BaseModel):
-        decision: str = Field(description='good or modify if changes have to be made')
-        comment: str = Field(description='the changes to make')
-    parser=JsonOutputParser(pydantic_object=Decision)
-    prompt = PromptTemplate(
-    template="Answer the user query.\n{format_instructions}\n{query}\n",
-    input_variables=["query"],
-    partial_variables={"format_instructions": parser.get_format_instructions()},
-    )
-    data=state.get('image_data')
-    query=f" is the {data} correct and makes sense tell the llm what to change, ignore missing data, don't make it up, no explanation or decription needed"
-    chain = prompt | llm
-    response=chain.invoke({'query':query})
-    try:
-        response=parser.parse(response.content)
-    except:
-        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
-        prompt_value = prompt.format_prompt(query=query)
-        response=retry_parser.parse_with_prompt(response.content, prompt_value)
-    return {'eval': response}
-def data_editor_node(state:State):
-    class Items(BaseModel):
-        name: str = Field(description='the name of the item')
-        price : float = Field(description='the price of the item')
-        quantity: int = Field(description='the quantity of the item')
-    class Form(BaseModel):
-        loc_name: str = Field(description='the name of the location if no name put empty str')
-        address: str = Field(description='the address of the location if no location put empty str')
-        date: str = Field(description='the date if no date put empty str')
-        time: str = Field(description='the time if no time put empty str')
-        items: List[Items] = Field(description= 'list of the items if no items put empty list')
-        subtotal: float = Field(description= 'the subtotal if no subtotal put 0')
-        tax: float = Field(description='the tax, if no tax put 0')
-        total: float = Field(description='the total amount if no total amount put 0')
-    parser=JsonOutputParser(pydantic_object=Form)
-    prompt = PromptTemplate(
-    template="Answer the user query.\n{format_instructions}\n{query}\n",
-    input_variables=["query"],
-    partial_variables={"format_instructions": parser.get_format_instructions()},
-    )
-    data=state.get('image_data')
-    query=f"modify this dict: {data} based on these comments {state.get('eval').get('comment')}, return a json"
-    chain = prompt | llm
-    response=chain.invoke({'query':query})
-    try:
-        response=parser.parse(response.content)
-    except:
-        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
-        prompt_value = prompt.format_prompt(query=query)
-        response=retry_parser.parse_with_prompt(response.content, prompt_value)
-    return {'image_data': response,
-            'n_retries':state.get('n_retries')+1}
-def should_continue(state:State)-> str:
-    """
-        Determine whether the research process should continue based on the current state.
-        Args:
-            state: The current state of the agent.
-        Returns:
-            str: The next state to transition to ("to_add_data", "to_prompt_editor").
-        """
-    eval=state.get('eval').get('decision')
-    if eval =='good':
-        return 'to_add_data'
-    elif eval =='modify' and state.get('n_retries')<2:
-        return 'to_data_editor'
-    else:
-        return 'to_add_data'
-def add_data_node(state:State):
-        img_number=state.get('image_number')
-        return {
-                'n_retries':0,
-                'image_name':f'{img_number}_new_receipt.jpg'}
-class receipt_agent:
-    def __init__(self):
-        self.agent=self._setup()
-    def _setup(self):
-        agent_builder=StateGraph(State)
-        agent_builder.add_node('generate_data',generate_data_node)
-        agent_builder.add_node('evaluate',evaluate_node)
-        agent_builder.add_node('add_data',add_data_node)
-        agent_builder.add_node('data_editor',data_editor_node)
-        agent_builder.add_edge(START,'generate_data')
-        agent_builder.add_edge('generate_data','evaluate')
-        # agent_builder.add_edge('evaluate',END)
-        agent_builder.add_conditional_edges('evaluate', should_continue, {'to_data_editor':'data_editor', 'to_add_data':'add_data'},)
-        agent_builder.add_edge('data_editor','evaluate')
-        agent_builder.add_edge('add_data', END)
-        checkpointer=MemorySaver()
-        agent=agent_builder.compile(checkpointer=checkpointer)
-        return agent
-    def display_graph(self):
-        return display(
-                        img(
-                                self.agent.get_graph().draw_mermaid_png(
-                                    draw_method=MermaidDrawMethod.API,
-                                )
-                            )
-                        )
-    def get_state(self, state_val:str):
-        config = {"configurable": {"thread_id": "1"}}
-        return self.agent.get_state(config).values[state_val]
-    def receipt_gen(self,image):
-        config = {"configurable": {"thread_id": "1"}}
-        buffered=BytesIO()
-        image.save(buffered, format='JPEG')
-        image_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        data_list = [f for f in listdir('new_receipt_data') if isfile(join('new_receipt_data', f))]
-        if not data_list:
-            data_list=[]
-        else:
-            with open(f'new_receipt_data/{data_list[0]}', 'r') as openfile:
-            # Reading from json file
-                data_list = json.load(openfile)
-        response=self.agent.invoke({'prompt':'analyse this receipt and list the items, return a json',
-                                'n_retries':0,
-                                'image_number':len(data_list),
-                                'image_byte': image_data,
-                                'image_data_list':data_list}, config)
-        image_data=response.get('image_data')
-        return image_data
-    def update_state(self, values:dict):
-        config = {"configurable": {"thread_id": "1"}}
-        return self.agent.update_state(config,values=values)
-    def confirm(self,image_data):
-        config = {"configurable": {"thread_id": "1"}}
-        if image_data:
-            data_list=self.agent.get_state(config).values['image_data_list']
-            img_number=self.agent.get_state(config).values['image_number']
-            image_name=self.agent.get_state(config).values['image_name']
-            if not data_list:
-                    data_list=[]
-            data_list.append({'receipt_name':f'{img_number}_new_receipt.jpg',
-                'receipt_data':image_data})
-            self.agent.update_state(config,values={'image_data_list':data_list})
-            return data_list,image_name

+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.prompts import PromptTemplate
+from dotenv import load_dotenv
+import os
+from typing import List
+from typing_extensions import TypedDict
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.output_parsers import RetryOutputParser
+from langgraph.graph import StateGraph, START, END
+import base64
+from IPython.display import Image as img, display
+from langchain_core.runnables.graph import MermaidDrawMethod
+from langgraph.checkpoint.memory import MemorySaver
+import json
+from pydantic import BaseModel, Field
+from io import BytesIO
+# Load variables from a local .env file (if present) into the environment.
+load_dotenv()
+# The API key is looked up under the lower-case name 'google_api_key'.
+GEMINI_API_KEY=os.getenv('google_api_key')
+GEMINI_MODEL='gemini-2.0-flash'
+# Module-level chat model shared by the LLM-calling graph nodes below.
+llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
+from os import listdir
+from os.path import isfile, join
+class State(TypedDict):
+    """Shared graph state passed between the receipt-agent nodes."""
+    # Instruction text sent to the model together with the receipt image.
+    prompt: str
+    # Index of the receipt; add_data_node derives image_name from it.
+    image_number: int
+    # Parsed receipt fields returned by the JSON output parser.
+    # (Previously annotated with the `json` module, which is not a type.)
+    image_data: dict
+    # Base64-encoded JPEG bytes of the receipt image.
+    image_byte: str
+    # Evaluator verdict; nodes read its 'decision' and 'comment' keys.
+    eval: dict
+    # Number of data_editor passes made so far.
+    n_retries: int
+    # File name assigned to the receipt by add_data_node.
+    image_name: str
+    # Receipt records loaded by receipt_gen and extended by confirm.
+    image_data_list: list
+def generate_data_node(state:State):
+    """Ask the LLM to read the receipt image and return its fields as JSON.
+
+    Args:
+        state: Graph state; reads 'prompt' and 'image_byte' (base64 JPEG).
+
+    Returns:
+        dict: ``{'image_data': parsed}`` where ``parsed`` is the parser output.
+        The model is instructed to follow the ``Form`` schema below.
+    """
+    class Items(BaseModel):
+        name: str = Field(description='the name of the item')
+        price : float = Field(description='the price of the item')
+        quantity: int = Field(description='the quantity of the item')
+    class Form(BaseModel):
+        loc_name: str = Field(description='the name of the location if no name put empty str')
+        address: str = Field(description='the address of the location if no location put empty str')
+        date: str = Field(description='the date if no date put empty str')
+        time: str = Field(description='the time if no time put empty str')
+        items: List[Items] = Field(description= 'list of the items if no items put empty list')
+        subtotal: float = Field(description= 'the subtotal if no subtotal put 0')
+        tax: float = Field(description='the tax, if no tax put 0')
+        total: float = Field(description='the total amount if no total amount put 0')
+    parser = JsonOutputParser(pydantic_object=Form)
+    format_instructions = parser.get_format_instructions()
+    user_prompt = f"{state.get('prompt')}"
+    message = HumanMessage(
+        content=[
+            {"type": "text", "text": user_prompt + '\n\n' + format_instructions},
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{state.get('image_byte')}"},
+            },
+        ],
+    )
+    response = llm.invoke([message])
+    try:
+        parsed = parser.parse(response.content)
+    except Exception:
+        # `except Exception` (not a bare except) so KeyboardInterrupt and
+        # SystemExit still propagate while any parsing failure is retried.
+        # NOTE(review): the retry prompt is text-only, so the retry call does
+        # not include the receipt image - confirm this is intended.
+        prompt = PromptTemplate(
+            template="Answer the user query.\n{format_instructions}\n{query}\n",
+            input_variables=["query"],
+            partial_variables={"format_instructions": format_instructions},
+        )
+        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
+        prompt_value = prompt.format_prompt(query=user_prompt)
+        parsed = retry_parser.parse_with_prompt(response.content, prompt_value)
+    return {'image_data': parsed}
+def evaluate_node(state:State):
+    """Ask the LLM whether the extracted receipt data looks correct.
+
+    Args:
+        state: Graph state; reads 'image_data'.
+
+    Returns:
+        dict: ``{'eval': verdict}``. The model is instructed to answer with a
+        'decision' ('good' or 'modify') and a 'comment' describing changes.
+    """
+    class Decision(BaseModel):
+        decision: str = Field(description='good or modify if changes have to be made')
+        comment: str = Field(description='the changes to make')
+    parser = JsonOutputParser(pydantic_object=Decision)
+    prompt = PromptTemplate(
+        template="Answer the user query.\n{format_instructions}\n{query}\n",
+        input_variables=["query"],
+        partial_variables={"format_instructions": parser.get_format_instructions()},
+    )
+    data = state.get('image_data')
+    query=f" is the {data} correct and makes sense tell the llm what to change, ignore missing data, don't make it up, no explanation or decription needed"
+    response = (prompt | llm).invoke({'query': query})
+    try:
+        verdict = parser.parse(response.content)
+    except Exception:
+        # `except Exception` (not a bare except) so KeyboardInterrupt and
+        # SystemExit still propagate; any parsing failure gets one retry
+        # through the LLM-backed retry parser.
+        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
+        prompt_value = prompt.format_prompt(query=query)
+        verdict = retry_parser.parse_with_prompt(response.content, prompt_value)
+    return {'eval': verdict}
+def data_editor_node(state:State):
+    """Ask the LLM to revise the receipt data using the evaluator's comment.
+
+    Args:
+        state: Graph state; reads 'image_data', eval['comment'] and
+            'n_retries'.
+
+    Returns:
+        dict: the revised 'image_data' and 'n_retries' incremented by one.
+    """
+    class Items(BaseModel):
+        name: str = Field(description='the name of the item')
+        price : float = Field(description='the price of the item')
+        quantity: int = Field(description='the quantity of the item')
+    class Form(BaseModel):
+        loc_name: str = Field(description='the name of the location if no name put empty str')
+        address: str = Field(description='the address of the location if no location put empty str')
+        date: str = Field(description='the date if no date put empty str')
+        time: str = Field(description='the time if no time put empty str')
+        items: List[Items] = Field(description= 'list of the items if no items put empty list')
+        subtotal: float = Field(description= 'the subtotal if no subtotal put 0')
+        tax: float = Field(description='the tax, if no tax put 0')
+        total: float = Field(description='the total amount if no total amount put 0')
+    parser = JsonOutputParser(pydantic_object=Form)
+    prompt = PromptTemplate(
+        template="Answer the user query.\n{format_instructions}\n{query}\n",
+        input_variables=["query"],
+        partial_variables={"format_instructions": parser.get_format_instructions()},
+    )
+    data = state.get('image_data')
+    query=f"modify this dict: {data} based on these comments {state.get('eval').get('comment')}, return a json"
+    response = (prompt | llm).invoke({'query': query})
+    try:
+        revised = parser.parse(response.content)
+    except Exception:
+        # `except Exception` (not a bare except) so KeyboardInterrupt and
+        # SystemExit still propagate; any parsing failure gets one retry
+        # through the LLM-backed retry parser.
+        retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)
+        prompt_value = prompt.format_prompt(query=query)
+        revised = retry_parser.parse_with_prompt(response.content, prompt_value)
+    # A missing or None counter is treated as 0 instead of raising TypeError.
+    retries_so_far = state.get('n_retries') or 0
+    return {'image_data': revised,
+            'n_retries': retries_so_far + 1}
+def should_continue(state:State)-> str:
+    """Pick the branch to follow after the evaluate node.
+
+    Args:
+        state: Graph state; reads eval['decision'] and 'n_retries'.
+
+    Returns:
+        str: 'to_data_editor' when the evaluator answered 'modify' and fewer
+        than two edit passes have run; 'to_add_data' in every other case
+        (verdict 'good', unknown verdict, or retry budget exhausted).
+    """
+    decision = state.get('eval').get('decision')
+    # n_retries is only consulted for a 'modify' verdict (short-circuit).
+    needs_edit = decision == 'modify' and state.get('n_retries') < 2
+    return 'to_data_editor' if needs_edit else 'to_add_data'
+def add_data_node(state:State):
+    """Reset the retry counter and name the receipt after its image number."""
+    receipt_index = state.get('image_number')
+    update = {'n_retries': 0}
+    update['image_name'] = f'{receipt_index}_new_receipt.jpg'
+    return update
+class receipt_agent:
+    """Graph-based agent that extracts, reviews and records receipt data.
+
+    The graph runs generate_data -> evaluate, loops through data_editor while
+    should_continue asks for changes, and finishes in add_data.
+    """
+    # Directory scanned by receipt_gen for the stored receipt list.
+    DATA_DIR = 'new_receipt_data'
+    # Image modes that can be written as JPEG without conversion.
+    # NOTE(review): taken from Pillow's JPEG writer - confirm against the
+    # installed Pillow version.
+    _JPEG_MODES = frozenset({'1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'})
+    def __init__(self, thread_id: str = "1"):
+        """Compile the graph.
+
+        Args:
+            thread_id: Checkpointer thread used by every call on this
+                instance. Defaults to "1", the value that was previously
+                hard-coded in each method.
+        """
+        self.thread_id = thread_id
+        self.agent = self._setup()
+    def _thread_config(self):
+        """Return a fresh run config addressing this instance's thread."""
+        return {"configurable": {"thread_id": self.thread_id}}
+    def _setup(self):
+        """Wire the nodes and edges and compile with an in-memory checkpointer."""
+        agent_builder = StateGraph(State)
+        agent_builder.add_node('generate_data', generate_data_node)
+        agent_builder.add_node('evaluate', evaluate_node)
+        agent_builder.add_node('add_data', add_data_node)
+        agent_builder.add_node('data_editor', data_editor_node)
+        agent_builder.add_edge(START, 'generate_data')
+        agent_builder.add_edge('generate_data', 'evaluate')
+        agent_builder.add_conditional_edges(
+            'evaluate',
+            should_continue,
+            {'to_data_editor': 'data_editor', 'to_add_data': 'add_data'},
+        )
+        agent_builder.add_edge('data_editor', 'evaluate')
+        agent_builder.add_edge('add_data', END)
+        return agent_builder.compile(checkpointer=MemorySaver())
+    def display_graph(self):
+        """Render the graph as a Mermaid PNG and display it (IPython)."""
+        png = self.agent.get_graph().draw_mermaid_png(
+            draw_method=MermaidDrawMethod.API,
+        )
+        return display(img(png))
+    def get_state(self, state_val:str):
+        """Return one value from the checkpointed state by key.
+
+        Raises:
+            KeyError: if ``state_val`` is not present in the stored state.
+        """
+        return self.agent.get_state(self._thread_config()).values[state_val]
+    def _load_stored_receipts(self):
+        """Load the receipt list from the first file in DATA_DIR.
+
+        Returns [] when the directory is missing or holds no files. File
+        names are sorted so the chosen file does not depend on listing order.
+        """
+        if not os.path.isdir(self.DATA_DIR):
+            return []
+        stored = sorted(f for f in listdir(self.DATA_DIR) if isfile(join(self.DATA_DIR, f)))
+        if not stored:
+            return []
+        with open(join(self.DATA_DIR, stored[0]), 'r', encoding='utf-8') as openfile:
+            return json.load(openfile)
+    def receipt_gen(self,image):
+        """Run the graph on a receipt image and return the extracted data.
+
+        Args:
+            image: Object with ``save(fp, format=...)``; assumed to be a
+                PIL.Image - TODO confirm against the caller.
+
+        Returns:
+            The 'image_data' value of the final graph state.
+        """
+        # JPEG cannot store alpha or palette modes; convert such images first
+        # so that e.g. RGBA uploads do not fail in save().
+        mode = getattr(image, 'mode', None)
+        if mode is not None and mode not in self._JPEG_MODES:
+            image = image.convert('RGB')
+        buffered = BytesIO()
+        image.save(buffered, format='JPEG')
+        encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        data_list = self._load_stored_receipts()
+        response = self.agent.invoke(
+            {
+                'prompt': 'analyse this receipt and list the items, return a json',
+                'n_retries': 0,
+                'image_number': len(data_list),
+                'image_byte': encoded,
+                'image_data_list': data_list,
+            },
+            self._thread_config(),
+        )
+        return response.get('image_data')
+    def update_state(self, values:dict):
+        """Write ``values`` into the checkpointed state of this thread."""
+        return self.agent.update_state(self._thread_config(), values=values)
+    def confirm(self,image_data):
+        """Append confirmed receipt data to the stored list in the state.
+
+        Args:
+            image_data: The (possibly user-edited) receipt data to record.
+
+        Returns:
+            tuple | None: ``(data_list, image_name)`` after the append, or
+            None when ``image_data`` is empty.
+        """
+        if not image_data:
+            return None
+        config = self._thread_config()
+        # Read the snapshot once instead of once per key.
+        values = self.agent.get_state(config).values
+        data_list = values['image_data_list'] or []
+        img_number = values['image_number']
+        image_name = values['image_name']
+        data_list.append({'receipt_name': f'{img_number}_new_receipt.jpg',
+                          'receipt_data': image_data})
+        self.agent.update_state(config, values={'image_data_list': data_list})
+        return data_list, image_name