Spaces:

WebashalarForML
/

ImageDataExtractor2

Runtime error

WebashalarForML committed on Oct 11, 2024

Commit

bae46d8

verified ·

1 Parent(s): 2779ae0

Update utility/utils.py

Files changed (1) hide show

utility/utils.py CHANGED Viewed

@@ -185,20 +185,25 @@ def extract_text_from_images(image_paths):
 # Function to call the Gemma model and process the output as Json
 def Data_Extractor(data, client=client):
-    text = f'''Act as a  Text extractor for the following text given in text: {data}
-    Extract text in the following output JSON string:
     {{
-    "Name": ["Identify and Extract All the person's name from the text."],
-    "Designation": ["Extract All the designation or job title mentioned in the text."],
-    "Company": ["Extract All the company or organization name if mentioned."],
-    "Contact": ["Extract All phone number, including country codes if present."],
-    "Address": ["Extract All the full postal address or location mentioned in the text."],
-    "Email": ["Identify and Extract All valid email addresses mentioned in the text else 'Not found'."],
-    "Link": ["Identify and Extract any website URLs or social media links present in the text."]
     }}
-    Output:
-    '''
     # Call the API for inference
     response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)

 # Function to call the Gemma model and process the output as Json
 def Data_Extractor(data, client=client):
+    text = f'''<s>[INST] Act as a Text extractor for the following text given in text: {data}.
+    Your task is to extract specific information and return it in a JSON format as outlined below:
+    Please extract the following details:
     {{
+        "Name": ["Identify and extract all the person's names from the text."],
+        "Designation": ["Extract all designations or job titles mentioned in the text."],
+        "Company": ["Extract all company or organization names if mentioned."],
+        "Contact": ["Extract all phone numbers, including country codes if present."],
+        "Address": ["Extract all full postal addresses or locations mentioned in the text."],
+        "Email": ["Identify and extract all valid email addresses mentioned in the text; if none are found, return 'Not found'."],
+        "Link": ["Identify and extract any website URLs or social media links present in the text."]
     }}
+    Output:
+    [/INST]</s> [INST]
+    Return the extracted information in JSON format as follows:
+    [/INST]'''
     # Call the API for inference
     response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)