Spaces:
Runtime error
Update utility/utils.py
Browse files
- utility/utils.py +16 -11
utility/utils.py
CHANGED
|
@@ -185,20 +185,25 @@ def extract_text_from_images(image_paths):
|
|
| 185 |
|
| 186 |
# Function to call the Gemma model and process the output as Json
|
| 187 |
def Data_Extractor(data, client=client):
|
| 188 |
-
text = f'''Act as a
|
| 189 |
-
|
|
|
|
|
|
|
| 190 |
{{
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
}}
|
| 199 |
|
| 200 |
-
Output:
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
| 202 |
# Call the API for inference
|
| 203 |
response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
|
| 204 |
|
|
|
|
| 185 |
|
| 186 |
# Function to call the Gemma model and process the output as Json
|
| 187 |
def Data_Extractor(data, client=client):
|
| 188 |
+
text = f'''<s>[INST] Act as a Text extractor for the following text given in text: {data}.
|
| 189 |
+
Your task is to extract specific information and return it in a JSON format as outlined below:
|
| 190 |
+
|
| 191 |
+
Please extract the following details:
|
| 192 |
{{
|
| 193 |
+
"Name": ["Identify and extract all the person's names from the text."],
|
| 194 |
+
"Designation": ["Extract all designations or job titles mentioned in the text."],
|
| 195 |
+
"Company": ["Extract all company or organization names if mentioned."],
|
| 196 |
+
"Contact": ["Extract all phone numbers, including country codes if present."],
|
| 197 |
+
"Address": ["Extract all full postal addresses or locations mentioned in the text."],
|
| 198 |
+
"Email": ["Identify and extract all valid email addresses mentioned in the text; if none are found, return 'Not found'."],
|
| 199 |
+
"Link": ["Identify and extract any website URLs or social media links present in the text."]
|
| 200 |
}}
|
| 201 |
|
| 202 |
+
Output:
|
| 203 |
+
[/INST]</s> [INST]
|
| 204 |
+
Return the extracted information in JSON format as follows:
|
| 205 |
+
[/INST]'''
|
| 206 |
+
|
| 207 |
# Call the API for inference
|
| 208 |
response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
|
| 209 |
|