Spaces:
Sleeping
Sleeping
Commit ·
cb4d28e
1
Parent(s): 70deb6a
last
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from langchain_core.prompts import PromptTemplate
|
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
|
|
|
|
| 12 |
# Read the Hugging Face token from the "HFBearer" environment variable.
api_key = os.environ.get("HFBearer")
# os.environ only accepts str values: assigning None raises TypeError at
# import time, so export the token only when it is actually configured.
if api_key is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
|
| 14 |
|
|
@@ -17,38 +18,31 @@ API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
|
|
| 17 |
|
| 18 |
# Function to extract text from image
|
| 19 |
def extract_text_from_image(image):
|
| 20 |
-
|
| 21 |
-
return text
|
| 22 |
|
| 23 |
# Function to extract JSON from text
|
| 24 |
def extract_json(text):
|
| 25 |
-
# Use regex to find the JSON between <JSON> and </JSON>
|
| 26 |
match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
|
| 27 |
-
|
| 28 |
if match:
|
| 29 |
-
json_str = match.group(1)
|
| 30 |
try:
|
| 31 |
-
|
| 32 |
-
json_data = json.loads(json_str)
|
| 33 |
-
return json_data
|
| 34 |
except json.JSONDecodeError:
|
| 35 |
-
return "
|
| 36 |
-
|
| 37 |
-
return "Aucun JSON trouvé"
|
| 38 |
|
| 39 |
# Function to get metadata title from image
|
| 40 |
def get_image_metadata(image):
|
| 41 |
-
|
| 42 |
-
title = image.name.split('.')[0] # Simple title extraction from file name without extension
|
| 43 |
-
return title
|
| 44 |
|
| 45 |
def count_tokens(text):
    """Return the number of whitespace-separated words in `text`."""
    words = text.split()
    return len(words)
|
| 47 |
|
|
|
|
| 48 |
image_params = {
|
| 49 |
"bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
|
| 50 |
"bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
|
| 51 |
-
"ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute
|
| 52 |
"echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
|
| 53 |
"echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
|
| 54 |
"echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
|
|
@@ -58,34 +52,31 @@ image_params = {
|
|
| 58 |
}
|
| 59 |
|
| 60 |
# Streamlit app layout
|
| 61 |
-
st.title("
|
| 62 |
-
st.write("This app
|
| 63 |
|
|
|
|
| 64 |
user_input = """
|
| 65 |
-
|
| 66 |
-
|
| 67 |
|
| 68 |
-
|
| 69 |
<JSON>
|
| 70 |
{{"date_naissance": "", "prenom": "", "nom": ""}}
|
| 71 |
</JSON>
|
| 72 |
|
| 73 |
-
|
| 74 |
{texte}
|
| 75 |
"""
|
| 76 |
-
|
| 77 |
prompt = PromptTemplate.from_template(user_input)
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
)
|
| 82 |
-
|
| 83 |
llm_chain = prompt | llm
|
| 84 |
|
| 85 |
# File uploader for multiple images
|
| 86 |
uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
|
| 87 |
|
| 88 |
-
# Modify the Streamlit section to extract the JSON for multiple images
|
| 89 |
if st.button("Submit"):
|
| 90 |
if uploaded_images:
|
| 91 |
all_json_data = {} # Dictionary to store JSON data for each image
|
|
@@ -93,24 +84,25 @@ if st.button("Submit"):
|
|
| 93 |
with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
|
| 94 |
image = Image.open(uploaded_image)
|
| 95 |
extracted_text = extract_text_from_image(image)
|
|
|
|
| 96 |
|
| 97 |
-
max_text_length = 500 # Adjust as needed
|
| 98 |
if count_tokens(extracted_text) > max_text_length:
|
| 99 |
extracted_text = " ".join(extracted_text.split()[:max_text_length])
|
| 100 |
|
|
|
|
|
|
|
|
|
|
| 101 |
with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
|
| 102 |
-
|
| 103 |
-
title = get_image_metadata(uploaded_image)
|
| 104 |
-
parameters = image_params[title]
|
| 105 |
-
output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
|
| 106 |
st.success(f"Response received for {uploaded_image.name}!")
|
| 107 |
|
| 108 |
# Extract JSON from the API output
|
| 109 |
-
json_data = extract_json(output)
|
| 110 |
-
all_json_data[title] = json_data
|
| 111 |
-
st.write(title
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
st.
|
| 115 |
else:
|
| 116 |
-
st.warning("Please upload at least one image to extract text.")
|
|
|
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
|
| 12 |
+
# Set up the Hugging Face API key
api_key = os.environ.get("HFBearer")
# os.environ only accepts str values: assigning None raises TypeError at
# import time, so export the token only when it is actually configured.
if api_key is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
|
| 15 |
|
|
|
|
| 18 |
|
| 19 |
# Function to extract text from image
|
| 20 |
def extract_text_from_image(image):
    """Return the text pytesseract recognizes in `image`.

    The visible caller passes the result of `Image.open(...)` on an
    uploaded file.
    """
    recognized = pytesseract.image_to_string(image)
    return recognized
|
|
|
|
| 22 |
|
| 23 |
# Function to extract JSON from text
|
| 24 |
def extract_json(text):
    """Decode the JSON payload wrapped in <JSON>...</JSON> tags in `text`.

    Only the first tagged block is considered.

    Returns:
        The decoded JSON value when the block parses.
        "Error decoding JSON" when a tagged block exists but is not valid JSON.
        "No JSON found" when there is no tagged block or `text` is not a
        string (e.g. None), instead of letting `re.search` raise TypeError.
    """
    if not isinstance(text, str):
        return "No JSON found"

    # DOTALL lets the payload span several lines; the lazy group stops at the
    # first closing tag.
    match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
    if match is None:
        return "No JSON found"

    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return "Error decoding JSON"
|
|
|
|
| 33 |
|
| 34 |
# Function to get metadata title from image
|
| 35 |
def get_image_metadata(image):
    """Return the part of `image.name` that precedes its first dot.

    A name without any dot is returned unchanged.
    """
    title, _, _ = image.name.partition('.')
    return title
|
|
|
|
|
|
|
| 37 |
|
| 38 |
def count_tokens(text):
    """Return the number of whitespace-separated words in `text`."""
    words = text.split()
    return len(words)
|
| 40 |
|
| 41 |
+
# Mapping of image parameters to expected fields
|
| 42 |
image_params = {
|
| 43 |
"bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
|
| 44 |
"bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
|
| 45 |
+
"ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
|
| 46 |
"echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
|
| 47 |
"echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
|
| 48 |
"echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
|
|
|
|
| 52 |
}
|
| 53 |
|
| 54 |
# Streamlit app layout
|
| 55 |
+
st.title("Medical Patient Data Extractor")
|
| 56 |
+
st.write("This app extracts medical patient data from uploaded images.")
|
| 57 |
|
| 58 |
+
# User prompt template
|
| 59 |
user_input = """
|
| 60 |
+
You will extract parameters from a text inside a JSON object, written between <JSON> and </JSON>.
|
| 61 |
+
List of parameters: {parameters}
|
| 62 |
|
| 63 |
+
Here is an example of a valid response:
|
| 64 |
<JSON>
|
| 65 |
{{"date_naissance": "", "prenom": "", "nom": ""}}
|
| 66 |
</JSON>
|
| 67 |
|
| 68 |
+
Here is the text from which you need to extract the parameters:
|
| 69 |
{texte}
|
| 70 |
"""
|
|
|
|
| 71 |
prompt = PromptTemplate.from_template(user_input)
|
| 72 |
|
| 73 |
+
# Initialize Hugging Face LLM
|
| 74 |
+
llm = HuggingFaceEndpoint(endpoint_url=API_URL)
|
|
|
|
|
|
|
| 75 |
llm_chain = prompt | llm
|
| 76 |
|
| 77 |
# File uploader for multiple images
|
| 78 |
uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
|
| 79 |
|
|
|
|
| 80 |
if st.button("Submit"):
|
| 81 |
if uploaded_images:
|
| 82 |
all_json_data = {} # Dictionary to store JSON data for each image
|
|
|
|
| 84 |
with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
|
| 85 |
image = Image.open(uploaded_image)
|
| 86 |
extracted_text = extract_text_from_image(image)
|
| 87 |
+
st.text_area(f"Extracted Text from {uploaded_image.name}", value=extracted_text, height=200)
|
| 88 |
|
| 89 |
+
max_text_length = 500 # Adjust as needed
|
| 90 |
if count_tokens(extracted_text) > max_text_length:
|
| 91 |
extracted_text = " ".join(extracted_text.split()[:max_text_length])
|
| 92 |
|
| 93 |
+
title = get_image_metadata(uploaded_image)
|
| 94 |
+
parameters = image_params.get(title, "Unknown parameters")
|
| 95 |
+
|
| 96 |
with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
|
| 97 |
+
output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
|
|
|
|
|
|
|
|
|
|
| 98 |
st.success(f"Response received for {uploaded_image.name}!")
|
| 99 |
|
| 100 |
# Extract JSON from the API output
|
| 101 |
+
json_data = extract_json(output)
|
| 102 |
+
all_json_data[title] = json_data
|
| 103 |
+
st.write(f"**{title} JSON Data:**")
|
| 104 |
+
st.json(json_data) # Display JSON nicely
|
| 105 |
+
st.write("All extracted JSON Data:")
|
| 106 |
+
st.json(all_json_data) # Display all extracted JSON data together
|
| 107 |
else:
|
| 108 |
+
st.warning("Please upload at least one image to extract text.")
|