Spaces:

ppaihack
/

ZamaKlinik

Sleeping

App Files Community

lukalafaye committed on Sep 28, 2024

Commit

cb4d28e

1 Parent(s): 70deb6a

last

Browse files

Files changed (1) hide show

app.py +31 -39

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from langchain_core.prompts import PromptTemplate
 import re
 import json
 api_key = os.environ.get("HFBearer")
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
@@ -17,38 +18,31 @@ API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
 # Function to extract text from image
 def extract_text_from_image(image):
-    text = pytesseract.image_to_string(image)
-    return text
 # Function to extract JSON from text
 def extract_json(text):
-    # Use regex to find the JSON between <JSON> and </JSON>
     match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
     if match:
-        json_str = match.group(1)  # Get the JSON string
         try:
-            # Load the JSON string into a Python dictionary
-            json_data = json.loads(json_str)
-            return json_data
         except json.JSONDecodeError:
-            return "Erreur de décodage JSON"
-    else:
-        return "Aucun JSON trouvé"
 # Function to get metadata title from image
 def get_image_metadata(image):
-    # You can customize this function to extract other metadata as needed
-    title = image.name.split('.')[0]  # Simple title extraction from file name without extension
-    return title
 def count_tokens(text):
     return len(text.split())
 image_params = {
     "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
     "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
-    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute), valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
     "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
     "echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
     "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
@@ -58,34 +52,31 @@ image_params = {
 }
 # Streamlit app layout
-st.title("API Query App")
-st.write("This app allows you to query the API and retrieve responses.")
 user_input = """
-Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
-Liste des paramètres : {parameters}
-Voici un exemple de réponse valide :
 <JSON>
 {{"date_naissance": "", "prenom": "", "nom": ""}}
 </JSON>
-Voici le texte à partir duquel vous devez extraire les paramètres :
 {texte}
 """
 prompt = PromptTemplate.from_template(user_input)
-llm = HuggingFaceEndpoint(
-    endpoint_url=API_URL,
-)
 llm_chain = prompt | llm
 # File uploader for multiple images
 uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
-# Modify the Streamlit section to extract the JSON for multiple images
 if st.button("Submit"):
     if uploaded_images:
         all_json_data = {}  # Dictionary to store JSON data for each image
@@ -93,24 +84,25 @@ if st.button("Submit"):
             with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                 image = Image.open(uploaded_image)
                 extracted_text = extract_text_from_image(image)
-                max_text_length = 500  # Adjust as needed to keep total tokens under 1024
                 if count_tokens(extracted_text) > max_text_length:
                     extracted_text = " ".join(extracted_text.split()[:max_text_length])
                 with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
-                    # Get metadata title from the image
-                    title = get_image_metadata(uploaded_image)
-                    parameters = image_params[title]
-                    output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
                     st.success(f"Response received for {uploaded_image.name}!")
                     # Extract JSON from the API output
-                    json_data = extract_json(output)  # Extract JSON from the API output
-                    all_json_data[title] = json_data  # Store JSON data with title as key
-                    st.write(title, json_data)
-        # Display all extracted JSON data
-        st.write("Extracted JSON Data for all images.")
     else:
-        st.warning("Please upload at least one image to extract text.")

 import re
 import json
+# Set up the Hugging Face API key
 api_key = os.environ.get("HFBearer")
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
 # Function to extract text from image
 def extract_text_from_image(image):
+    return pytesseract.image_to_string(image)
 # Function to extract JSON from text
 def extract_json(text):
     match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
     if match:
+        json_str = match.group(1)
         try:
+            return json.loads(json_str)
         except json.JSONDecodeError:
+            return "Error decoding JSON"
+    return "No JSON found"
 # Function to get metadata title from image
 def get_image_metadata(image):
+    return image.name.split('.')[0]
 def count_tokens(text):
     return len(text.split())
+# Mapping of image parameters to expected fields
 image_params = {
     "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
     "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
+    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
     "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
     "echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
     "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
 }
 # Streamlit app layout
+st.title("Medical Patient Data Extractor")
+st.write("This app extracts medical patient data from uploaded images.")
+# User prompt template
 user_input = """
+You will extract parameters from a text inside a JSON object, written between <JSON> and </JSON>.
+List of parameters: {parameters}
+Here is an example of a valid response:
 <JSON>
 {{"date_naissance": "", "prenom": "", "nom": ""}}
 </JSON>
+Here is the text from which you need to extract the parameters:
 {texte}
 """
 prompt = PromptTemplate.from_template(user_input)
+# Initialize Hugging Face LLM
+llm = HuggingFaceEndpoint(endpoint_url=API_URL)
 llm_chain = prompt | llm
 # File uploader for multiple images
 uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
 if st.button("Submit"):
     if uploaded_images:
         all_json_data = {}  # Dictionary to store JSON data for each image
             with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                 image = Image.open(uploaded_image)
                 extracted_text = extract_text_from_image(image)
+                st.text_area(f"Extracted Text from {uploaded_image.name}", value=extracted_text, height=200)
+                max_text_length = 500  # Adjust as needed
                 if count_tokens(extracted_text) > max_text_length:
                     extracted_text = " ".join(extracted_text.split()[:max_text_length])
+                title = get_image_metadata(uploaded_image)
+                parameters = image_params.get(title, "Unknown parameters")
                 with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
+                    output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
                     st.success(f"Response received for {uploaded_image.name}!")
                     # Extract JSON from the API output
+                    json_data = extract_json(output)
+                    all_json_data[title] = json_data
+                    st.write(f"**{title} JSON Data:**")
+                    st.json(json_data)  # Display JSON nicely
+        st.write("All extracted JSON Data:")
+        st.json(all_json_data)  # Display all extracted JSON data together
     else:
+        st.warning("Please upload at least one image to extract text.")