Spaces:

rohitashva
/

Report-Analyzer

Sleeping

App Files Files Community

rohitashva committed on Feb 23, 2025

Commit

1ec5d32

verified ·

1 Parent(s): 3b24287

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -44

app.py CHANGED Viewed

@@ -1,45 +1,63 @@
-import fitz  # PyMuPDF
-import google.generativeai as genai
-import streamlit as st
-def extract_text_from_pdf(pdf_path):
-    """Return the plain text of every page of the PDF at ``pdf_path``.
-
-    Each page's text is followed by a newline. If opening or reading the
-    document fails, the error is shown with ``st.error`` and whatever text
-    was collected before the failure is returned (possibly an empty string).
-    """
-    chunks = []
-    try:
-        with fitz.open(pdf_path) as document:
-            for pdf_page in document:
-                chunks.append(pdf_page.get_text("text") + "\n")
-    except Exception as exc:
-        st.error(f"Error reading PDF: {exc}")
-    return "".join(chunks)
-def analyze_health_data(text):
-    """Analyzes extracted text using Google Generative AI (Free Tier API).
-
-    Args:
-        text: Report text to embed in the prompt.
-
-    Returns:
-        The model's reply as a string, or a string starting with
-        "Error in LLM response:" when the key is missing or the call fails.
-    """
-    import os  # local import: this version of the file has no module-level os import
-    # Get a free API key from Google AI Studio: https://aistudio.google.com/
-    # The key is read from the environment so it is never committed with the source.
-    api_key = os.environ.get("GOOGLE_API_KEY")
-    if not api_key:
-        return "Error in LLM response: GOOGLE_API_KEY environment variable is not set"
-    try:
-        genai.configure(api_key=api_key)
-        llm = genai.GenerativeModel("gemini-pro")  # Choose appropriate model
-        response = llm.generate_content(
-            f"Analyze this blood report and provide trends, risks, and health suggestions:\n{text}"
-        )
-        return response.text
-    except Exception as e:
-        return f"Error in LLM response: {e}"
-def main():
-    """Render the page: upload a PDF, preview its text, and analyze it on request."""
-    st.title("Health Report Analyzer")
-    report_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
-    if report_file is None:
-        # Nothing uploaded yet; only the title and the uploader are shown.
-        return
-    saved_path = "temp.pdf"
-    with open(saved_path, "wb") as out:
-        out.write(report_file.getbuffer())
-    report_text = extract_text_from_pdf(saved_path)
-    st.subheader("Extracted Report Text:")
-    # Only the first 1000 characters are previewed; the full text is analyzed.
-    preview = report_text[:1000]
-    st.text_area("Extracted Text", preview, height=200)
-    if not st.button("Analyze Report"):
-        return
-    with st.spinner("Analyzing..."):
-        findings = analyze_health_data(report_text)
-    st.subheader("Health Analysis:")
-    st.write(findings)
-# Build the page only when this file is run as the entry script, not on import.
-if __name__ == "__main__":
     main()

+import streamlit as st
+import google.generativeai as genai
+from transformers import AutoModel, AutoTokenizer
+from pdf2image import convert_from_path
+import torch
+import os
+@st.cache_resource
+def _load_ocr_model():
+    """Load the GOT-OCR2_0 tokenizer and model, cached across script reruns.
+
+    Streamlit re-executes the script on every widget interaction; caching the
+    loaded objects keeps the weights from being reloaded on each rerun.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple with the model switched to eval mode.
+    """
+    # NOTE(review): trust_remote_code=True executes Python code shipped in the
+    # model repository; consider pinning a reviewed revision.
+    ocr_tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
+    ocr_model = AutoModel.from_pretrained(
+        "ucaslcl/GOT-OCR2_0",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+        device_map="cuda" if torch.cuda.is_available() else "cpu",
+        use_safetensors=True,
+        pad_token_id=ocr_tokenizer.eos_token_id,
+    ).eval()
+    return ocr_tokenizer, ocr_model
+# Module-level names are kept because extract_text_from_pdf reads them.
+tokenizer, model = _load_ocr_model()
+def extract_text_from_pdf(pdf_path):
+    """Converts PDF pages to images and extracts text using the GOT-OCR2_0 model.
+
+    Page images are written to a private temporary directory that is removed
+    when extraction finishes or fails, so no files are left behind and
+    concurrent sessions cannot overwrite each other's pages.
+
+    Args:
+        pdf_path: Path of the PDF file to read.
+
+    Returns:
+        The OCR text of each page, each followed by a newline. On failure the
+        error is shown with ``st.error`` and the text of the pages processed
+        before the failure is returned (possibly an empty string).
+    """
+    import tempfile  # local import: tempfile is not imported at module level
+    page_texts = []
+    try:
+        with tempfile.TemporaryDirectory() as work_dir:
+            for idx, image in enumerate(convert_from_path(pdf_path)):
+                image_path = os.path.join(work_dir, f"page_{idx}.png")
+                image.save(image_path, "PNG")
+                page_texts.append(model.chat(tokenizer, image_path, ocr_type="ocr"))
+    except Exception as e:
+        st.error(f"Error extracting text: {e}")
+    return "".join(page + "\n" for page in page_texts)
+def analyze_health_data(text):
+    """Analyzes extracted text using Google Generative AI (Free Tier API).
+
+    Args:
+        text: Report text to embed in the prompt.
+
+    Returns:
+        The model's reply as a string, or a string starting with
+        "Error in LLM response:" when the key is missing or the call fails.
+    """
+    # The key is read from the environment so it is never committed with the source.
+    api_key = os.environ.get("GOOGLE_API_KEY")
+    if not api_key:
+        return "Error in LLM response: GOOGLE_API_KEY environment variable is not set"
+    try:
+        genai.configure(api_key=api_key)
+        # Named llm so it does not shadow the module-level OCR ``model``.
+        llm = genai.GenerativeModel("gemini-pro")
+        response = llm.generate_content(
+            f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
+        )
+        return response.text
+    except Exception as e:
+        return f"Error in LLM response: {e}"
+def main():
+    """Render the page: upload a PDF, OCR it once, preview the text, analyze on request.
+
+    The upload is written to a uniquely named temporary file that is deleted
+    after extraction. The OCR result is kept in ``st.session_state`` keyed by
+    a hash of the file content, so pressing "Analyze Report" (which reruns the
+    script) does not repeat the OCR pass.
+    """
+    import hashlib  # local imports: neither module is imported at module level
+    import tempfile
+    st.title("Health Report Analyzer")
+    uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
+    if uploaded_file is None:
+        return
+    upload_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
+    if st.session_state.get("ocr_upload_digest") != upload_digest:
+        # delete=False so the file can be reopened by path after it is closed.
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+            tmp.write(uploaded_file.getbuffer())
+            pdf_path = tmp.name
+        try:
+            with st.spinner("Extracting text from the report..."):
+                st.session_state["ocr_text"] = extract_text_from_pdf(pdf_path)
+        finally:
+            os.remove(pdf_path)
+        st.session_state["ocr_upload_digest"] = upload_digest
+    extracted_text = st.session_state["ocr_text"]
+    st.subheader("Extracted Report Text:")
+    # Only the first 1000 characters are previewed; the full text is analyzed.
+    st.text_area("Extracted Text", extracted_text[:1000], height=200)
+    if st.button("Analyze Report"):
+        with st.spinner("Analyzing..."):
+            analysis = analyze_health_data(extracted_text)
+        st.subheader("Health Analysis:")
+        st.write(analysis)
+# Build the page only when this file is run as the entry script, not on import.
+if __name__ == "__main__":
     main()