Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,7 +35,7 @@ uploaded_file = st.sidebar.file_uploader("Upload a medical text book (image)", t
|
|
| 35 |
# Initialize the parser
|
| 36 |
parser = LlamaParse(
|
| 37 |
result_type="markdown",
|
| 38 |
-
parsing_instruction="You are given medical
|
| 39 |
use_vendor_multimodal_model=True,
|
| 40 |
vendor_multimodal_model_name="gpt-4o-mini-2024-07-18",
|
| 41 |
show_progress=True,
|
|
@@ -46,14 +46,8 @@ parser = LlamaParse(
|
|
| 46 |
language="en"
|
| 47 |
)
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
|
| 51 |
-
mime_type, _ = guess_type(image_path)
|
| 52 |
-
if mime_type is None:
|
| 53 |
-
mime_type = 'image/png'
|
| 54 |
-
with open(image_path, "rb") as image_file:
|
| 55 |
-
base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
|
| 56 |
-
return f"data:{mime_type};base64,{base64_encoded_data}"
|
| 57 |
|
| 58 |
# Upload and process file
|
| 59 |
if uploaded_file:
|
|
@@ -70,6 +64,15 @@ if uploaded_file:
|
|
| 70 |
st.write("File successfully processed!")
|
| 71 |
st.write(f"Processed file: {uploaded_file.name}")
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# Function to get sorted image files
|
| 74 |
def get_page_number(file_name):
|
| 75 |
match = re.search(r"-page-(\d+)\.jpg$", str(file_name))
|
|
@@ -97,8 +100,11 @@ def get_text_nodes(md_json_objs, image_dir) -> t.List[TextNode]:
|
|
| 97 |
nodes.append(node)
|
| 98 |
return nodes
|
| 99 |
|
| 100 |
-
# Load text nodes
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# Setup index and LLM
|
| 104 |
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
|
|
@@ -168,4 +174,4 @@ query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
|
|
| 168 |
if query_text:
|
| 169 |
st.write("Querying...")
|
| 170 |
response = query_engine.custom_query(query_text)
|
| 171 |
-
st.markdown(response.response)
|
|
|
|
| 35 |
# Initialize the parser
|
| 36 |
parser = LlamaParse(
|
| 37 |
result_type="markdown",
|
| 38 |
+
parsing_instruction="You are given a medical textbook on medicine",
|
| 39 |
use_vendor_multimodal_model=True,
|
| 40 |
vendor_multimodal_model_name="gpt-4o-mini-2024-07-18",
|
| 41 |
show_progress=True,
|
|
|
|
| 46 |
language="en"
|
| 47 |
)
|
| 48 |
|
| 49 |
+
# Initialize md_json_objs as an empty list
|
| 50 |
+
md_json_objs = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Upload and process file
|
| 53 |
if uploaded_file:
|
|
|
|
| 64 |
st.write("File successfully processed!")
|
| 65 |
st.write(f"Processed file: {uploaded_file.name}")
|
| 66 |
|
| 67 |
+
# Function to encode image to data URL
|
| 68 |
+
def local_image_to_data_url(image_path):
|
| 69 |
+
mime_type, _ = guess_type(image_path)
|
| 70 |
+
if mime_type is None:
|
| 71 |
+
mime_type = 'image/png'
|
| 72 |
+
with open(image_path, "rb") as image_file:
|
| 73 |
+
base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
|
| 74 |
+
return f"data:{mime_type};base64,{base64_encoded_data}"
|
| 75 |
+
|
| 76 |
# Function to get sorted image files
|
| 77 |
def get_page_number(file_name):
|
| 78 |
match = re.search(r"-page-(\d+)\.jpg$", str(file_name))
|
|
|
|
| 100 |
nodes.append(node)
|
| 101 |
return nodes
|
| 102 |
|
| 103 |
+
# Load text nodes if md_json_objs is not empty
|
| 104 |
+
if md_json_objs:
|
| 105 |
+
text_nodes = get_text_nodes(md_json_objs, "data_images")
|
| 106 |
+
else:
|
| 107 |
+
text_nodes = []
|
| 108 |
|
| 109 |
# Setup index and LLM
|
| 110 |
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
|
|
|
|
| 174 |
if query_text:
|
| 175 |
st.write("Querying...")
|
| 176 |
response = query_engine.custom_query(query_text)
|
| 177 |
+
st.markdown(response.response)
|