Spaces:
Sleeping
Sleeping
Update sherlock2.py
Browse files- sherlock2.py +21 -37
sherlock2.py
CHANGED
|
@@ -21,7 +21,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY_PROJECTID"))
|
|
| 21 |
|
| 22 |
# Load pre-trained Gemini model
|
| 23 |
model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
|
| 24 |
-
vision_model = genai.GenerativeModel('models/gemini-pro-vision')
|
| 25 |
|
| 26 |
# Define Sherlock Holmes's persona and guidelines
|
| 27 |
sherlock_persona = """
|
|
@@ -50,21 +50,6 @@ Let your every utterance and action be a masterpiece of deductive prowess, a sym
|
|
| 50 |
# Generate embeddings using the Gemini Embedding API
|
| 51 |
embed_model = 'models/embedding-001'
|
| 52 |
|
| 53 |
-
# Function for embedding generation (using models/embedding-001)
|
| 54 |
-
def generate_embeddings_from_documents(extracted_text):
|
| 55 |
-
"""Generates embeddings for a list of extracted text documents using the 'models/embedding-001' model
|
| 56 |
-
and the appropriate task type."""
|
| 57 |
-
embeddings = []
|
| 58 |
-
for text in extracted_text:
|
| 59 |
-
try:
|
| 60 |
-
# Determine the appropriate task type (e.g., "RETRIEVAL_DOCUMENT" for search/similarity)
|
| 61 |
-
task_type = "RETRIEVAL_DOCUMENT"
|
| 62 |
-
response = genai.embed_content(model=embed_model, content=text, task_type=task_type)
|
| 63 |
-
embeddings.append(response["embedding"])
|
| 64 |
-
except Exception as e:
|
| 65 |
-
st.error(f"Error generating embeddings: {e}")
|
| 66 |
-
return embeddings
|
| 67 |
-
|
| 68 |
def extract_keywords_simple(extracted_text):
|
| 69 |
"""Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
|
| 70 |
prompt = """
|
|
@@ -77,15 +62,16 @@ def extract_keywords_simple(extracted_text):
|
|
| 77 |
return keywords
|
| 78 |
|
| 79 |
# Function to extract text from various file types
|
| 80 |
-
def extract_text_from_files(uploaded_files):
|
| 81 |
-
"""Extracts text content
|
| 82 |
-
|
| 83 |
for uploaded_file in uploaded_files:
|
| 84 |
file_type = uploaded_file.type
|
| 85 |
if file_type == "text/plain":
|
| 86 |
# Plain Text File
|
| 87 |
raw_text = str(uploaded_file.read(), "utf-8")
|
| 88 |
-
|
|
|
|
| 89 |
elif file_type == "application/pdf":
|
| 90 |
# PDF Document
|
| 91 |
pdf_reader = PyPDF2.PdfReader(uploaded_file)
|
|
@@ -93,19 +79,21 @@ def extract_text_from_files(uploaded_files):
|
|
| 93 |
for page_num in range(len(pdf_reader.pages)):
|
| 94 |
page = pdf_reader.pages[page_num]
|
| 95 |
text += page.extract_text()
|
| 96 |
-
|
|
|
|
| 97 |
else:
|
| 98 |
# Other Document Types (Using Textract)
|
| 99 |
try:
|
| 100 |
text = textract.process(uploaded_file).decode("utf-8")
|
| 101 |
-
|
|
|
|
| 102 |
except Exception as e:
|
| 103 |
st.error(f"Error extracting text from file: {e}")
|
| 104 |
-
return
|
| 105 |
|
| 106 |
-
# Function to process images using Gemini Pro Vision
|
| 107 |
def process_images(uploaded_images):
|
| 108 |
-
"""Processes a list of uploaded images using Gemini Pro Vision to extract relevant information."""
|
| 109 |
image_insights = []
|
| 110 |
for uploaded_image in uploaded_images:
|
| 111 |
try:
|
|
@@ -171,7 +159,7 @@ def clear_chat():
|
|
| 171 |
st.session_state.chat_history = []
|
| 172 |
|
| 173 |
def investigate():
|
| 174 |
-
"""Handles the case investigation process with
|
| 175 |
st.subheader("Case Investigation")
|
| 176 |
|
| 177 |
# File upload with clear labels and progress bars
|
|
@@ -189,24 +177,20 @@ def investigate():
|
|
| 189 |
|
| 190 |
# Extract text and process images with progress indication
|
| 191 |
with st.spinner("Extracting text and analyzing images..."):
|
| 192 |
-
|
| 193 |
-
keywords = extract_keywords_simple("\n\n".join(case_text))
|
| 194 |
-
case_embeddings = generate_embeddings_from_documents(case_text)
|
| 195 |
image_insights = process_images(images)
|
| 196 |
|
| 197 |
combined_information = {
|
| 198 |
-
"
|
| 199 |
"image_insights": image_insights,
|
| 200 |
"keywords": keywords
|
| 201 |
}
|
| 202 |
|
| 203 |
-
# Convert case_embeddings to a string
|
| 204 |
-
case_embeddings_str = " ".join(str(embedding) for embedding in case_embeddings)
|
| 205 |
-
|
| 206 |
prompt = """
|
| 207 |
You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
|
| 208 |
suggestions for further investigation:
|
| 209 |
-
""" + str(combined_information)
|
| 210 |
|
| 211 |
response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
|
| 212 |
|
|
@@ -214,12 +198,12 @@ def investigate():
|
|
| 214 |
with st.expander("Sherlock's Analysis and Suggestions:"):
|
| 215 |
st.write(response.text)
|
| 216 |
|
| 217 |
-
web_search_results = []
|
| 218 |
|
| 219 |
search_options = st.multiselect("Search for additional clues:", ["Internet"], default=["Internet"])
|
| 220 |
if st.button("Search"):
|
| 221 |
with st.spinner("Searching for clues..."):
|
| 222 |
-
web_search_results = search_internet("\n\n".join(case_text))
|
| 223 |
st.subheader("Internet Search Results:")
|
| 224 |
for result in web_search_results:
|
| 225 |
st.write(f"**Title:** {result['title']}")
|
|
@@ -235,7 +219,7 @@ def investigate():
|
|
| 235 |
including deductions, potential suspects, and conclusions.
|
| 236 |
"""
|
| 237 |
final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
|
| 238 |
-
|
| 239 |
st.header("Case Report")
|
| 240 |
st.write(final_report.text)
|
| 241 |
|
|
|
|
| 21 |
|
| 22 |
# Load pre-trained Gemini model
|
| 23 |
model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
|
| 24 |
+
vision_model = genai.GenerativeModel('models/gemini-1.0-pro-vision-latest')
|
| 25 |
|
| 26 |
# Define Sherlock Holmes's persona and guidelines
|
| 27 |
sherlock_persona = """
|
|
|
|
| 50 |
# Generate embeddings using the Gemini Embedding API
|
| 51 |
embed_model = 'models/embedding-001'
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def extract_keywords_simple(extracted_text):
|
| 54 |
"""Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
|
| 55 |
prompt = """
|
|
|
|
| 62 |
return keywords
|
| 63 |
|
| 64 |
# Function to extract text from various file types
|
| 65 |
+
def extract_text_and_embeddings(uploaded_files):
|
| 66 |
+
"""Extracts text content and generates embeddings for a list of uploaded files."""
|
| 67 |
+
extracted_data = []
|
| 68 |
for uploaded_file in uploaded_files:
|
| 69 |
file_type = uploaded_file.type
|
| 70 |
if file_type == "text/plain":
|
| 71 |
# Plain Text File
|
| 72 |
raw_text = str(uploaded_file.read(), "utf-8")
|
| 73 |
+
embedding = genai.embed_content(model=embed_model, content=raw_text.strip(), task_type="RETRIEVAL_DOCUMENT")["embedding"]
|
| 74 |
+
extracted_data.append({"text": raw_text.strip(), "embedding": embedding})
|
| 75 |
elif file_type == "application/pdf":
|
| 76 |
# PDF Document
|
| 77 |
pdf_reader = PyPDF2.PdfReader(uploaded_file)
|
|
|
|
| 79 |
for page_num in range(len(pdf_reader.pages)):
|
| 80 |
page = pdf_reader.pages[page_num]
|
| 81 |
text += page.extract_text()
|
| 82 |
+
embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
|
| 83 |
+
extracted_data.append({"text": text, "embedding": embedding})
|
| 84 |
else:
|
| 85 |
# Other Document Types (Using Textract)
|
| 86 |
try:
|
| 87 |
text = textract.process(uploaded_file).decode("utf-8")
|
| 88 |
+
embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
|
| 89 |
+
extracted_data.append({"text": text, "embedding": embedding})
|
| 90 |
except Exception as e:
|
| 91 |
st.error(f"Error extracting text from file: {e}")
|
| 92 |
+
return pd.DataFrame(extracted_data)
|
| 93 |
|
| 94 |
+
# Function to process images using Gemini 1.0 Pro Vision
|
| 95 |
def process_images(uploaded_images):
|
| 96 |
+
"""Processes a list of uploaded images using Gemini 1.0 Pro Vision to extract relevant information."""
|
| 97 |
image_insights = []
|
| 98 |
for uploaded_image in uploaded_images:
|
| 99 |
try:
|
|
|
|
| 159 |
st.session_state.chat_history = []
|
| 160 |
|
| 161 |
def investigate():
|
| 162 |
+
"""Handles the case investigation process with Pandas for embeddings."""
|
| 163 |
st.subheader("Case Investigation")
|
| 164 |
|
| 165 |
# File upload with clear labels and progress bars
|
|
|
|
| 177 |
|
| 178 |
# Extract text and process images with progress indication
|
| 179 |
with st.spinner("Extracting text and analyzing images..."):
|
| 180 |
+
case_data = extract_text_and_embeddings(documents)
|
| 181 |
+
keywords = extract_keywords_simple("\n\n".join(case_data["text"]))
|
|
|
|
| 182 |
image_insights = process_images(images)
|
| 183 |
|
| 184 |
combined_information = {
|
| 185 |
+
"case_data": case_data,
|
| 186 |
"image_insights": image_insights,
|
| 187 |
"keywords": keywords
|
| 188 |
}
|
| 189 |
|
|
|
|
|
|
|
|
|
|
| 190 |
prompt = """
|
| 191 |
You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
|
| 192 |
suggestions for further investigation:
|
| 193 |
+
""" + str(combined_information)
|
| 194 |
|
| 195 |
response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
|
| 196 |
|
|
|
|
| 198 |
with st.expander("Sherlock's Analysis and Suggestions:"):
|
| 199 |
st.write(response.text)
|
| 200 |
|
| 201 |
+
web_search_results = []
|
| 202 |
|
| 203 |
search_options = st.multiselect("Search for additional clues:", ["Internet"], default=["Internet"])
|
| 204 |
if st.button("Search"):
|
| 205 |
with st.spinner("Searching for clues..."):
|
| 206 |
+
web_search_results = search_internet("\n\n".join(case_data["text"]))
|
| 207 |
st.subheader("Internet Search Results:")
|
| 208 |
for result in web_search_results:
|
| 209 |
st.write(f"**Title:** {result['title']}")
|
|
|
|
| 219 |
including deductions, potential suspects, and conclusions.
|
| 220 |
"""
|
| 221 |
final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
|
| 222 |
+
str(web_search_results)])
|
| 223 |
st.header("Case Report")
|
| 224 |
st.write(final_report.text)
|
| 225 |
|