Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,30 +30,22 @@ ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in env
|
|
| 30 |
# Define system message with instructions
|
| 31 |
system_message = """
|
| 32 |
You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
|
| 33 |
-
|
| 34 |
Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
|
| 35 |
-
|
| 36 |
Science Objectives: Describe key scientific study objectives related to the science context and user input.
|
| 37 |
-
|
| 38 |
Physical Parameters: Outline the physical parameters related to the science context and user input.
|
| 39 |
-
|
| 40 |
Observables: Specify the observables related to the science context and user input.
|
| 41 |
-
|
| 42 |
Description of Desired Observations: Detail the types of observations related to the science context and user input.
|
| 43 |
-
|
| 44 |
Technical Requirements Table: Generate a table with the following columns:
|
| 45 |
-
- Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations
|
| 46 |
- Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
|
| 47 |
- Desired: The desired values or parameters.
|
| 48 |
- Justification: A scientific explanation of why these requirements are important.
|
| 49 |
- Comments: Additional notes or remarks regarding each requirement.
|
| 50 |
-
|
| 51 |
Example:
|
| 52 |
| Requirements | Necessary | Desired | Justification | Comments |
|
| 53 |
|----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
|
| 54 |
| UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
|
| 55 |
| Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
|
| 56 |
-
|
| 57 |
Ensure the response is structured clearly and the technical requirements table follows this format.
|
| 58 |
"""
|
| 59 |
|
|
@@ -62,13 +54,54 @@ def encode_text(text):
|
|
| 62 |
outputs = bi_model(**inputs)
|
| 63 |
return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
|
| 64 |
|
| 65 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
user_embedding = encode_text(user_input).reshape(1, -1)
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
most_relevant_idx = np.argmax(similarities)
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
|
| 74 |
# Define a prompt to ask GPT-4 to extract keywords and important terms
|
|
@@ -307,7 +340,7 @@ def gpt_response_to_dataframe(gpt_response):
|
|
| 307 |
|
| 308 |
def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
| 309 |
if use_encoder and context:
|
| 310 |
-
context_texts = context
|
| 311 |
relevant_context = retrieve_relevant_context(user_input, context_texts)
|
| 312 |
else:
|
| 313 |
relevant_context = ""
|
|
@@ -389,7 +422,7 @@ iface = gr.Interface(
|
|
| 389 |
gr.HTML(label="Miro"),
|
| 390 |
gr.HTML(label="Generate Mind Map on Mapify")
|
| 391 |
],
|
| 392 |
-
title="ExosAI - NASA SMD SCDD AI Assistant [version-0.
|
| 393 |
description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
|
| 394 |
)
|
| 395 |
|
|
|
|
| 30 |
# Define system message with instructions
|
| 31 |
system_message = """
|
| 32 |
You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
|
|
|
|
| 33 |
Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
|
|
|
|
| 34 |
Science Objectives: Describe key scientific study objectives related to the science context and user input.
|
|
|
|
| 35 |
Physical Parameters: Outline the physical parameters related to the science context and user input.
|
|
|
|
| 36 |
Observables: Specify the observables related to the science context and user input.
|
|
|
|
| 37 |
Description of Desired Observations: Detail the types of observations related to the science context and user input.
|
|
|
|
| 38 |
Technical Requirements Table: Generate a table with the following columns:
|
| 39 |
+
- Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations).
|
| 40 |
- Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
|
| 41 |
- Desired: The desired values or parameters.
|
| 42 |
- Justification: A scientific explanation of why these requirements are important.
|
| 43 |
- Comments: Additional notes or remarks regarding each requirement.
|
|
|
|
| 44 |
Example:
|
| 45 |
| Requirements | Necessary | Desired | Justification | Comments |
|
| 46 |
|----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
|
| 47 |
| UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
|
| 48 |
| Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
|
|
|
|
| 49 |
Ensure the response is structured clearly and the technical requirements table follows this format.
|
| 50 |
"""
|
| 51 |
|
|
|
|
| 54 |
outputs = bi_model(**inputs)
|
| 55 |
return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
|
| 56 |
|
| 57 |
+
def get_chunks(text, chunk_size=300):
    """
    Split a long piece of text into consecutive chunks of at most 'chunk_size' characters.

    The split is purely positional: every chunk except possibly the last one is
    exactly 'chunk_size' characters long, and words may be cut at chunk borders.

    Args:
        text (str): The text to split.
        chunk_size (int): Maximum number of characters per chunk. Must be positive.

    Returns:
        list[str]: The chunks, in their original order. Joining them gives back 'text'.

    Raises:
        ValueError: If 'text' is empty or only whitespace, or if 'chunk_size'
            is not a positive number.
    """
    if not text.strip():
        raise ValueError("The provided context is empty or blank.")

    # A zero step makes range() fail with an unrelated message, and a negative
    # step silently yields no chunks at all; reject both explicitly.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    # Slice the text at fixed offsets; the final slice may be shorter.
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
|
| 67 |
+
|
| 68 |
+
def retrieve_relevant_context(user_input, context_texts, chunk_size=300, similarity_threshold=0.3):
    """
    Return the chunk of 'context_texts' that is most similar to 'user_input'.

    The context is split with get_chunks(), the query and every chunk are
    embedded with encode_text(), and the chunk with the highest cosine
    similarity to the query is selected.

    Args:
        user_input (str): The user's query.
        context_texts (str): The full context text to search.
        chunk_size (int): Chunk length in characters, passed to get_chunks().
        similarity_threshold (float): Minimum cosine similarity the best chunk
            must reach to be considered relevant.

    Returns:
        str: Always a single string, so callers can use the result directly as
        context text. It is one of:
          - the most relevant chunk;
          - the whole context when it fits in a single chunk;
          - "Error: Context is empty or improperly formatted." when the
            context is missing or blank;
          - "No relevant context found for the user input." when no chunk
            reaches 'similarity_threshold'.
        The similarity score itself is not returned.
    """
    # Missing or whitespace-only context: report it instead of letting
    # get_chunks() raise.
    if not context_texts or not context_texts.strip():
        return "Error: Context is empty or improperly formatted."

    # Split the long context text into chunks using the chunking function.
    context_chunks = get_chunks(context_texts, chunk_size)

    # With a single chunk there is nothing to rank, so skip the embedding work.
    if len(context_chunks) == 1:
        return context_chunks[0]

    # Encode the user input as a (1, dim) query embedding.
    user_embedding = encode_text(user_input).reshape(1, -1)

    # Encode all context chunks, one embedding row per chunk.
    chunk_embeddings = np.array([encode_text(chunk) for chunk in context_chunks])

    # Cosine similarity between the query and each chunk, as a flat array.
    similarities = cosine_similarity(user_embedding, chunk_embeddings).flatten()

    # The best-scoring chunk decides both the threshold check and the result.
    most_relevant_idx = int(np.argmax(similarities))
    if similarities[most_relevant_idx] < similarity_threshold:
        return "No relevant context found for the user input."

    return context_chunks[most_relevant_idx]
|
| 104 |
+
|
| 105 |
|
| 106 |
def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
|
| 107 |
# Define a prompt to ask GPT-4 to extract keywords and important terms
|
|
|
|
| 340 |
|
| 341 |
def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
| 342 |
if use_encoder and context:
|
| 343 |
+
context_texts = context
|
| 344 |
relevant_context = retrieve_relevant_context(user_input, context_texts)
|
| 345 |
else:
|
| 346 |
relevant_context = ""
|
|
|
|
| 422 |
gr.HTML(label="Miro"),
|
| 423 |
gr.HTML(label="Generate Mind Map on Mapify")
|
| 424 |
],
|
| 425 |
+
title="ExosAI - NASA SMD SCDD AI Assistant [version-0.9a]",
|
| 426 |
description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
|
| 427 |
)
|
| 428 |
|