Update app.py
Browse files
app.py
CHANGED
|
@@ -12,14 +12,14 @@ from docx.oxml.ns import nsdecls
|
|
| 12 |
from docx.oxml import parse_xml
|
| 13 |
import io
|
| 14 |
import tempfile
|
| 15 |
-
#
|
| 16 |
-
import pyvo as vo
|
| 17 |
import pandas as pd
|
| 18 |
from pinecone import Pinecone
|
| 19 |
import logging
|
| 20 |
import re
|
| 21 |
|
| 22 |
from utils.ads_references import extract_keywords_with_gpt, fetch_nasa_ads_references
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
from langchain_openai import ChatOpenAI
|
|
@@ -41,9 +41,6 @@ bi_model = AutoModel.from_pretrained(bi_encoder_model_name)
|
|
| 41 |
api_key = os.getenv('OPENAI_API_KEY')
|
| 42 |
client = OpenAI(api_key=api_key)
|
| 43 |
|
| 44 |
-
# Set up NASA ADS token
|
| 45 |
-
#ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in environment variables
|
| 46 |
-
|
| 47 |
# Pinecone setup
|
| 48 |
pinecone_api_key = os.getenv('PINECONE_API_KEY')
|
| 49 |
pc = Pinecone(api_key=pinecone_api_key)
|
|
@@ -132,24 +129,6 @@ def clean_retrieved_context(raw_context):
|
|
| 132 |
# Return explicitly cleaned context
|
| 133 |
return cleaned.strip()
|
| 134 |
|
| 135 |
-
def fetch_exoplanet_data():
|
| 136 |
-
# Connect to NASA Exoplanet Archive TAP Service
|
| 137 |
-
tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")
|
| 138 |
-
|
| 139 |
-
# Query the first 10 rows (TOP 10) of a fixed set of columns from the pscomppars table
|
| 140 |
-
ex_query = """
|
| 141 |
-
SELECT TOP 10 pl_name, hostname, sy_snum, sy_pnum, discoverymethod, disc_year, disc_facility, pl_controv_flag, pl_orbper, pl_orbsmax, pl_rade, pl_bmasse, pl_orbeccen, pl_eqt, st_spectype, st_teff, st_rad, st_mass, ra, dec, sy_vmag
|
| 142 |
-
FROM pscomppars
|
| 143 |
-
"""
|
| 144 |
-
# Execute the query
|
| 145 |
-
qresult = tap_service.search(ex_query)
|
| 146 |
-
|
| 147 |
-
# Convert to a Pandas DataFrame
|
| 148 |
-
ptable = qresult.to_table()
|
| 149 |
-
exoplanet_data = ptable.to_pandas()
|
| 150 |
-
|
| 151 |
-
return exoplanet_data
|
| 152 |
-
|
| 153 |
def generate_response(user_input, science_objectives="", relevant_context="", references=[], max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
| 154 |
# Case 1: Both relevant context and science objectives are provided
|
| 155 |
if relevant_context and science_objectives.strip():
|
|
@@ -195,37 +174,6 @@ def generate_response(user_input, science_objectives="", relevant_context="", re
|
|
| 195 |
# Return two clearly separated responses
|
| 196 |
return full_response, response_only
|
| 197 |
|
| 198 |
-
def generate_data_insights(user_input, exoplanet_data, max_tokens=500, temperature=0.3):
|
| 199 |
-
"""
|
| 200 |
-
Generate insights by passing the user's input along with the exoplanet data to GPT-4.
|
| 201 |
-
"""
|
| 202 |
-
# Convert the dataframe to a readable format for GPT (e.g., CSV-style text)
|
| 203 |
-
data_as_text = exoplanet_data.to_csv(index=False) # CSV-style for better readability
|
| 204 |
-
|
| 205 |
-
# Create a prompt with the user query and the full CSV-formatted exoplanet data
|
| 206 |
-
insights_prompt = (
|
| 207 |
-
f"Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
|
| 208 |
-
f"User Query: {user_input}\n\n"
|
| 209 |
-
f"Exoplanet Data:\n{data_as_text}\n\n"
|
| 210 |
-
f"Please provide insights that are relevant to the user's query."
|
| 211 |
-
)
|
| 212 |
-
|
| 213 |
-
# Call GPT-4 to generate insights based on the data and user input
|
| 214 |
-
response = client.chat.completions.create(
|
| 215 |
-
model="gpt-4",
|
| 216 |
-
messages=[
|
| 217 |
-
{"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
|
| 218 |
-
{"role": "user", "content": insights_prompt}
|
| 219 |
-
],
|
| 220 |
-
max_tokens=max_tokens,
|
| 221 |
-
temperature=temperature
|
| 222 |
-
)
|
| 223 |
-
|
| 224 |
-
# Extract and return GPT-4's insights
|
| 225 |
-
data_insights = response.choices[0].message.content.strip()
|
| 226 |
-
return data_insights
|
| 227 |
-
|
| 228 |
-
|
| 229 |
def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
|
| 230 |
doc = Document()
|
| 231 |
|
|
@@ -430,7 +378,7 @@ def chatbot(user_input, science_objectives="", context="", subdomain="", max_tok
|
|
| 430 |
|
| 431 |
# Fetch exoplanet data and generate insights
|
| 432 |
exoplanet_data = fetch_exoplanet_data()
|
| 433 |
-
|
| 434 |
|
| 435 |
# Extract GPT-generated table into DataFrame
|
| 436 |
extracted_table_df = gpt_response_to_dataframe(full_response)
|
|
|
|
| 12 |
from docx.oxml import parse_xml
|
| 13 |
import io
|
| 14 |
import tempfile
|
| 15 |
+
#import pyvo as vo
|
|
|
|
| 16 |
import pandas as pd
|
| 17 |
from pinecone import Pinecone
|
| 18 |
import logging
|
| 19 |
import re
|
| 20 |
|
| 21 |
from utils.ads_references import extract_keywords_with_gpt, fetch_nasa_ads_references
|
| 22 |
+
from utils.data_insights import fetch_exoplanet_data, generate_data_insights
|
| 23 |
|
| 24 |
|
| 25 |
from langchain_openai import ChatOpenAI
|
|
|
|
| 41 |
api_key = os.getenv('OPENAI_API_KEY')
|
| 42 |
client = OpenAI(api_key=api_key)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
# Pinecone setup
|
| 45 |
pinecone_api_key = os.getenv('PINECONE_API_KEY')
|
| 46 |
pc = Pinecone(api_key=pinecone_api_key)
|
|
|
|
| 129 |
# Return explicitly cleaned context
|
| 130 |
return cleaned.strip()
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
def generate_response(user_input, science_objectives="", relevant_context="", references=[], max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
| 133 |
# Case 1: Both relevant context and science objectives are provided
|
| 134 |
if relevant_context and science_objectives.strip():
|
|
|
|
| 174 |
# Return two clearly separated responses
|
| 175 |
return full_response, response_only
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
|
| 178 |
doc = Document()
|
| 179 |
|
|
|
|
| 378 |
|
| 379 |
# Fetch exoplanet data and generate insights
|
| 380 |
exoplanet_data = fetch_exoplanet_data()
|
| 381 |
+
data_insights_uq = generate_data_insights(user_input, client, exoplanet_data)
|
| 382 |
|
| 383 |
# Extract GPT-generated table into DataFrame
|
| 384 |
extracted_table_df = gpt_response_to_dataframe(full_response)
|