Spaces:
Runtime error
Runtime error
creating initial app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.orm import sessionmaker

# Connection settings are read from the environment so that no credential is
# committed to the repository. DB_PASSWORD is mandatory (a KeyError is raised
# when it is missing); the other settings fall back to the values this script
# previously hard-coded.
# NOTE(review): the password that used to be committed on this line should be
# treated as leaked and rotated.
DB_USER = os.environ.get("DB_USER", "Benedette")
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.environ.get("DB_HOST", "10.230.50.66")
DB_PORT = os.environ.get("DB_PORT", "1433")
DB = os.environ.get("DB_NAME", "Reporting")

# Build the connection string with URL.create so that special characters in
# the password are escaped, then render it back to a plain string because
# SQL_DATABASE_URL has always been a str.
SQL_DATABASE_URL = URL.create(
    "mssql+pymssql",
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=int(DB_PORT),
    database=DB,
).render_as_string(hide_password=False)

# echo=True makes SQLAlchemy log every statement it emits.
engine = create_engine(SQL_DATABASE_URL, echo=True)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# NOTE(review): this session is not used anywhere else in the visible file and
# is never closed; it is kept only so the module-level name `db` still exists.
db = SessionLocal()
|
| 21 |
+
# linelist_factart_schema
|
| 22 |
+
|
| 23 |
+
# a wrapper around the SQLAlchemy engine to interact with a SQL database.
|
| 24 |
+
from llama_index.core import SQLDatabase

# Tables that the SQLDatabase wrapper is allowed to expose to the query engine.
tables = [
    "Linelist_FACTART",
    "LineListTransHTS",
    "LinelistPrep",
    "LinelistHEI",
    "AggregateDSD",
    "AggregateOTZEligibilityAndEnrollments",
    "AggregateARTHistory",
]

# Wrap the SQLAlchemy engine, restricted to the tables listed above.
sql_database = SQLDatabase(engine, include_tables=tables)
|
| 29 |
+
|
| 30 |
+
import os

# Fail fast, with the same KeyError a plain lookup would raise, when the
# OpenAI API key is not configured in the environment.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError("OPENAI_API_KEY")

from llama_index.llms.openai import OpenAI

# temperature=0 is requested for the gpt-3.5-turbo model.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
|
| 34 |
+
|
| 35 |
+
# Context text attached to the "Linelist_FACTART" table schema.
# The statements used to be adjacent string literals with no separator, so
# they fused together ("...parameters.Use this table..."). Each statement is
# now its own line of the description.
fact_linelist_str = "\n".join(
    (
        "A client-level linelist that contains comprehensive data on all clients who have ever received treatment for HIV/AIDS, encompassing various indicators and clinical parameters.",
        "Use this table to answer quetions related to active patients currently on treatment/txcurr, viral load results",
        "Key attributes captured in this linelist include:",
        "if input in the NUPI column is NULL then the client has no NUPI",
        "Active patients is where ARTOutcomeDescription is Active.",
        "Clinical indicators like Last CD4 count, Last VL (Viral Load), and WHO Stage, aiding in the assessment of disease progression and treatment response.",
        "Demographic information, including Age at ART Start, Gender, Marital Status, and County/Sub-County, facilitating analysis of patient demographics.",
        "Medical history and co-morbidities, such as Diabetes and Hypertension status, providing context on underlying health conditions and associated risk factors.",
        "Facility-related data, such as Facility Name, Site Code,County, and Partner Name, enabling assessment of service delivery across different healthcare facilities and implementing partners.",
        "Pregnancy-related indicators, including Pregnant ART Start and Pregnant at Enrollment, supporting maternal and child health monitoring and intervention.",
        "clients are uniquley identified by concatenating PatientPKHash and Sitecode",
        "LastVL is the most current VL for the client",
        "LowViremia/suppressed is when a client viral load is less that 200 copies per ml,1= True 0 = False",
        "HighViremia/unsuppressed is when a client viral load is more that 200 copies per ml,1= True 0 = False",
        "HasValidVL is boolean value for if client has a valid VL",
        "ISTxCurs indicates whether the patients are active on teatment where 1= True",
        "Treatment Outcomes(ARTOutcomeDescription) is as at one point",
        "This table can be used to answer queries such as:",
        "What is the distribution of treatment outcomes among HIV/AIDS patients, such as Active, Transfer Out, and Loss to Follow-Up by county, partner,age?",
        "What proportion of patients have achieved viral suppression, as indicated by their Last VL results by coounty?",
        "What percentage of HIV/AIDS patients have co-morbid conditions such as diabetes or hypertension?",
    )
)
|
| 60 |
+
|
| 61 |
+
# Context text attached to the "LineListTransHTS" table schema.
# The entries used to be adjacent string literals with no separator, which
# glued every sentence and field description to the next one. Each entry is
# now its own line of the description.
hts_linelist_str = "\n".join(
    (
        "A client-level linelist containing comprehensive HIV testing data for all adult clients (> 18 years) who have undergone HIV testing.",
        "This dataset captures a wide range of information including demographic details, testing outcomes, testing history, and programmatic indicators.",
        "It serves as a valuable table for analyzing HIV testing patterns, testing outcomes, and testing strategies among adult populations.",
        "Please note that this dataset is not suitable for inquiries related to patients on treatment.",
        "use this table to answer any questions related to HIV testing",
        "Additional details available include:",
        "- Age at Testing (AgeAtTesting): Age of the client at the time of HIV testing.",
        "- Age Group (AgeGroup): Categorization of clients into 4-year age bands from 1 to 64 years.",
        "- Agency Name (AgencyName): Name of the funding body or organization supporting the testing program.",
        "- Client Self-Tested (ClientSelfTested): Indicates whether a client has ever performed self-testing for HIV.",
        "- Client Tested As (ClientTestedAs): Categorizes clients based on whether they were tested individually or as part of a couple.",
        "- County (County) and Sub-County (SubCounty): Geographic location of the testing facility.",
        "- Couple Discordant (CoupleDiscordant): Indicates whether a couple tested together was concordant or discordant for HIV.",
        "- Date of Birth (DOB): Date of birth of the client.",
        "- Enrollment Date (EnrollmentDate): Date when the client was enrolled into the CCC.",
        "- Entry Point (EntryPoint): Service point where the HIV test was conducted (e.g., VCT, OPD).",
        "- Ever Tested for HIV (EverTestedForHiv): Indicates whether the client has ever been tested for HIV before.",
        "- Facility Name (FacilityName) and MFL Code (MFLCode): Name and code of the testing facility.",
        "- Final Test Result (FinalTestResult): Result of the HIV test for the encounter.",
        "- Gender (Gender) and Marital Status (MaritalStatus): Demographic characteristics of the client.",
        "- Linked (Linked): Boolean value indicating whether the client was successfully linked to follow-up services.",
        "- Months Since Last Test (MonthsSinceLastTest): Number of months since the client's last HIV test.",
        "- Test Date (TestDate): Date when the client was tested for HIV.",
        "- Test Strategy (TestStrategy): Strategy employed for HIV testing (e.g., Hospital Patient, Non-Patient).",
        "- Test Type (TestType): Type of HIV test conducted during the encounter.",
        "- Tested (Tested): Boolean value indicating whether the client was tested for HIV.",
        "- Tested Before (TestedBefore): Indicates if the client has been tested for HIV within the last 12 months.",
        "- TB Screening (tbScreening): Outcome of TB screening conducted during the encounter.",
        "Positivity rate is number of positive tests from all the test conducted in a certain period",
    )
)
|
| 92 |
+
|
| 93 |
+
# Context text attached to the PrEP line-list table schema.
# The entries used to be adjacent string literals with no separator, so the
# field descriptions ran into each other. Each entry is now its own line.
prep_str = "\n".join(
    (
        "A client-level line list containing comprehensive information on all clients enrolled in Pre-Exposure Prophylaxis (PrEP) programs.",
        "Additional information available includes:",
        "- As of Date (AsofDate): End of the reporting month for the data.",
        "- Assessment Month (AssessmentMonth) and Assessment Year (AssessmentYear): Month and year when the client was assessed for PrEP enrollment.",
        "- Eligible for PrEP (EligiblePrep): Boolean value indicating whether the client is eligible for enrollment in PrEP based on risk category.",
        "- Latest HIV Risk Category (LatestHIVRiskCategory): Last recorded risk category from the HIV testing machine learning model.",
        "- Screened for PrEP (ScreenedPrep): Boolean value indicating whether the client was assessed for enrollment into PrEP.",
        "- PatientPKHash: Hashed value representing the unique client ID in the specific facility.",
        "Use this table to answer any prep related question,i.e from high risk clients how many were enrolled in Prep",
    )
)
|
| 104 |
+
|
| 105 |
+
# Context text attached to the "LinelistHEI" table schema.
# The entries used to be adjacent string literals with no separator, so one
# field description ran straight into the next column name. Each entry is now
# its own line, and the InitialPCRBtwn8wks_12mnths entry gets the space that
# was missing between the column name and its description.
hei_str = "\n".join(
    (
        "A client level linelist that contains various indicators of HIV-exposed infants",
        "this table should be used for any HEI related questions,Iincluding whether HEI is breastfeeding,tested at different timepoints, outcome of Hei after they exit the HEI program",
        "Additional information available includes:",
        "-BF12mnths Indicates whether the HEI is breastfeeding at 12 months of age as of last cwc visit",
        "-BF18mnths Indicates whether the HEI is breastfeeding at 18 months of age as of last cwc visit",
        "-EBF6mnths Indicates whether the HEI is using Exclusive Replacement(ERF) feeding method at 6 months of age as of last cwc visit",
        "-HEIExitCriteria the Exit reason for an exposed infant after 24months",
        "-InitialPCRBtwn8wks_12mnths Indicates whether the HEI's DNAPCR1 was done at age of between 8 weeks and 48 weeks",
        "-TestedAt12months-Indicates whether the HEI's DNAPCR2 was done at age of 12 months of age",
    )
)
|
| 115 |
+
# Context text attached to the "AggregateOTZEligibilityAndEnrollments" schema.
# The entries used to be adjacent string literals with no separator, so each
# column description ran into the next column name ("...viral loadsAgeGroup:").
# Each entry is now its own line of the description.
otzenroll_str = "\n".join(
    (
        "An aggregate table that contains counts of TXCurr/number of active individuals between 10 and 19 years who are eligible for OTZ program, /enrolled in OTZ, completed training modules and eligible for VL",
        "Use this table for addressing any inquiries regarding OTZ and corresponding viral loads",
        "AgeGroup: A 4-year age band from 1 to 64 years",
        "CompletedToday_OTZ_Beyond: Has the client completed OTZ_Beyond today",
        "CompletedToday_OTZ_Leadership: Has the client completed OTZ_Leadership today",
        "CompletedToday_OTZ_MakingDecisions: Has the client completed OTZ_MakingDecisions today",
        "CompletedToday_OTZ_Orientation: Has the client completed OTZ_Orientation today",
        "CompletedToday_OTZ_Participation: Has the client completed OTZ_Participation today",
        "CompletedToday_OTZ_SRH: Has the client completed OTZ_SRH today",
        "CompletedToday_OTZ_Transition: Has the client completed OTZ_Transition today",
        "CompletedToday_OTZ_TreatmentLiteracy: Has the client completed OTZ_TreatmentLiteracy today",
        "CompletedTraining: Number of clients who have completed OTZ modules training",
        "County: The County where the facility is located",
        "EligibleVL: Is the client eligible for a viral load",
        "Enrolled: Number of clients enrolled into OTZ program",
        "FacilityName: The facility name as entered in KHMFL",
        "FirstVL: The first ever documented viral load",
        "Gender: Sex of the patient",
        "HasValidVL: Does the client have a valid viral load",
        "LastVL: This is the most current Viral load for the client -",
        "LoadDate: Date when the dataset was ETL loaded",
        "MFLCode: Master facility code as assigned in the KHMFL",
        "ModulesPreviouslyCovered: Modules that the client has covered before this visit",
        "OTZEnrollmentYearMonth: The year and the month the client was enrolled in OTZ program",
        "PartnerName: The implementing partner mechanism",
        "SubCounty: The Sub County where the facility is located",
        "TransferInStatus: Did the client transfer in",
        "ValidVLResult: The VL result that is within 12 months from the reporting period taking into account age group validity",
        "ValidVLResultCategory: The viral load results categorizations as LDL, High-risk LLV, Low-risk LLV, and unsuppressed",
        "patients_eligible: Number of clients eligible for enrollment into OTZ program",
    )
)
|
| 146 |
+
|
| 147 |
+
# Context text attached to the "AggregateARTHistory" table schema.
# The entries used to be adjacent string literals separated, at best, by a
# trailing space, so sentences and column descriptions ran together. Each
# entry is now its own line of the description.
aggtxcurr_str = "\n".join(
    (
        "An aggregate dataset containing counts of active number of patients/TxCurr for each facility at each month, disaggregated by various indicators.",
        "Query this table To identify increase/decrease the total number of active patients at overtime",
        "Number of acive patients or treatment is calculated at end of the month",
        "AsofDateKey:the End of month reporting date (format = yyyy-mm-dd), use this date to extract number of active client as at that month",
        "DATIMAgeGroup: The DATIM Age disaggregations",
        # "NumofPatients The total number of active patients/TXcurr"
        "isTxCurr: The total number of active patients/TXcurr",
    )
)
|
| 156 |
+
|
| 157 |
+
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import (
    ObjectIndex,
    SQLTableNodeMapping,
    SQLTableSchema,
)

# Maps table schema objects to index nodes for the wrapped database.
table_node_mapping = SQLTableNodeMapping(sql_database)

# One schema object per table, pairing the table name with its description.
# The PrEP table is spelled "LinelistPrep" to agree with the `tables` list
# passed to SQLDatabase(include_tables=...); it was "LineListPrep" here.
# NOTE(review): confirm the exact spelling against the database.
# "AggregateDSD" is in `tables` but has no description string yet, so it gets
# no schema object.
table_schema_objs = [
    SQLTableSchema(table_name=table_name, context_str=description)
    for table_name, description in (
        ("Linelist_FACTART", fact_linelist_str),
        ("LineListTransHTS", hts_linelist_str),
        ("LinelistPrep", prep_str),
        ("LinelistHEI", hei_str),
        ("AggregateOTZEligibilityAndEnrollments", otzenroll_str),
        ("AggregateARTHistory", aggtxcurr_str),
    )
]

# Store the table schemas in a vector index so they can be retrieved later.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,  # the table schema objects to index
    table_node_mapping,  # maps tables to nodes
    VectorStoreIndex,  # index class used for vector-based search
)
|
| 183 |
+
|
| 184 |
+
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine

# Query engine built on the wrapped database and a retriever over the table
# schema index (similarity_top_k=2).
# The `llm` configured earlier in this file (gpt-3.5-turbo, temperature=0) was
# never handed to the engine; it is passed explicitly here so that
# configuration is actually used.
query_engine = SQLTableRetrieverQueryEngine(
    sql_database,
    obj_index.as_retriever(similarity_top_k=2),
    llm=llm,
)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# Instruction text placed in front of every question sent to the query engine.
# The fragments used to be adjacent string literals with no separating space,
# producing "correctquery", "answerYou", "mostinteresting", and so on. They
# are now joined with single spaces, and the first sentence gets the full stop
# it was missing before "You can order".
preamble = " ".join(
    (
        "Given an input question, first create a syntactically correct",
        "query to run, then look at the results of the query and return the answer.",
        "You can order the results by a relevant column to return the most",
        "interesting examples in the database.",
        "Pay attention to use only the column names that you can see in the schema",
        "description. Be careful to not query for columns that do not exist.",
        "Pay attention to which column is in which table. Also, qualify column names",
        "with the table name when needed.",
    )
)

# Marker placed immediately before the user's question in the final prompt.
prompt_intro = " Here is the prompt: "
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
import gradio as gr
|
| 205 |
+
def texttosql(question: str, conversation_history: list[dict]):
    """Answer *question* through the SQL query engine, using earlier turns as context.

    Args:
        question: The user's natural-language question.
        conversation_history: Earlier turns, each a dict with the keys
            "user" and "chatbot". The new turn is appended to this list.
            None is treated as an empty history.

    Returns:
        A 4-tuple ``(answer, sql_query, result, conversation_history)``.
        ``sql_query`` and ``result`` come from the response metadata and are
        empty strings when the engine did not report them.
    """
    if conversation_history is None:
        conversation_history = []

    prompt_parts = [preamble]
    # Only mention earlier turns when there are some; on the first question
    # the prompt used to claim a previous exchange that did not exist.
    if conversation_history:
        context = " ".join(
            f'{turn["user"]} {turn["chatbot"]}' for turn in conversation_history
        )
        prompt_parts.append(
            "the user previously asked and received the following: " + context
        )
    prompt_parts.append(prompt_intro.strip())
    prompt_parts.append(question)

    # Join with spaces so the preamble no longer runs straight into the next
    # sentence ("...when needed.the user previously asked...").
    response = query_engine.query(" ".join(prompt_parts))

    # Guard against a missing answer or metadata so one odd response does not
    # crash the UI callback or poison later turns with a non-string entry.
    answer = response.response or ""
    metadata = response.metadata or {}
    sql_query = metadata.get("sql_query", "")
    result = metadata.get("result", "")

    conversation_history.append({"user": question, "chatbot": answer})

    return answer, sql_query, result, conversation_history
|
| 220 |
+
|
| 221 |
+
# Input components: the question box plus the per-session conversation history.
inputs = [
    gr.Textbox(lines=10, label="Question"),
    gr.State(value=[]),
]

# Output components, in the order texttosql returns its values.
outputs = [
    gr.Textbox(label="Chatbot Response", type="text"),
    gr.Textbox(label="sql_query", autoscroll=False, type="text"),
    gr.Textbox(label="Metadata_result", autoscroll=False, type="text"),
    gr.State(),
]

# Build the interface around texttosql and start serving it.
interface = gr.Interface(
    fn=texttosql,
    inputs=inputs,
    outputs=outputs,
    title="txttosql Chatbot",
    description="Enter a question and see the processed outputs in collapsible boxes.",
)
interface.launch()
|