# Hunflair / prompts.py
# samaresh55's picture
# Upload prompts.py
# f3d245e verified
# raw
# history blame
# 23.9 kB
################################################## PROMPT ################################################
def get_prompts_emea():
    """Build the few-shot prompt used to extract personal/clinical entities.

    The prompt is a system description of the entity types followed by a
    series of worked examples. Every example is prefixed with the task
    header (``whole_task``) and consists of a paragraph, an ``Answer:``
    section with ``Label: value | value`` lines, and an ``End-Answer``
    terminator.

    Returns:
        A ``(final_prompt, whole_task)`` tuple of strings: the assembled
        prompt and the task header that precedes each paragraph in it.
    """
    system_prompt = '''An entity is a person(name), age, gender, title, named organization, city, state, country, address, zipcode, dates, site_ids, subject id, email, phone number, height, weight, Race, ethnicity, smoking habits, drinking habits, non-particiant, Social Security Number, Account Number, Medical Record Number, Medical History, Health Insurance Beneficiary Number, Certificate License Number, IP Address, Website URL, Vehicle Identifier.
All entities should strictly get extracted, but don't extract incorrect entities.
Entity present in any part of the paragraph should be detected.
Names/Person : Names of persons mentioned in the input sentence or paragraph.ile
Age: Age of a person should get detected. Some examples are 12 yo, 15 years old, 11 months old, 79 years, 51 yrs, 18-year-old, > 13 Years, <14 years
Date: All dates should get detected as Dates. Some examples are 10-1-2021, 23-01-2022, 27th June, June 29, 10-09-2007, 4/5/67, 7/67, 31-12-2000. start_date, end_date should get detected as Date itself.
Gender: Consider instances of "Male," "Female," "male," "female," "She," "He," "women," and "man" as gender references, note don't extract other than these instances
Study_day: Day 2, Day 5, day 9, day 11 of study, 3rd day of study, etc.,
Height: six feet tall, 5 feet 7 inches, 8 feet 1 inch, 170 centimeters tall, 188 centimeters, 189cm, 140.73 cm, etc., should be detected as Height.
Weight: some of the examples are 100 pounds, 50 pounds, 77.4 pounds, 66 kgs, 78.9 kilograms, 88 kilograms, etc.,should be detected as Weight.
Race: some of the examples of Race are caucasian, Asian, Black, White, African Amercian, Hispanic descent, mixed race heritage, Native American, etc., should be detected as Race.
Phone_number: persons contact number or phone number should get detected. example format of phone numbers are +91443563223, (123)456-7890, +1(123)456-7890, (555) 123-4567, +44 20 1234 5678
Email: all emails from the paragraph should be detected.
Site_id: Site ID #56, Site ID 87, siteid #22, siteid356, at Site Y789, at site 235 etc.,
Address: Extract Address from paragraph and should detect as Address.
Country: Extract Country names from paragraph.
Non_participant: non-participant, observer, spectator etc., extract these type of entities
Smoking_habit: never smoked, smoker, non-smoker etc., extract these type of entities
Drinking_habit: never drinked, drinker, drinking, non-drinker etc., extract these type of entities
BMI: Body Mass Index value should get detected.
Social Security Number: Social Security Number should get detected from paragraph.
Medical Record Number: All Medical Record Numbers should get detected from paragraph.
If there are multiple values for same entity they should be extracted with | separation.
Random text should not get detected as any of above entities.
All entities from above should strictly be extracted.
URLs shouldn't get detected as Location or Address.
'''
    example1 = '''Essentially , Mr. Cornea is a 60 year old male who noted the onset of dark urine during early January .
Answer:
Name: Mr. Cornea
Age: 60 year old
Gender: male
Date: early January
End-Answer
'''
    # NOTE(review): "Numper_of_patients" looks like a typo of
    # "Number_of_patients"; it is left untouched because code that parses the
    # model output may key on this exact label -- confirm before renaming.
    example2 = '''In the 205 subjects with post-ofatumumab HAHA results (G2 MSD ECL assay; Section 3.5), one subject (Subject 257) tested positive for HAHA on 10-1-2021, 23-01-2022, and 185 subjects had all negative post-ofatumumab HAHA results with at least one ofatumumab plasma concentration low enough (<200 µg/mL) for the negative HAHA results to be considered conclusive. Subject 257 tested positive for HAHA at the six-month follow-up visit (titer = 16); samples at all other time points were negative.
2 patients were released from the study due to inactivity.
Answer:
Subject_id: 257 | 257
Date: 10-1-2021 | 23-01-2022
Numper_of_patients: 2 | 205 | 185
End-Answer
'''
    # The answer no longer lists "Race: White": the paragraph mentions no
    # race, and a shot that invents an entity contradicts the instruction
    # "don't extract incorrect entities".
    example3 = '''Subject 169: 54 years, Female, Pneumocystis jirovecii pneumonia, 61 days after last dose, resolved. The subject(CS3A Subject 1833-2303) was hospitalized for severe bronchopneumonia. Bronchoalveolar lavage and polymerase chain reaction (PCR) for Pneumocystis were positive for Pneumocystis jiroveci. She was hospitalized on 27th June and discharged on June 29. Two patients were on the study from 10-09-2007.
Answer:
Subject_id: 169 | 1833-2303
Age: 54 years
Gender: Female | She
Date: 27th June | June 29 | 10-09-2007
Medical_history: Pneumocystis jirovecii pneumonia
End-Answer
'''
    example4 = '''Saw Dr Oakley 4/5/67 - he was happy with results of ETT at Clarkfield. To f/u 7/67. No CP's since last admit. On Day 521, the patient was discharged.
Another patient died of cardio-respiratory arrest on Day 8.
Answer:
Name: Dr Oakley
Date: 4/5/67 | 7/67
Gender: he
Location: Clarkfield
Study_day: Day 521 | Day 8
End-Answer
'''
    example5 = '''Patient 12367/2134 Oseltamivir 100 mg IV q12h for 5 days S295H/Y Patient 12367/2144 was a 57 year old male enrolled on 31-12-2000, on Day 5(03-Jan-2021) the patient was admitted.
Answer:
Patient_id: 12367/2134 | 12367/2144
Gender: male
Date: 31-12-2000 | 03-Jan-2021
Study_day: Day 5
Age: 57 year old
End-Answer'''
    example6 = '''Final Clinical Study Report - NV25118: A Randomized, Multicenter, Single Blinded, Parallel Study of the Safety of 100 mg and 200 mg Oseltamivir Administered Intravenously for the Treatment of Influenza in Patients Aged > 13 Years. Report No. 1037027. June 3, 2013
Answer:
Study_id: NV25118
Age: 13 Years
Date: June 3, 2013
End-Answer'''
    example7 = '''The medical history included throat cancer beginning in 2011 and ending in 2011, a broken hip beginning in 2011 and ending in 2011, insertion of two stents in 2012, coronary artery bypass surgery on 25-Jun-2014, a right cerebrovascular accident on 25-Jun-2014, aortic valve replacement on 25-Jun-2014, bilateral cataracts on 15-Aug-2014.
Answer:
Medical_history: throat cancer | broken hip | insertion of two stents | coronary artery bypass surgery | right cerebrovascular accident | aortic valve replacement | bilateral cataracts.
Date: 25-Jun-2014 | 25-Jun-2014 | 25-Jun-2014 | 15-Aug-2014
End-Answer'''
    example8 = '''1083\nPatient [AUS02T-0215-787043] - Death\n(disseminated intravascular coagulation),\nSAE (haemorrhage intracranial,\ndisseminated intravascular coagulation) .
Answer:
Subject_id: [AUS02T-0215-787043]
End-Answer'''
    example9 = '''The accident involved Vehicle ID: ABC123 and occurred on August 15, 2023.
Answer:
Vehicle_Identifier: ABC123
Date: August 15, 2023
End-Answer
'''
    example10 = '''Sarah underwent an ECHO and endoscopy at Ingree and Ot of Weamanshy Medical Center(www.wmchospital.com) on April 28 . In one patient the SAE had a fatal\noutcome.
Answer:
Name: Sarah
Location: Weamanshy Medical Center
URL: www.wmchospital.com
Date: April 28
End-Answer'''
    example11 = '''The patient had never smoked and was a former drinker. The subject was accompanied by his mother. He then completed study treatment with a total of 19 doses of IV oseltamivir (10 doses standard treatment plus 9 doses of treatment extension).
Answer:
Smoking_habit: never smoked
Drinking_habit: former drinker
Gender: his | He
Non_participant: mother
End-Answer'''
    example12 = '''Patient ois/51317-31580/10653 was a 54-year-old Hispanic male. he doesn't have anything to inform. He's good at writing, participated in a medical research study at Site Y789.
Answer:
Subject_id: ois/51317-31580/10653
Age: 54-year-old
Race: Hispanic
Gender: male | he | He's
Site_id: Y789
End-Answer'''
    example13 = '''This Task Order CPDR001F2301 (“Task Order”) shall be binding upon the undersigned upon its execution by the duly authorized representative(s) of Novartis. ', 'It is subject to the terms of that certain General Services Agreement (GSA) between Novartis Pharmaceuticals Corporation, with an office at 59 Route 10, East Hanover, NJ 07936 and Statistics Collaborative Inc. (“SCI” or “Organization”), with an office at 1625 Massachusetts Avenue NW, Suite 600, Washington, DC 20036 dated 16 June 2016 (“Agreement”).
Answer:
Address: 59 Route 10, East Hanover, NJ 07936 | 1625 Massachusetts Avenue NW, Suite 600, Washington, DC 20036
Date: 16 June 2016
End-Answer'''
    example14 = '''ATC classes are presented alphabetically; preferred terms are sorted within ATC class alphabetically. - A medication can appear with more than one ATC class. Program: CIGG013A/CIGG013A1101J/report/pgm_saf/t_cmd03.sas, 18:33 14OCT2014 Final. https://www.google.com/
Study_dates: 18 Nov 2009-28 Jun 2010
Answer:
Program: CIGG013A/CIGG013A1101J/report/pgm_saf/t_cmd03.sas
Date: 14OCT2014 | 18 Nov 2009 | 28 Jun 2010
URL: https://www.google.com/
End-Answer
'''
    example15 = '''This Task Order CPDRB0081E5 (“Task Order”) shall be binding upon the undersigned upon its execution by the duly authorized representative(s) of Novartis. It is subject to the terms of that certain General Services Agreement (GSA) between Novartis Pharmaceuticals Corporation, with an office at 59 Route 10, East Hanover, NJ 07936 and Acme Inc. (“Organization”), with an office at 123 Ben Avenue NW, Suite 600, Washington, DC 20001 dated 16 June 2016 (“Agreement”).
Answer:
Address: 59 Route 10, East Hanover, NJ 07936 | 123 Ben Avenue NW, Suite 600, Washington, DC 20001
Date: 16 June 2016
End-Answer
'''
    example16 = '''In a pre-clinical study day, Subject ID #235 with Medical Record Number 783-ABD has a BMI of 27.5. His name is John Doe, aged 35, residing at 123 Main St. He's a non-participant, non-smoker, and non-drinker. His medical history includes allergies. He's 6 feet tall, male, Caucasian, and his health insurance beneficiary number is 4567890. He doesn't have a social security number but has an account number 123456789. He drives a Toyota with license plate XYZ-123. contact him at john.doe@exam.com
Answer:
Subject_id: 235
Medical_Record_Number: 783-ABD
BMI: 27.5
Name: John Doe
Age: 35
Address: 123 Main St.
Non_participant: non-participant
Smoking_habit: non-smoker
Drinking_habit: non-drinker
Medical_history: allergies
Height: 6 feet
Gender: male
Race: Caucasian
Health_Insurance_Beneficiary_Number: 4567890
Account_Number: 123456789
Vehicle_Identifier: Toyota | XYZ-123
Email: john.doe@exam.com
End-Answer
'''
    example17 = '''He is a non-participant, non-smoker, and non-drinker. Anyone can reach at (555) 123-4567
Answer:
Gender: He
Non_participant: non-participant
Smoking_habit: non-smoker
Drinking_habit: non-drinker
Phone_number:(555) 123-4567
End-Answer
'''
    # The label is "Address" here (it used to be misspelled "Adddress") so it
    # matches the label used by every other shot and by the system prompt.
    example18 = '''Dr. Sarah Miller has a valid medical license (Certificate License Number: MD12345) and practices at 789 Oak Avenue. her medical history includes allergies.
Answer:
Name: Dr. Sarah Miller
Certificate_License_Number: MD12345
Address: 789 Oak Avenue
Gender: her
Medical_history: allergies
End-Answer
'''
    example19 = '''During a pre-clinical study day at Site ID 56, Subject ID 1234, a 40-year-old Caucasian female named Amritha, her bmi with 2.65.
Answer:
Site_id: 56
Subject_id: 1234
Age: 40-year-old
Race: Caucasian
Gender: female | her
Name: Amritha
BMI: 2.65
End-Answer
'''
    example20 = '''John, a non-smoker and an occasional drinker, He importance of maintaining a healthy lifestyle. Sarah, his colleague, shares his commitment to wellness. Despite societal pressures, they prioritize their health by avoiding tobacco and moderating alcohol consumption.
Answer:
Name: John | Sarah
Drinking_habit: occasional drinker | alcohol consumption
Gender: He | his | his
Smoking_habit: non-smoker | tobacco
End-Answer
'''
    example21 = '''Study Dates: First patient screened: Jan 15, 2010 Last patient visit: Sept 14, 2012
Answer:
Date: Jan 15, 2010 | Sept 14, 2012
End-Answer
'''
    example22 = '''Father: Hypertension, deceased at 70 (myocardial infarction) Mother: Type 2 Diabetes Mellitus, alive, age 72
Answer:
Medical_history: Hypertension | Diabetes Mellitus
Age: 70 | 72
End-Answer
'''
    whole_task = '''Given the paragraph below identify list of possible entities.
Paragraph:'''
    # Order in which the shots appear in the prompt. example9 is listed once;
    # it was previously present twice, which only repeated the same shot.
    example_list = [
        example1, example17, example12, example11, example13, example14,
        example2, example9, example3, example4, example5, example6,
        example15, example7, example8, example10, example16, example18,
        example19, example20, example21, example22,
    ]
    # Each shot is rendered as "<task header>\n<example>\n".
    shots = ''.join(whole_task + '\n' + ex + '\n' for ex in example_list)
    final_prompt = system_prompt + '\n' + shots
    return final_prompt, whole_task
def get_prompts_emea2():
    """Build the few-shot prompt for organization, drug, dosage and finance entities.

    The prompt is a system description of the entity types (organization
    names, dosages, drug names, bank account numbers, SWIFT codes, costs)
    followed by a series of worked examples. Every example is prefixed with
    the task header (``whole_task``) and consists of a paragraph, an
    ``Answer:`` section with ``Label: value | value`` lines, and an
    ``End-Answer`` terminator.

    Returns:
        A ``(final_prompt, whole_task)`` tuple of strings: the assembled
        prompt and the task header that precedes each paragraph in it.
    """
    system_prompt2 = '''An entity is a Organization_name, Dosage Values, Drug names, Bank Account Numbers, Bank Swift Codes, Cost Values.
All entities should strictly get extracted, but don't extract incorrect entities.
Entity present in any part of the paragraph or sentence should be detected.
Dosages: Medical Dosage values should get detected. Some examples are 500 mg, 100 mg, 1000mg, 10ml, 250mg, 20mcg, 15μg, 19μg, 21 μg, 80 μg, 800 mcg, etc.,
Drug_names : Drug names should get detected. Some examples are Adelphane-Esidrex, Afinitor, Amturnide, Anafranil, Arcapta Neohaler, Brinaldix, Clozaril, Co-Diovan, EXV8111, Fabhalta, FUB5231, JDQ443, KAE609, Kisqali, KLU1563
Organization_names: Extract Organization names from given paragraph or sentence.
Account_Number: Bank Account Numbers should get detected.
Swift_code: Bank SWIFTcodes should get detected.
Costs: Only Costs Numerical monetory values like Euro(€),rupee(₹),doller($) should get detected as costs. Some examples are €10, €1200, €10,000, $100, $70,000, ₹25
If there are multiple values for same entity they should be extracted with | separation.
Random text should not get detected as any of above entities.
All entities from above should strictly be extracted
'''
    example1 ='''The international conference registration fee is €300 for European attendees, ₹25,000 for participants from India, and $350 for delegates from other countries. Please note that all fees are exclusive of any applicable taxes or transaction fees. Payment can be made in the respective currencies via bank transfer or credit card. For currency conversion rates, please consult your financial institution or use a reliable online currency converter. Early bird discounts are available for registrations completed before the deadline. We look forward to welcoming you to the conference!
Answer:
Costs: €300 | ₹25,000 | $350
End-Answer'''
    example2 = '''The project budget for the construction of the new pharmaceutical manufacturing facility has been estimated at €10 million, with additional expenses projected in Indian rupees (INR) amounting to ₹50 crore. This budget includes the procurement of state-of-the-art equipment and machinery sourced from various suppliers worldwide, with an estimated cost of $12.5 million in US dollars. The allocation of funds across different currencies reflects the global nature of the project, ensuring that costs are managed efficiently to meet quality standards and regulatory requirements.
Answer:
Costs: €10 | ₹50 | $12.5
End-Answer'''
    example3 = '''She budgeted approximately $150 per night for accommodations, €50 for meals, and ₹1,500 for daily miscellaneous expenses. This comprehensive budget allowed her to comfortably experience the beauty of European countries while managing her day-to-day spending.
Answer:
Costs: $150 | €50 | ₹1,500
End-Answer'''
    example4 = '''Bank Name: ABC Bank Branch: XYZ Branch Account Holder: [Your Name] Account Number: 123-456-789 SWIFT Code: XYZABCD1234 IBAN: [Your IBAN Number] Routing Number: 987654321
Answer:
Account_number: 123-456-789
Swift_code: XYZABCD1234
End-Answer'''
    example5 = '''Bank Name: DEF Bank Branch: PQR Branch Account Holder: [Your Name] Account Number: 9876543210 SWIFT Code: DEFPQR12345 IBAN: [Your IBAN Number] Routing Number: 123456789
Answer:
Account_number: 9876543210
Swift_code: DEFPQR12345
End-Answer'''
    example6 = '''Final Clinical Study Report - NV25118: A Randomized, Multicenter, Single Blinded, Parallel Study of the Safety of 100 mg and 200 mg Oseltamivir Administered Intravenously for the Treatment of Influenza in Patients Aged > 13 Years. Report No. 1037027. June 3, 2013
Answer:
Dosages: 100 mg | 200 mg
Drug_names: Oseltamivir
End-Answer'''
    example7 = '''Prescription Label: "Take 1 tablet by mouth daily with food." "Apply a thin layer of cream to the affected area twice daily." "Administer 10 mg/mL orally every 4 hours as needed for pain."Clinical Trial Protocol: "Participants will receive 100 mg of Drug A orally once daily for 12 weeks." "Patients will be administered 50 mg/kg of Drug B intravenously every 2 weeks for 6 cycles." "Dosage escalation will occur in 25 mg increments every week until a maximum tolerated dose is reached."
Answer:
Dosages: 1 tablet | 10 mg/ml | 100 mg | 50 mg/kg | 25 mg
End-Answer'''
    example8 = '''Medication Package Insert:"Recommended dosage for adults: 500μg to 1000μg orally every 6 hours, as needed." "Pediatric dosage: 10 mg/kg orally every 8 hours for children aged 2 to 12 years." "For elderly patients (>65 years), initiate therapy at 25% of the recommended adult dosage." Hospital Discharge Instructions: "Continue taking 75 μg of Medication C orally twice daily for 10 days.""Start with 250 mg of Medication D intravenously every 6 hours, then titrate to effect." "Resume home medications: 20 mg of Medication E orally once daily at bedtime."
Answer:
Dosages: 500μg | 1000μg | 10 mg/kg | 75 μg | 250 mg | 20 mg
End-Answer'''
    example9 = '''Acme Corporation, represented herein as Party A, hereby enters into a distribution agreement with Smith & Sons Enterprises, hereinafter referred to as Party B. Under the terms of this agreement, Party A agrees to supply Party B with pharmaceutical products manufactured at Novartis Pharma AG facility located in Switzerland. Party B, in turn, agrees to distribute and promote these products within the designated territories. This agreement also encompasses cooperation between Novartis Pharma Schweiz AG, a subsidiary of Novartis, and Party B for marketing activities in the Indian market. Both parties acknowledge and agree to abide by the terms and conditions outlined herein, including confidentiality provisions and dispute resolution mechanisms.
Answer:
Organization_names: Acme Corporation | Smith & Sons Enterprises| Novartis Pharma AG | Novartis Pharma Schweiz AG
End-Answer'''
    example10 = '''This Research Collaboration Agreement ("Agreement") is entered into on [Date] by and between Novartis Institutes for BioMedical Research, Inc. ("Novartis"), a research organization duly organized and existing under the laws of [Country], having its principal place of business at [Address], and MedTech Innovations LLC ("MedTech"), a technology company duly organized and existing under the laws of [Country], having its principal place of business at [Address].
Answer:
Organization_names: Novartis Institutes for BioMedical Research, Inc. | MedTech Innovations LLC.
End-Answer'''
    example11 = '''This Task Order CPDR001F2301 (“Task Order”) shall be binding upon the undersigned upon its execution by the duly authorized representative(s) of Novartis. ', 'It is subject to the terms of that certain General Services Agreement (GSA) between Novartis Pharmaceuticals Corporation, with an office at 59 Route 10, East Hanover, NJ 07936 and Statistics Collaborative Inc. (“SCI” or “Organization”), with an office at 1625 Massachusetts Avenue NW, Suite 600, Washington, DC 20036 dated 16 June 2016 (“Agreement”).\n
Answer:
Organization_names: Novartis Pharmaceuticals Corporation | Statistics Collaborative Inc.
End-Answer'''
    example12 = '''Afinitor, Arcapta Neohaler, and Clozaril are among the pharmaceuticals frequently prescribed for a variety of medical conditions.
Answer:
Drug_names: Afinitor | Arcapta Neohaler | Clozaril
End-Answer'''
    example13 = '''Femara functions as a protease-activated receptor-1 (PAR-1) antagonist, effectively inhibiting platelet activation and reducing the risk of thrombotic events in patients with a history of myocardial infarction or peripheral arterial disease. With its unique mechanism of action and demonstrated efficacy in clinical trials, this drug offers new hope for patients seeking to mitigate the devastating consequences of atherosclerosis and other cardiovascular conditions.
Answer:
Drug_names: Femara
End-Answer
'''
    example14 = '''Coartem, Kisqali, and Leqvio are among the pharmaceuticals utilized in the treatment and management of diverse medical conditions. specific dosage regimen based on the patient's weight and age. 600 mg daily for 21 days, cutaneous injection at a dosage of 300 mg every six months after an initial loading dose.
Answer:
Drug_names: Coartem | Kisqali | Leqvio
Dosages: 600 mg | 300 mg
End-Answer
'''
    # The label is "Organization_names" here (it used to be misspelled
    # "Orgnaization_names") so it matches the other shots and the system prompt.
    example15 = '''This Task Order CPDRB0081E5 (“Task Order”) shall be binding upon the undersigned upon its execution by the duly authorized representative(s) of Novartis. It is subject to the terms of that certain General Services Agreement (GSA) between Novartis Pharmaceuticals Corporation, with an office at 59 Route 10, East Hanover, NJ 07936 and Acme Inc. (“Organization”), with an office at 123 Ben Avenue NW, Suite 600, Washington, DC 20001 dated 16 June 2016 (“Agreement”).
Answer:
Organization_names: Novartis Pharmaceuticals Corporation | Acme Inc
End-Answer
'''
    # This shot now carries the "Answer:" marker line like every other shot;
    # it was missing, so the labels ran straight on from the paragraph.
    # NOTE(review): the paragraph text begins with a literal quote character
    # (the fourth quote below); it is kept as-is since it may mirror the
    # list-like formatting of real inputs -- confirm.
    example16 = ''''Patient 25291/3280 Oseltamivir 100 mg IV q12h for 5 days H275H/Y Patient 25291/3280 was a 57 year old male enrolled on 16-09-2021 , 2 days after onset of influenza symptoms in the hospital. ', "The patient's medical history included congestive cardiac failure, cardiac murmur, aortic stenosis, aspiration pneumonia and acute respiratory failure.
Answer:
Drug_names: Oseltamivir
Dosages: 100 mg
End-Answer
'''
    # NOTE(review): the header names "[Person/Name, Dosages,]", which does not
    # match the entity types described in system_prompt2. It is returned to
    # the caller, so it is left unchanged here -- confirm the intended wording.
    whole_task = '''Given the paragraph below identify list of entities that are mentioned in list [Person/Name, Dosages,].
Paragraph:'''
    # Order in which the shots appear in the prompt.
    example_list = [
        example1, example5, example3, example9, example4, example6,
        example13, example2, example7, example8, example10, example11,
        example12, example15, example14, example16,
    ]
    # Each shot is rendered as "<task header>\n<example>\n".
    shots = ''.join(whole_task + '\n' + ex + '\n' for ex in example_list)
    final_prompt = system_prompt2 + '\n' + shots
    return final_prompt, whole_task