Muhammadidrees committed on
Commit 373f237 · verified · 1 Parent(s): 7ba8f15

Upload 15 files

Files changed (16)
  1. .gitattributes +1 -0
  2. Coreectcodewithoutfronted.py +141 -0
  3. LICENSE +201 -0
  4. README.md +182 -12
  5. alpaca_data.json +3 -0
  6. chat.py +100 -0
  7. chatdoctor5k.json +0 -0
  8. format_dataset.csv +0 -0
  9. frontend.py +313 -0
  10. frontend_VOic.py +459 -0
  11. requirements.txt +10 -0
  12. teak.py +103 -0
  13. test.py +328 -0
  14. train.py +231 -0
  15. train_lora.py +321 -0
  16. utils.py +174 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ alpaca_data.json filter=lfs diff=lfs merge=lfs -text
Coreectcodewithoutfronted.py ADDED
@@ -0,0 +1,141 @@
+ import os
+ import gc
+ import torch
+ from transformers import LlamaTokenizer, LlamaForCausalLM, StoppingCriteria, StoppingCriteriaList
+
+ # =============================
+ # Configuration
+ # =============================
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
+ MAX_NEW_TOKENS = 200
+ TEMPERATURE = 0.5
+ TOP_K = 50
+ REPETITION_PENALTY = 1.1
+
+ # Detect device
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Loading model from {MODEL_PATH} on {device}...")
+
+ # =============================
+ # Load Tokenizer and Model
+ # =============================
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL_PATH,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     low_cpu_mem_usage=True
+ )
+
+ generator = model.generate
+ print("✅ ChatDoctor model loaded successfully!\n")
+
+ # =============================
+ # Stopping Criteria
+ # =============================
+ class StopOnTokens(StoppingCriteria):
+     def __init__(self, stop_ids):
+         self.stop_ids = stop_ids
+
+     def __call__(self, input_ids, scores, **kwargs):
+         for stop_id_seq in self.stop_ids:
+             if len(stop_id_seq) == 1:
+                 if input_ids[0][-1] == stop_id_seq[0]:
+                     return True
+             else:
+                 if len(input_ids[0]) >= len(stop_id_seq):
+                     if input_ids[0][-len(stop_id_seq):].tolist() == stop_id_seq:
+                         return True
+         return False
+
+ # =============================
+ # Chat History
+ # =============================
+ history = ["ChatDoctor: I am ChatDoctor, your AI medical assistant. How can I help you today?"]
+
+ # =============================
+ # Get Response Function
+ # =============================
+ def get_response(user_input):
+     global history
+     human_invitation = "Patient: "
+     doctor_invitation = "ChatDoctor: "
+
+     # Add user input to history
+     history.append(human_invitation + user_input)
+
+     # Build conversation prompt
+     prompt = "\n".join(history) + "\n" + doctor_invitation
+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+     # Define stop words and their token IDs
+     stop_words = ["Patient:", "\nPatient:", "Patient :", "\n\nPatient"]
+     stop_ids = [tokenizer.encode(word, add_special_tokens=False) for word in stop_words]
+     stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
+
+     # Generate model response
+     with torch.no_grad():
+         output_ids = generator(
+             input_ids,
+             max_new_tokens=MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             repetition_penalty=REPETITION_PENALTY,
+             stopping_criteria=stopping_criteria,
+             pad_token_id=tokenizer.eos_token_id,
+             eos_token_id=tokenizer.eos_token_id
+         )
+
+     # Decode and clean response
+     full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     response = full_output[len(prompt):].strip()
+
+     # Remove any "Patient:" that might have slipped through
+     for stop_word in ["Patient:", "Patient :", "\nPatient:", "\nPatient", "Patient"]:
+         if stop_word in response:
+             response = response.split(stop_word)[0].strip()
+             break
+
+     # Remove any leading/trailing punctuation artifacts
+     response = response.strip()
+
+     history.append(doctor_invitation + response)
+
+     # Free memory
+     del input_ids, output_ids
+     gc.collect()
+     torch.cuda.empty_cache()
+
+     return response
+
+ # =============================
+ # Chat Loop
+ # =============================
+ if __name__ == "__main__":
+     print("\n=== ChatDoctor is ready! ===")
+     print("You (the human) = Patient")
+     print("AI = ChatDoctor")
+     print("Type 'exit' or 'quit' to end the chat.\n")
+
+     print("ChatDoctor: Hi there! How can I help you today?\n")
+
+     while True:
+         try:
+             user_input = input("Patient: ").strip()
+             if user_input.lower() in ["exit", "quit"]:
+                 print("ChatDoctor: Take care! Goodbye")
+                 break
+
+             if not user_input:
+                 continue
+
+             response = get_response(user_input)
+             print("ChatDoctor:", response, "\n")
+
+         except KeyboardInterrupt:
+             print("\nChatDoctor: Take care! Goodbye")
+             break
+         except Exception as e:
+             print(f"Error: {e}")
+             print("Please try again.\n")
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,182 @@
- ---
- title: JAYConverstionalAI
- emoji: 🏃
- colorFrom: green
- colorTo: blue
- sdk: gradio
- sdk_version: 5.49.1
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <p align="center" width="80%">
+ <img src="fig/logo.png" style="width: 40%; min-width: 300px; display: block; margin: auto;">
+ </p>
+
+
+ # [ChatDoctor: A Medical Chat Model Fine-Tuned on a Large Language Model Meta-AI (LLaMA) Using Medical Domain Knowledge](https://www.cureus.com/articles/152858-chatdoctor-a-medical-chat-model-fine-tuned-on-a-large-language-model-meta-ai-llama-using-medical-domain-knowledge#!/)
+ Yunxiang Li<sup>1</sup>, Zihan Li<sup>2</sup>, Kai Zhang<sup>3</sup>, Ruilong Dan<sup>4</sup>, Steve Jiang<sup>1</sup>, You Zhang<sup>1</sup>
+ <h5>1 UT Southwestern Medical Center, USA</h5>
+ <h5>2 University of Illinois at Urbana-Champaign, USA</h5>
+ <h5>3 Ohio State University, USA</h5>
+ <h5>4 Hangzhou Dianzi University, China</h5>
+
+
+ [![License](https://img.shields.io/badge/License-Apache_2.0-green.svg)](https://github.com/HUANGLIZI/ChatDoctor/blob/main/LICENSE)
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
+ [![Page](https://img.shields.io/badge/Web-Page-yellow)](https://www.yunxiangli.top/ChatDoctor/)
+
+ ## Resources List
+ Autonomous ChatDoctor with disease database: [Demo](https://huggingface.co/spaces/kenton-li/chatdoctor_csv).
+
+ 100k real conversations between patients and doctors from HealthCareMagic.com: [HealthCareMagic-100k](https://drive.google.com/file/d/1lyfqIwlLSClhgrCutWuEe_IACNq6XNUt/view?usp=sharing).
+
+ Real conversations between patients and doctors from icliniq.com: [icliniq-10k](https://drive.google.com/file/d/1ZKbqgYqWc7DJHs3N9TQYQVPdDQmZaClA/view?usp=sharing).
+
+ Checkpoints of ChatDoctor: [link](https://drive.google.com/drive/folders/11-qPzz9ZdHD6pc47wBSOUSU61MaDPyRh?usp=sharing).
+
+ Stanford Alpaca data for basic conversational capabilities: [Alpaca link](https://github.com/Kent0n-Li/ChatDoctor/blob/main/alpaca_data.json).
+
+
+ <p align="center" width="100%">
+ <img src="fig/overview.PNG" style="width: 70%; min-width: 300px; display: block; margin: auto;">
+ </p>
+
+ <p align="center" width="100%">
+ <img src="fig/wiki.PNG" style="width: 70%; min-width: 300px; display: block; margin: auto;">
+ </p>
+
+
+ ## Setup
+ In a conda environment with PyTorch available, run:
+ ```
+ pip install -r requirements.txt
+ ```
+
+ ## Interactive Demo Page
+ Demo page: https://huggingface.co/spaces/kenton-li/chatdoctor_csv
+ Note that our model has not yet achieved 100% accurate output; please do not apply it to real clinical scenarios.
+
+ To try the online demo, please register for Hugging Face and fill out this [form](https://forms.office.com/Pages/ResponsePage.aspx?id=lYZBnaxxMUy1ssGWyOw8ij06Cb8qnDJKvu2bVpV1-ANURUU0TllBWVVHUjQ1MDJUNldGTTZWV1c5UC4u).
+
+ ## Data and Model
+ ### 1. ChatDoctor Dataset
+ You can download the following training datasets:
+
+ 100k real conversations between patients and doctors from HealthCareMagic.com: [HealthCareMagic-100k](https://drive.google.com/file/d/1lyfqIwlLSClhgrCutWuEe_IACNq6XNUt/view?usp=sharing).
+
+ 10k real conversations between patients and doctors from icliniq.com: [icliniq-10k](https://drive.google.com/file/d/1ZKbqgYqWc7DJHs3N9TQYQVPdDQmZaClA/view?usp=sharing).
+
+ 5k generated conversations between patients and physicians from ChatGPT: [GenMedGPT-5k](https://drive.google.com/file/d/1nDTKZ3wZbZWTkFMBkxlamrzbNz0frugg/view?usp=sharing) and the [disease database](https://github.com/Kent0n-Li/ChatDoctor/blob/main/format_dataset.csv).
+
+ Our model was first fine-tuned on Stanford Alpaca's data to give it basic conversational capabilities: [Alpaca link](https://github.com/Kent0n-Li/ChatDoctor/blob/main/alpaca_data.json).
+
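For orientation, `alpaca_data.json` follows the Stanford Alpaca schema of `instruction`/`input`/`output` records, and the ChatDoctor conversation files are distributed in the same shape. A minimal sketch for inspecting a downloaded file; the field names are the Alpaca convention and worth verifying against your copy:

```python
import json

# Load an Alpaca-style dataset and peek at one record.
# Field names (instruction / input / output) follow the Stanford Alpaca schema;
# confirm them against your downloaded file before training.
with open("alpaca_data.json", "r", encoding="utf-8") as f:
    records = json.load(f)

print(f"{len(records)} records")
example = records[0]
print("instruction:", example["instruction"])
print("input:", example.get("input", ""))   # often empty
print("output:", example["output"])
```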
+ ### 2. Model Weights
+
+ Place the model weight files in the ./pretrained folder.
+
+ ## How to fine-tune
+
+ ```
+ torchrun --nproc_per_node=4 --master_port=<your_random_port> train.py \
+     --model_name_or_path <your_path_to_hf_converted_llama_ckpt_and_tokenizer> \
+     --data_path ./HealthCareMagic-100k.json \
+     --bf16 True \
+     --output_dir pretrained \
+     --num_train_epochs 1 \
+     --per_device_train_batch_size 4 \
+     --per_device_eval_batch_size 4 \
+     --gradient_accumulation_steps 8 \
+     --evaluation_strategy "no" \
+     --save_strategy "steps" \
+     --save_steps 2000 \
+     --save_total_limit 1 \
+     --learning_rate 2e-6 \
+     --weight_decay 0. \
+     --warmup_ratio 0.03 \
+     --lr_scheduler_type "cosine" \
+     --logging_steps 1 \
+     --fsdp "full_shard auto_wrap" \
+     --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \
+     --tf32 True
+ ```
+
+ Fine-tuning with LoRA:
+ ```
+ WORLD_SIZE=6 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 torchrun --nproc_per_node=6 --master_port=4567 train_lora.py \
+     --base_model './weights-alpaca/' \
+     --data_path 'HealthCareMagic-100k.json' \
+     --output_dir './lora_models/' \
+     --batch_size 32 \
+     --micro_batch_size 4 \
+     --num_epochs 1 \
+     --learning_rate 3e-5 \
+     --cutoff_len 256 \
+     --val_set_size 120 \
+     --adapter_name lora
+ ```
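To chat with the LoRA-tuned model, the saved adapter has to be attached to the base weights first. A minimal sketch, assuming `train_lora.py` writes a standard PEFT adapter to `./lora_models/` (paths are illustrative):

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel

# Load the base model used for LoRA training, then attach the saved adapter.
base = LlamaForCausalLM.from_pretrained(
    "./weights-alpaca/", torch_dtype=torch.float16, device_map="auto"
)
tokenizer = LlamaTokenizer.from_pretrained("./weights-alpaca/")

model = PeftModel.from_pretrained(base, "./lora_models/")
model = model.merge_and_unload()  # optionally fold LoRA weights into the base for plain inference
model.eval()
```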
+
+ ## How to run inference
+ You can build a ChatDoctor model on your own machine and chat with it:
+ ```
+ python chat.py
+ ```
+
+ ```
+ cd Autonomous_ChatDoctor_csv
+ python chat_csv.py
+ ```
+
+ ```
+ cd Autonomous_ChatDoctor_Wikipedia
+ python chat_wiki.py
+ ```
+
+ ## Examples
+ We fine-tuned our model on HealthCareMagic-100k and tested it on iCliniq-10k.
+
+ ```
+ Patient question from icliniq: Hi doctor,I am getting one-sided headache. I do not get them often. They are behind my right eye and temple area. Now this time I am also fighting with heavy cold and cough. It came on late morning today and it hurts more when I cough or shake my head or bend over. Prior to this, I had the same headache about four months ago. At that time it came on midafternoon and lasted till next morning. Prior to that, I had the same headache exactly on the right side and lasted for a day. I am on CCB and Protonix. I am scared of brain tumor. Please help
+
+ Real doctor answer from icliniq: Hi. Based on what you have told, this is more likely to be due to sinusitis. Sinuses are small air filled cavities in your skull bone. They can get infected and when they are infected, you will get headaches behind eyes. Such headaches will be more common during cold and cough. Brain tumor headache will be on both sides, more in forehead and you will also have vomiting in that case. Based on what you have told, this is more likely due to simple sinus infection instead of brain tumor. CT brain and CT paranasal sinuses.
+
+ Answer from ChatGPT: Hello! I'm sorry to hear you're experiencing these headaches. Based on the location of the pain, it sounds like it may be a tension headache or a migraine. These types of headaches can sometimes be triggered by colds or other illnesses. However, it's always best to get checked out by your doctor to rule out any other underlying conditions. In terms of a brain tumor, that is a very rare cause of headaches. Most people who experience headaches do not have a brain tumor. That being said, if you have any concerning symptoms such as vision changes, weakness, or numbness, it is important to seek medical attention right away. In the meantime, you can try taking over-the-counter pain relievers such as ibuprofen or acetaminophen to help with the headache. It's also important to get plenty of rest and stay hydrated. If the headaches persist or become more frequent, please make an appointment with your doctor. Take care!
+
+ Answer from ChatDoctor: Hi, Thank you for posting your query. First, I would like to reassure you that there is no need to worry about brain tumor. Also, we should keep migraine apart as well. The most likely cause of your headaches could be sinusitis. This can be confirmed by doing an X-ray of face. Treatment would consist of using decongestant tablets/ nasal sprays. Best wishes, Chat Doctor.
+ ```
+ ChatDoctor accurately mentions sinusitis, which also appears in the real doctor's answer from iCliniq.
+
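Since requirements.txt in this commit pulls in `rouge_score`, here is a minimal sketch of how one could score a generated answer against the reference iCliniq answer; the strings are illustrative stand-ins, and this is not the authors' evaluation pipeline:

```python
from rouge_score import rouge_scorer

# Illustrative stand-ins for a reference answer and a model answer.
reference = "Based on what you have told, this is more likely to be due to sinusitis."
prediction = "The most likely cause of your headaches could be sinusitis."

scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
scores = scorer.score(reference, prediction)
print("ROUGE-1 F1:", scores["rouge1"].fmeasure)
print("ROUGE-L F1:", scores["rougeL"].fmeasure)
```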
+
+ ## Overview
+ ChatDoctor is a next-generation AI doctor model based on the [LLaMA](https://github.com/facebookresearch/llama) model. The goal of this project is to provide patients with an intelligent and reliable healthcare companion that can answer their medical queries and provide personalized medical advice.
+
+ ChatDoctor is an advanced language model designed specifically for medical applications. It has been trained on a large corpus of medical literature and has a deep understanding of medical terminology, procedures, and diagnoses. This foundation enables ChatDoctor to analyze patients' symptoms and medical history, provide accurate diagnoses, and suggest appropriate treatment options.
+
+ The ChatDoctor model is designed to simulate a conversation between a doctor and a patient, using natural language processing (NLP) and machine learning techniques. Patients can interact with the model through a chat interface, asking questions about their health, symptoms, or medical conditions. The model then analyzes the input and provides a response tailored to the patient's unique situation.
+
+ One of the key features of the ChatDoctor model is its ability to learn and adapt over time. As more patients interact with the model, it continues to refine its responses and improve its accuracy, so patients can expect increasingly personalized and accurate medical advice.
+
+
+ ## Patient-Physician Conversation Dataset
+ The first step in fine-tuning is to collect a dataset of patient-physician conversations. In such conversations, the patient's descriptions of disease symptoms are often colloquial and cursory. Manually constructing a synthesized patient-physician conversation dataset tends to yield insufficient diversity and over-specialized descriptions that are far removed from real scenarios, so collecting real patient-physician conversations is a better solution. We therefore collected about 100k real doctor-patient conversations from the online medical consultation website HealthCareMagic (www.healthcaremagic.com). We filtered these data both manually and automatically, removed identifying information about the doctors and patients, and used language tools to correct grammatical errors; we named this dataset HealthCareMagic-100k. In addition, we collected approximately 10k patient-physician conversations from the online medical consultation website iCliniq to evaluate the performance of our model.
+
+
+ ## Autonomous ChatDoctor Based on a Knowledge Brain
+ Equipped with an external knowledge brain, i.e., Wikipedia or our constructed database encompassing over 700 diseases, ChatDoctor can retrieve the corresponding knowledge and reliable sources to answer patients' inquiries more accurately. After constructing the external knowledge brain, we need ChatDoctor to retrieve the knowledge it needs autonomously, which can generally be achieved in a large language model by constructing appropriate prompts. To automate this process, we design keyword-mining prompts that have ChatDoctor extract the key terms used for knowledge seeking. The top-ranked relevant passages are then retrieved from the knowledge brain with a term-matching retrieval system. For the disease database, since the model cannot read all the data at once, we first let the model read the data in batches and select the entries that might help answer the patient's question; finally, all the entries selected by the model are given to the model for a final answer. This approach helps ensure that patients receive well-informed and precise responses backed by credible references.
+
+
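To make the term-matching step concrete, here is a minimal sketch of the retrieval idea described above. The CSV path, column layout, and keyword-overlap scoring are assumptions for illustration, not the project's actual retrieval code:

```python
import csv

def retrieve_entries(keywords, csv_path="format_dataset.csv", top_k=3):
    """Score each disease-database row by keyword overlap and return the best rows."""
    keywords = {k.lower() for k in keywords}
    scored = []
    with open(csv_path, newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            text = " ".join(row).lower()
            score = sum(1 for k in keywords if k in text)
            if score:
                scored.append((score, row))
    scored.sort(key=lambda item: item[0], reverse=True)
    return [row for _, row in scored[:top_k]]

# Example: terms a keyword-mining prompt might extract from a patient query.
print(retrieve_entries(["headache", "sinusitis", "eye pain"]))
```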
+ ## Limitations
+ We emphasize that ChatDoctor is for academic research only; any commercial or clinical use is prohibited. Three factors drive this decision: first, ChatDoctor is based on LLaMA, which carries a non-commercial license, so we necessarily inherit that restriction; second, our model is not licensed for healthcare-related purposes; and third, we have not designed sufficient safety measures, and the current model still cannot guarantee the full correctness of medical diagnoses.
+
+
+ ## Reference
+
+ ChatDoctor: A Medical Chat Model Fine-tuned on LLaMA Model using Medical Domain Knowledge
+
+ ```
+ @article{li2023chatdoctor,
+   title={ChatDoctor: A Medical Chat Model Fine-Tuned on a Large Language Model Meta-AI (LLaMA) Using Medical Domain Knowledge},
+   author={Li, Yunxiang and Li, Zihan and Zhang, Kai and Dan, Ruilong and Jiang, Steve and Zhang, You},
+   journal={Cureus},
+   volume={15},
+   number={6},
+   year={2023},
+   publisher={Cureus}
+ }
+ ```
alpaca_data.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a0a9920c72e27b32013e5c4ad7727d9eede8eaab75c3f4b7eb62eda019561d7
+ size 23034003
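Note: this is a Git LFS pointer, not the dataset itself; the roughly 23 MB JSON payload lives in LFS storage. After cloning the repository, running `git lfs install` once and then `git lfs pull` fetches the actual file.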
chat.py ADDED
@@ -0,0 +1,100 @@
+ # chat.py
+ import os
+ import gc
+ import torch
+ from transformers import LlamaTokenizer, LlamaForCausalLM
+
+ # =============================
+ # Configuration
+ # =============================
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
+ MAX_NEW_TOKENS = 200
+ TEMPERATURE = 0.5
+ TOP_K = 50
+ REPETITION_PENALTY = 1.1
+
+ # Detect device
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Loading model from {MODEL_PATH} on {device}...")
+
+ # =============================
+ # Load Tokenizer and Model
+ # =============================
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
+
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL_PATH,
+     device_map="auto",          # automatically dispatch weights to GPU
+     torch_dtype=torch.float16,  # half precision for faster inference
+     low_cpu_mem_usage=True      # optimize CPU memory
+ )
+
+ # DO NOT call model.to(device) when using device_map="auto"
+ generator = model.generate
+ print("✅ Model loaded successfully!\n")
+
+ # =============================
+ # Chat History
+ # =============================
+ history = ["ChatDoctor: I am ChatDoctor, what medical questions do you have?"]
+
+ # =============================
+ # Response Function
+ # =============================
+ def get_response(user_input):
+     global history
+     human_invitation = "Patient: "
+     doctor_invitation = "ChatDoctor: "
+
+     # Append user input
+     history.append(human_invitation + user_input)
+
+     # Build prompt
+     prompt = "\n".join(history) + "\n" + doctor_invitation
+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+     # Generate response
+     with torch.no_grad():
+         output_ids = generator(
+             input_ids,
+             max_new_tokens=MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             repetition_penalty=REPETITION_PENALTY
+         )
+
+     # Decode response
+     full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     response = full_output[len(prompt):].strip()
+
+     # Clean if the model repeats the patient prompt
+     if response.startswith("Patient:"):
+         response = response[len("Patient:"):].strip()
+
+     # Append model response to history
+     history.append(doctor_invitation + response)
+
+     # Free memory
+     del input_ids, output_ids
+     gc.collect()
+     torch.cuda.empty_cache()
+
+     return response
+
+ # =============================
+ # CLI Chat
+ # =============================
+ if __name__ == "__main__":
+     print("\n=== ChatDoctor is ready! Type your questions. ===\n")
+     while True:
+         try:
+             user_input = input("Patient: ").strip()
+             if user_input.lower() in ["exit", "quit"]:
+                 print("Exiting ChatDoctor. Goodbye!")
+                 break
+             response = get_response(user_input)
+             print("ChatDoctor: " + response + "\n")
+         except KeyboardInterrupt:
+             print("\nExiting ChatDoctor. Goodbye!")
+             break
chatdoctor5k.json ADDED
The diff for this file is too large to render. See raw diff
 
format_dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
frontend.py ADDED
@@ -0,0 +1,313 @@
1
+ import os
2
+ import gc
3
+ import torch
4
+ import gradio as gr
5
+ from transformers import LlamaTokenizer, LlamaForCausalLM, StoppingCriteria, StoppingCriteriaList
6
+
7
+ # =============================
8
+ # Configuration
9
+ # =============================
10
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
11
+ MAX_NEW_TOKENS = 200
12
+ TEMPERATURE = 0.5
13
+ TOP_K = 50
14
+ REPETITION_PENALTY = 1.1
15
+
16
+ # Detect device
17
+ device = "cuda" if torch.cuda.is_available() else "cpu"
18
+ print(f"Loading model from {MODEL_PATH} on {device}...")
19
+
20
+ # =============================
21
+ # Load Tokenizer and Model
22
+ # =============================
23
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
24
+ model = LlamaForCausalLM.from_pretrained(
25
+ MODEL_PATH,
26
+ device_map="auto",
27
+ torch_dtype=torch.float16,
28
+ low_cpu_mem_usage=True
29
+ )
30
+
31
+ generator = model.generate
32
+ print("✅ ChatDoctor model loaded successfully!\n")
33
+
34
+ # =============================
35
+ # Stopping Criteria
36
+ # =============================
37
+ class StopOnTokens(StoppingCriteria):
38
+ def __init__(self, stop_ids):
39
+ self.stop_ids = stop_ids
40
+
41
+ def __call__(self, input_ids, scores, **kwargs):
42
+ for stop_id_seq in self.stop_ids:
43
+ if len(stop_id_seq) == 1:
44
+ if input_ids[0][-1] == stop_id_seq[0]:
45
+ return True
46
+ else:
47
+ if len(input_ids[0]) >= len(stop_id_seq):
48
+ if input_ids[0][-len(stop_id_seq):].tolist() == stop_id_seq:
49
+ return True
50
+ return False
51
+
52
+ # =============================
53
+ # Chat History (Global)
54
+ # =============================
55
+ conversation_history = []
56
+
57
+ # =============================
58
+ # Get Response Function
59
+ # =============================
60
+ def get_response(user_input, history_context):
61
+ """Generate response from ChatDoctor model"""
62
+ human_invitation = "Patient: "
63
+ doctor_invitation = "ChatDoctor: "
64
+
65
+ # Build conversation from history
66
+ history_text = []
67
+ for human, assistant in history_context:
68
+ if human:
69
+ history_text.append(human_invitation + human)
70
+ if assistant:
71
+ history_text.append(doctor_invitation + assistant)
72
+
73
+ # Add current user input
74
+ history_text.append(human_invitation + user_input)
75
+
76
+ # Build conversation prompt
77
+ prompt = "\n".join(history_text) + "\n" + doctor_invitation
78
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
79
+
80
+ # Define stop words and their token IDs
81
+ stop_words = ["Patient:", "\nPatient:", "Patient :", "\n\nPatient"]
82
+ stop_ids = [tokenizer.encode(word, add_special_tokens=False) for word in stop_words]
83
+ stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
84
+
85
+ # Generate model response
86
+ with torch.no_grad():
87
+ output_ids = generator(
88
+ input_ids,
89
+ max_new_tokens=MAX_NEW_TOKENS,
90
+ do_sample=True,
91
+ temperature=TEMPERATURE,
92
+ top_k=TOP_K,
93
+ repetition_penalty=REPETITION_PENALTY,
94
+ stopping_criteria=stopping_criteria,
95
+ pad_token_id=tokenizer.eos_token_id,
96
+ eos_token_id=tokenizer.eos_token_id
97
+ )
98
+
99
+ # Decode and clean response
100
+ full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
101
+ response = full_output[len(prompt):].strip()
102
+
103
+ # Remove any "Patient:" that might have slipped through
104
+ for stop_word in ["Patient:", "Patient :", "\nPatient:", "\nPatient", "Patient"]:
105
+ if stop_word in response:
106
+ response = response.split(stop_word)[0].strip()
107
+ break
108
+
109
+ response = response.strip()
110
+
111
+ # Free memory
112
+ del input_ids, output_ids
113
+ gc.collect()
114
+ torch.cuda.empty_cache()
115
+
116
+ return response
117
+
118
+ # =============================
119
+ # Gradio Chat Function
120
+ # =============================
121
+ def chat_function(message, history):
122
+ """Gradio chat interface function"""
123
+ if not message.strip():
124
+ return ""
125
+
126
+ try:
127
+ response = get_response(message, history)
128
+ return response
129
+ except Exception as e:
130
+ return f"Error: {str(e)}"
131
+
132
+ # =============================
133
+ # Custom CSS
134
+ # =============================
135
+ custom_css = """
136
+ #header {
137
+ text-align: center;
138
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
139
+ color: white;
140
+ padding: 20px;
141
+ border-radius: 10px;
142
+ margin-bottom: 20px;
143
+ }
144
+
145
+ #header h1 {
146
+ margin: 0;
147
+ font-size: 2.5em;
148
+ }
149
+
150
+ #header p {
151
+ margin: 10px 0 0 0;
152
+ font-size: 1.1em;
153
+ opacity: 0.9;
154
+ }
155
+
156
+ .disclaimer {
157
+ background-color: #fff3cd;
158
+ border: 1px solid #ffc107;
159
+ border-radius: 8px;
160
+ padding: 15px;
161
+ margin: 20px 0;
162
+ color: #856404;
163
+ }
164
+
165
+ .disclaimer h3 {
166
+ margin-top: 0;
167
+ color: #856404;
168
+ }
169
+
170
+ footer {
171
+ text-align: center;
172
+ margin-top: 30px;
173
+ color: #666;
174
+ font-size: 0.9em;
175
+ }
176
+ """
177
+
178
+ # =============================
179
+ # Gradio Interface
180
+ # =============================
181
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
182
+ # Header
183
+ gr.HTML("""
184
+ <div id="header">
185
+ <h1>🩺 ChatDoctor AI Assistant</h1>
186
+ <p>Your AI-powered medical conversation partner</p>
187
+ </div>
188
+ """)
189
+
190
+ # Disclaimer
191
+ gr.HTML("""
192
+ <div class="disclaimer">
193
+ <h3>⚠️ Medical Disclaimer</h3>
194
+ <p><strong>Important:</strong> This AI assistant is for informational and educational purposes only.
195
+ It is NOT a substitute for professional medical advice, diagnosis, or treatment.
196
+ Always seek the advice of your physician or other qualified health provider with any questions
197
+ you may have regarding a medical condition. Never disregard professional medical advice or
198
+ delay in seeking it because of something you have read here.</p>
199
+ </div>
200
+ """)
201
+
202
+ # Chatbot Interface
203
+ chatbot = gr.Chatbot(
204
+ height=500,
205
+ placeholder="<div style='text-align: center; padding: 40px;'><h3>👋 Welcome to ChatDoctor!</h3><p>I'm here to discuss your health concerns. How can I assist you today?</p></div>",
206
+ show_label=False,
207
+ avatar_images=(None, "🤖"),
208
+ )
209
+
210
+ with gr.Row():
211
+ msg = gr.Textbox(
212
+ placeholder="Type your message here... (e.g., 'I have a headache')",
213
+ show_label=False,
214
+ scale=9,
215
+ container=False
216
+ )
217
+ submit_btn = gr.Button("Send 📤", scale=1, variant="primary")
218
+
219
+ with gr.Row():
220
+ clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
221
+ retry_btn = gr.Button("🔄 Retry", scale=1)
222
+
223
+ # Examples
224
+ gr.Examples(
225
+ examples=[
226
+ "I have a persistent headache for 3 days. What should I do?",
227
+ "What are the symptoms of diabetes?",
228
+ "How can I improve my sleep quality?",
229
+ "I have a fever and sore throat. Should I be concerned?",
230
+ "What are some natural ways to reduce stress?",
231
+ ],
232
+ inputs=msg,
233
+ label="💡 Example Questions"
234
+ )
235
+
236
+ # Settings (collapsed by default)
237
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
238
+ temperature_slider = gr.Slider(
239
+ minimum=0.1,
240
+ maximum=1.0,
241
+ value=TEMPERATURE,
242
+ step=0.1,
243
+ label="Temperature (Creativity)",
244
+ info="Higher values make responses more creative but less focused"
245
+ )
246
+ max_tokens_slider = gr.Slider(
247
+ minimum=50,
248
+ maximum=500,
249
+ value=MAX_NEW_TOKENS,
250
+ step=50,
251
+ label="Max Response Length",
252
+ info="Maximum number of tokens in response"
253
+ )
254
+ top_k_slider = gr.Slider(
255
+ minimum=1,
256
+ maximum=100,
257
+ value=TOP_K,
258
+ step=1,
259
+ label="Top K",
260
+ info="Limits vocabulary selection"
261
+ )
262
+
263
+ # Footer
264
+ gr.HTML("""
265
+ <footer>
266
+ <p>Powered by ChatDoctor Model | Built with Gradio</p>
267
+ <p>Device: """ + device.upper() + """ | Model: LLaMA-based Medical AI</p>
268
+ </footer>
269
+ """)
270
+
271
+ # Event handlers
272
+ def user_message(user_msg, history):
273
+ return "", history + [[user_msg, None]]
274
+
275
+ def bot_response(history, temp, max_tok, top_k_val):
276
+ global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
277
+ TEMPERATURE = temp
278
+ MAX_NEW_TOKENS = int(max_tok)
279
+ TOP_K = int(top_k_val)
280
+
281
+ user_msg = history[-1][0]
282
+ bot_msg = chat_function(user_msg, history[:-1])
283
+ history[-1][1] = bot_msg
284
+ return history
285
+
286
+ # Connect events
287
+ msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
288
+ bot_response, [chatbot, temperature_slider, max_tokens_slider, top_k_slider], chatbot
289
+ )
290
+
291
+ submit_btn.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
292
+ bot_response, [chatbot, temperature_slider, max_tokens_slider, top_k_slider], chatbot
293
+ )
294
+
295
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
296
+
297
+ def retry_last():
298
+ return None
299
+
300
+ retry_btn.click(retry_last, None, chatbot, queue=False)
301
+
302
+ # =============================
303
+ # Launch Interface
304
+ # =============================
305
+ if __name__ == "__main__":
306
+ print("\n🚀 Launching ChatDoctor Gradio Interface...")
307
+ demo.queue()
308
+ demo.launch(
309
+ server_name="0.0.0.0", # Accessible from network
310
+ server_port=7860,
311
+ share=False, # Set to True to create public link
312
+ show_error=True
313
+ )
frontend_VOic.py ADDED
@@ -0,0 +1,459 @@
1
+ import os
2
+ import gc
3
+ import torch
4
+ import gradio as gr
5
+ from transformers import LlamaTokenizer, LlamaForCausalLM, StoppingCriteria, StoppingCriteriaList
6
+
7
+ # =============================
8
+ # Configuration
9
+ # =============================
10
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
11
+ MAX_NEW_TOKENS = 200
12
+ TEMPERATURE = 0.5
13
+ TOP_K = 50
14
+ REPETITION_PENALTY = 1.1
15
+
16
+ # Detect device
17
+ device = "cuda" if torch.cuda.is_available() else "cpu"
18
+ print(f"Loading model from {MODEL_PATH} on {device}...")
19
+
20
+ # =============================
21
+ # Load Tokenizer and Model
22
+ # =============================
23
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
24
+ model = LlamaForCausalLM.from_pretrained(
25
+ MODEL_PATH,
26
+ device_map="auto",
27
+ torch_dtype=torch.float16,
28
+ low_cpu_mem_usage=True
29
+ )
30
+
31
+ generator = model.generate
32
+ print("✅ ChatDoctor model loaded successfully!\n")
33
+
34
+ # =============================
35
+ # Stopping Criteria
36
+ # =============================
37
+ class StopOnTokens(StoppingCriteria):
38
+ def __init__(self, stop_ids):
39
+ self.stop_ids = stop_ids
40
+
41
+ def __call__(self, input_ids, scores, **kwargs):
42
+ for stop_id_seq in self.stop_ids:
43
+ if len(stop_id_seq) == 1:
44
+ if input_ids[0][-1] == stop_id_seq[0]:
45
+ return True
46
+ else:
47
+ if len(input_ids[0]) >= len(stop_id_seq):
48
+ if input_ids[0][-len(stop_id_seq):].tolist() == stop_id_seq:
49
+ return True
50
+ return False
51
+
52
+ # =============================
53
+ # Get Response Function
54
+ # =============================
55
+ def get_response(user_input, history_context):
56
+ """Generate response from ChatDoctor model"""
57
+ human_invitation = "Patient: "
58
+ doctor_invitation = "ChatDoctor: "
59
+
60
+ # Build conversation from history
61
+ history_text = []
62
+ for human, assistant in history_context:
63
+ if human:
64
+ history_text.append(human_invitation + human)
65
+ if assistant:
66
+ history_text.append(doctor_invitation + assistant)
67
+
68
+ # Add current user input
69
+ history_text.append(human_invitation + user_input)
70
+
71
+ # Build conversation prompt
72
+ prompt = "\n".join(history_text) + "\n" + doctor_invitation
73
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
74
+
75
+ # Define stop words and their token IDs
76
+ stop_words = ["Patient:", "\nPatient:", "Patient :", "\n\nPatient"]
77
+ stop_ids = [tokenizer.encode(word, add_special_tokens=False) for word in stop_words]
78
+ stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
79
+
80
+ # Generate model response
81
+ with torch.no_grad():
82
+ output_ids = generator(
83
+ input_ids,
84
+ max_new_tokens=MAX_NEW_TOKENS,
85
+ do_sample=True,
86
+ temperature=TEMPERATURE,
87
+ top_k=TOP_K,
88
+ repetition_penalty=REPETITION_PENALTY,
89
+ stopping_criteria=stopping_criteria,
90
+ pad_token_id=tokenizer.eos_token_id,
91
+ eos_token_id=tokenizer.eos_token_id
92
+ )
93
+
94
+ # Decode and clean response
95
+ full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
96
+ response = full_output[len(prompt):].strip()
97
+
98
+ # Remove any "Patient:" that might have slipped through
99
+ for stop_word in ["Patient:", "Patient :", "\nPatient:", "\nPatient", "Patient"]:
100
+ if stop_word in response:
101
+ response = response.split(stop_word)[0].strip()
102
+ break
103
+
104
+ response = response.strip()
105
+
106
+ # Free memory
107
+ del input_ids, output_ids
108
+ gc.collect()
109
+ torch.cuda.empty_cache()
110
+
111
+ return response
112
+
113
+ # =============================
114
+ # Gradio Chat Function
115
+ # =============================
116
+ def chat_function(message, history):
117
+ """Gradio chat interface function"""
118
+ if not message.strip():
119
+ return ""
120
+
121
+ try:
122
+ response = get_response(message, history)
123
+ return response
124
+ except Exception as e:
125
+ return f"Error: {str(e)}"
126
+
127
+ # =============================
128
+ # Text-to-Speech Function
129
+ # =============================
130
+ def text_to_speech(text):
131
+ """Convert text response to speech"""
132
+ try:
133
+ from gtts import gTTS
134
+ import tempfile
135
+
136
+ if not text or text.startswith("Error:"):
137
+ return None
138
+
139
+ # Create speech
140
+ tts = gTTS(text=text, lang='en', slow=False)
141
+
142
+ # Save to temporary file
143
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
144
+ tts.save(temp_file.name)
145
+
146
+ return temp_file.name
147
+ except Exception as e:
148
+ print(f"TTS Error: {e}")
149
+ return None
150
+
151
+ # =============================
152
+ # Custom CSS
153
+ # =============================
154
+ custom_css = """
155
+ #header {
156
+ text-align: center;
157
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
158
+ color: white;
159
+ padding: 20px;
160
+ border-radius: 10px;
161
+ margin-bottom: 20px;
162
+ }
163
+
164
+ #header h1 {
165
+ margin: 0;
166
+ font-size: 2.5em;
167
+ }
168
+
169
+ #header p {
170
+ margin: 10px 0 0 0;
171
+ font-size: 1.1em;
172
+ opacity: 0.9;
173
+ }
174
+
175
+ .disclaimer {
176
+ background-color: #fff3cd;
177
+ border: 1px solid #ffc107;
178
+ border-radius: 8px;
179
+ padding: 15px;
180
+ margin: 20px 0;
181
+ color: #856404;
182
+ }
183
+
184
+ .disclaimer h3 {
185
+ margin-top: 0;
186
+ color: #856404;
187
+ }
188
+
189
+ .voice-section {
190
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
191
+ padding: 20px;
192
+ border-radius: 10px;
193
+ margin: 20px 0;
194
+ }
195
+
196
+ footer {
197
+ text-align: center;
198
+ margin-top: 30px;
199
+ color: #666;
200
+ font-size: 0.9em;
201
+ }
202
+ """
203
+
204
+ # =============================
205
+ # Gradio Interface
206
+ # =============================
207
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
208
+ # Header
209
+ gr.HTML("""
210
+ <div id="header">
211
+ <h1>🩺 ChatDoctor AI Assistant</h1>
212
+ <p>Your AI-powered medical conversation partner with Voice Support</p>
213
+ </div>
214
+ """)
215
+
216
+ # Disclaimer
217
+ gr.HTML("""
218
+ <div class="disclaimer">
219
+ <h3>⚠️ Medical Disclaimer</h3>
220
+ <p><strong>Important:</strong> This AI assistant is for informational and educational purposes only.
221
+ It is NOT a substitute for professional medical advice, diagnosis, or treatment.
222
+ Always seek the advice of your physician or other qualified health provider with any questions
223
+ you may have regarding a medical condition. Never disregard professional medical advice or
224
+ delay in seeking it because of something you have read here.</p>
225
+ </div>
226
+ """)
227
+
228
+ with gr.Row():
229
+ with gr.Column(scale=7):
+             # Chatbot Interface
+             chatbot = gr.Chatbot(
+                 height=500,
+                 placeholder="<div style='text-align: center; padding: 40px;'><h3>👋 Welcome to ChatDoctor!</h3><p>I'm here to discuss your health concerns. Type or speak your question!</p></div>",
+                 show_label=False,
+                 avatar_images=(None, "🤖"),
+             )
+
+             with gr.Row():
+                 msg = gr.Textbox(
+                     placeholder="Type your message here... (e.g., 'I have a headache')",
+                     show_label=False,
+                     scale=9,
+                     container=False
+                 )
+                 submit_btn = gr.Button("Send 📤", scale=1, variant="primary")
+
+             with gr.Row():
+                 clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
+                 retry_btn = gr.Button("🔄 Retry", scale=1)
+
+         with gr.Column(scale=3):
+             # Voice Input Section
+             gr.HTML("<div class='voice-section'><h3 style='color: white; text-align: center; margin-top: 0;'>🎤 Voice Features</h3></div>")
+
+             audio_input = gr.Audio(
+                 sources=["microphone"],
+                 type="filepath",
+                 label="🎙️ Speak Your Question",
+                 show_download_button=False
+             )
+
+             transcribed_text = gr.Textbox(
+                 label="📝 Transcribed Text",
+                 placeholder="Your speech will appear here...",
+                 interactive=False,
+                 lines=3
+             )
+
+             send_voice_btn = gr.Button("Send Voice Message 🔊", variant="primary")
+
+             gr.Markdown("---")
+
+             # Voice Output
+             tts_enabled = gr.Checkbox(
+                 label="🔊 Enable Text-to-Speech for responses",
+                 value=True,
+                 info="Hear the doctor's response"
+             )
+
+             audio_output = gr.Audio(
+                 label="🔈 AI Response Audio",
+                 autoplay=False,
+                 visible=True
+             )
+
+     # Examples
+     gr.Examples(
+         examples=[
+             "I have a persistent headache for 3 days. What should I do?",
+             "What are the symptoms of diabetes?",
+             "How can I improve my sleep quality?",
+             "I have a fever and sore throat. Should I be concerned?",
+             "What are some natural ways to reduce stress?",
+         ],
+         inputs=msg,
+         label="💡 Example Questions"
+     )
+
+     # Settings (collapsed by default)
+     with gr.Accordion("⚙️ Advanced Settings", open=False):
+         temperature_slider = gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=TEMPERATURE,
+             step=0.1,
+             label="Temperature (Creativity)",
+             info="Higher values make responses more creative but less focused"
+         )
+         max_tokens_slider = gr.Slider(
+             minimum=50,
+             maximum=500,
+             value=MAX_NEW_TOKENS,
+             step=50,
+             label="Max Response Length",
+             info="Maximum number of tokens in response"
+         )
+         top_k_slider = gr.Slider(
+             minimum=1,
+             maximum=100,
+             value=TOP_K,
+             step=1,
+             label="Top K",
+             info="Limits vocabulary selection"
+         )
+
+     # Footer
+     gr.HTML("""
+     <footer>
+         <p>Powered by ChatDoctor Model | Built with Gradio | Voice-Enabled 🎤</p>
+         <p>Device: """ + device.upper() + """ | Model: LLaMA-based Medical AI</p>
+     </footer>
+     """)
+
+     # =============================
+     # Event Handlers
+     # =============================
+
+     def user_message(user_msg, history):
+         # Clear the textbox, append the user's turn, and reset the audio player
+         return "", history + [[user_msg, None]], None
+
+     def bot_response(history, temp, max_tok, top_k_val, tts_enabled_val):
+         global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
+         TEMPERATURE = temp
+         MAX_NEW_TOKENS = int(max_tok)
+         TOP_K = int(top_k_val)
+
+         user_msg = history[-1][0]
+         bot_msg = chat_function(user_msg, history[:-1])
+         history[-1][1] = bot_msg
+
+         # Generate audio if TTS is enabled
+         tts_audio = None
+         if tts_enabled_val and bot_msg and not bot_msg.startswith("Error:"):
+             tts_audio = text_to_speech(bot_msg)
+
+         return history, tts_audio
+
+     def transcribe_audio(audio_file):
+         """Transcribe audio to text using Whisper."""
+         if audio_file is None:
+             return ""
+
+         try:
+             import whisper
+             # Note: this reloads the Whisper model on every call; caching it in a
+             # module-level variable would make repeated transcriptions faster.
+             model = whisper.load_model("base")
+             result = model.transcribe(audio_file)
+             return result["text"]
+         except ImportError:
+             return "Error: Please install whisper: pip install openai-whisper"
+         except Exception as e:
+             return f"Transcription error: {str(e)}"
+
+     def process_voice_input(audio_file, history, temp, max_tok, top_k_val, tts_enabled_val):
+         """Process voice input: transcribe -> send -> get response."""
+         if audio_file is None:
+             return history, "", None, None
+
+         # Transcribe
+         transcribed = transcribe_audio(audio_file)
+
+         if transcribed.startswith("Error:"):
+             return history, transcribed, None, None
+
+         # Add to chat
+         history = history + [[transcribed, None]]
+
+         # Get response
+         global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
+         TEMPERATURE = temp
+         MAX_NEW_TOKENS = int(max_tok)
+         TOP_K = int(top_k_val)
+
+         bot_msg = chat_function(transcribed, history[:-1])
+         history[-1][1] = bot_msg
+
+         # Generate audio if TTS is enabled (kept distinct from the input file)
+         tts_audio = None
+         if tts_enabled_val and bot_msg and not bot_msg.startswith("Error:"):
+             tts_audio = text_to_speech(bot_msg)
+
+         return history, transcribed, None, tts_audio
+
+     # Text input events
+     msg.submit(
+         user_message,
+         [msg, chatbot],
+         [msg, chatbot, audio_output],
+         queue=False
+     ).then(
+         bot_response,
+         [chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
+         [chatbot, audio_output]
+     )
+
+     submit_btn.click(
+         user_message,
+         [msg, chatbot],
+         [msg, chatbot, audio_output],
+         queue=False
+     ).then(
+         bot_response,
+         [chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
+         [chatbot, audio_output]
+     )
+
+     # Voice input events
+     audio_input.change(
+         transcribe_audio,
+         [audio_input],
+         [transcribed_text]
+     )
+
+     send_voice_btn.click(
+         process_voice_input,
+         [audio_input, chatbot, temperature_slider, max_tokens_slider, top_k_slider, tts_enabled],
+         [chatbot, transcribed_text, audio_input, audio_output]
+     )
+
+     # Clear and retry (Retry currently also clears the chat; regenerating
+     # the last answer is left as a TODO)
+     clear_btn.click(lambda: (None, None, None), None, [chatbot, audio_output, transcribed_text], queue=False)
+
+     retry_btn.click(lambda: None, None, chatbot, queue=False)
+
+ # =============================
+ # Launch Interface
+ # =============================
+ if __name__ == "__main__":
+     print("\n🚀 Launching ChatDoctor Gradio Interface with Voice Support...")
+     print("\n📦 Required packages:")
+     print("    pip install gradio gTTS openai-whisper")
+     print("\nNote: Whisper will download models on first use (~100MB for base model)\n")
+
+     demo.queue()
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         show_error=True
+     )
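Note: the handlers above call text_to_speech, which this hunk assumes is defined earlier in frontend_VOic.py. For reference, a minimal gTTS-based sketch of such a helper (illustrative only, not the file's actual implementation):

import tempfile
from gtts import gTTS

def text_to_speech(text):
    """Render text to an MP3 file and return its path, or None on failure."""
    try:
        tts = gTTS(text=text, lang="en")
        out = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tts.save(out.name)
        return out.name
    except Exception:
        return None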
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ numpy
+ rouge_score
+ fire
+ openai
+ git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176
+ torch
+ sentencepiece
+ tokenizers==0.13.3
+ wandb
+ accelerate
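Note: the pinned transformers commit predates the final LLaMA class names; within this repo, train_lora.py imports LLaMATokenizer (the fork's spelling) while test.py and teak.py import LlamaTokenizer (the spelling in released transformers). If you move between the fork and a release, a small compatibility shim (a sketch) avoids import errors:

try:
    from transformers import LlamaTokenizer  # released transformers versions
except ImportError:
    from transformers import LLaMATokenizer as LlamaTokenizer  # pinned fork spelling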
teak.py ADDED
@@ -0,0 +1,103 @@
+ import os
+ import torch
+ from transformers import LlamaTokenizer, LlamaForCausalLM
+
+ model = None
+ tokenizer = None
+ generator = None
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+ def load_model(model_name, eight_bit=0, device_map="auto"):
+     global model, tokenizer, generator
+
+     print("Loading " + model_name + "...")
+
+     if device_map == "zero":
+         device_map = "balanced_low_0"
+
+     # Pick the device and report available GPUs
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
+     print("gpu_count", gpu_count)
+
+     tokenizer = LlamaTokenizer.from_pretrained(model_name)
+     model = LlamaForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+         low_cpu_mem_usage=True,
+         load_in_8bit=False,
+         cache_dir="cache"
+     ).to(device)
+
+     generator = model.generate
+
+ load_model(r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained")
+
+ First_chat = "ChatDoctor: I am ChatDoctor, what medical questions do you have?"
+ print(First_chat)
+ history = [First_chat]
+
+ def go():
+     invitation = "ChatDoctor: "
+     human_invitation = "Patient: "
+
+     # Read the patient's message
+     msg = input(human_invitation)
+     print("")
+
+     history.append(human_invitation + msg)
+
+     fulltext = "If you are a doctor, please answer the medical questions based on the patient's description. \n\n" + "\n\n".join(history) + "\n\n" + invitation
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     gen_in = tokenizer(fulltext, return_tensors="pt").input_ids.to(device)
+     with torch.no_grad():
+         generated_ids = generator(
+             gen_in,
+             max_new_tokens=200,
+             use_cache=True,
+             pad_token_id=tokenizer.eos_token_id,
+             num_return_sequences=1,
+             do_sample=True,
+             repetition_penalty=1.1,  # 1.0 disables the penalty
+             temperature=0.5,  # default: 1.0
+             top_k=50,  # default: 50
+             top_p=1.0,  # default: 1.0
+             early_stopping=True,
+         )
+         # batch_decode returns one string per sequence; we generated exactly one
+         generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+     # Keep only the newly generated text and cut it off at the next patient turn
+     response = generated_text[len(fulltext):]
+     response = response.split(human_invitation)[0]
+     response = response.strip()
+
+     print(invitation + response)
+     print("")
+
+     history.append(invitation + response)
+
+ while True:
+     go()
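Note: teak.py appends every turn to history without bound, so long sessions will eventually overflow the model's context window. A minimal sketch of a history cap (trim_history is a hypothetical helper, not part of the file) that drops the oldest turns when the joined prompt grows too long:

def trim_history(history, tokenizer, max_prompt_tokens=1500):
    # Drop the oldest turns until the joined history fits the token budget.
    while len(history) > 1 and len(tokenizer("\n\n".join(history)).input_ids) > max_prompt_tokens:
        history.pop(0)
    return history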
test.py ADDED
@@ -0,0 +1,328 @@
+ import os
+ import gc
+ import torch
+ import gradio as gr
+ from transformers import LlamaTokenizer, LlamaForCausalLM, StoppingCriteria, StoppingCriteriaList
+
+ # =============================
+ # Configuration
+ # =============================
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
+ MAX_NEW_TOKENS = 200
+ TEMPERATURE = 0.5
+ TOP_K = 50
+ REPETITION_PENALTY = 1.1
+
+ # Detect device
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Loading model from {MODEL_PATH} on {device}...")
+
+ # =============================
+ # Load Tokenizer and Model
+ # =============================
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL_PATH,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     low_cpu_mem_usage=True
+ )
+
+ generator = model.generate
+ print("✅ ChatDoctor model loaded successfully!\n")
+
+ # =============================
+ # System Prompt
+ # =============================
+ SYSTEM_PROMPT = """
+ You are ChatDoctor — a friendly, professional, and caring virtual doctor.
+ Whenever a patient describes their symptoms:
+ 1. Always include a recommendation for diet, fluids, and proteins appropriate for recovery.
+    - Fruits: citrus (orange, lemon), kiwi, papaya
+    - Vegetables: leafy greens, carrots, spinach
+    - Fluids: warm soups, herbal teas, coconut water
+    - Proteins: boiled eggs, lentils, fish, chicken soup
+    - Extras: garlic, ginger, turmeric
+ 2. Recommend safe over-the-counter medicines if applicable (e.g., paracetamol for fever).
+ 3. Ask follow-up questions if needed to understand the patient's condition better.
+ 4. Always encourage the patient to see a real doctor if symptoms persist, worsen, or are serious.
+ 5. Provide clear, warm, and empathetic advice.
+ 6. Make your response structured and easy to understand.
+ 7. Even if the patient only mentions a symptom, always include diet, fluids, protein, and care suggestions automatically.
+ """
+
+ # =============================
+ # Stopping Criteria
+ # =============================
+ class StopOnTokens(StoppingCriteria):
+     def __init__(self, stop_ids):
+         self.stop_ids = stop_ids
+
+     def __call__(self, input_ids, scores, **kwargs):
+         for stop_id_seq in self.stop_ids:
+             if len(stop_id_seq) == 1:
+                 if input_ids[0][-1] == stop_id_seq[0]:
+                     return True
+             else:
+                 if len(input_ids[0]) >= len(stop_id_seq):
+                     if input_ids[0][-len(stop_id_seq):].tolist() == stop_id_seq:
+                         return True
+         return False
+
+ # =============================
+ # Chat History (Global)
+ # =============================
+ conversation_history = []  # currently unused; Gradio passes the history explicitly
+
+ # =============================
+ # Get Response Function
+ # =============================
+ def get_response(user_input, history_context):
+     """Generate a response from the ChatDoctor model."""
+     # Build conversation from history
+     history_text = []
+     for human, assistant in history_context:
+         if human:
+             history_text.append("Patient: " + human)
+         if assistant:
+             history_text.append("ChatDoctor: " + assistant)
+
+     # Add current user input
+     history_text.append("Patient: " + user_input)
+
+     # Build full prompt including system instructions
+     prompt = SYSTEM_PROMPT + "\n\nConversation so far:\n" + "\n".join(history_text) + "\nChatDoctor:"
+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+     # Define stop words and their token IDs
+     stop_words = ["Patient:", "\nPatient:", "Patient :", "\n\nPatient"]
+     stop_ids = [tokenizer.encode(word, add_special_tokens=False) for word in stop_words]
+     stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
+
+     # Generate model response
+     with torch.no_grad():
+         output_ids = generator(
+             input_ids,
+             max_new_tokens=MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             repetition_penalty=REPETITION_PENALTY,
+             stopping_criteria=stopping_criteria,
+             pad_token_id=tokenizer.eos_token_id,
+             eos_token_id=tokenizer.eos_token_id
+         )
+
+     # Decode and clean response
+     full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     response = full_output[len(prompt):].strip()
+
+     # Remove any "Patient:" that might have slipped through
+     for stop_word in ["Patient:", "Patient :", "\nPatient:", "\nPatient", "Patient"]:
+         if stop_word in response:
+             response = response.split(stop_word)[0].strip()
+             break
+
+     # Free memory (only touch the CUDA cache when a GPU is present)
+     del input_ids, output_ids
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+     return response
+
+ # =============================
+ # Gradio Chat Function
+ # =============================
+ def chat_function(message, history):
+     """Gradio chat interface function."""
+     if not message.strip():
+         return ""
+
+     try:
+         response = get_response(message, history)
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # =============================
+ # Custom CSS
+ # =============================
+ custom_css = """
+ #header {
+     text-align: center;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+     color: white;
+     padding: 20px;
+     border-radius: 10px;
+     margin-bottom: 20px;
+ }
+
+ #header h1 {
+     margin: 0;
+     font-size: 2.5em;
+ }
+
+ #header p {
+     margin: 10px 0 0 0;
+     font-size: 1.1em;
+     opacity: 0.9;
+ }
+
+ .disclaimer {
+     background-color: #fff3cd;
+     border: 1px solid #ffc107;
+     border-radius: 8px;
+     padding: 15px;
+     margin: 20px 0;
+     color: #856404;
+ }
+
+ .disclaimer h3 {
+     margin-top: 0;
+     color: #856404;
+ }
+
+ footer {
+     text-align: center;
+     margin-top: 30px;
+     color: #666;
+     font-size: 0.9em;
+ }
+ """
+
+ # =============================
+ # Gradio Interface
+ # =============================
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+     # Header
+     gr.HTML("""
+     <div id="header">
+         <h1>🩺 ChatDoctor AI Assistant</h1>
+         <p>Your AI-powered medical conversation partner</p>
+     </div>
+     """)
+
+     # Disclaimer
+     gr.HTML("""
+     <div class="disclaimer">
+         <h3>⚠️ Medical Disclaimer</h3>
+         <p><strong>Important:</strong> This AI assistant is for informational and educational purposes only.
+         It is NOT a substitute for professional medical advice, diagnosis, or treatment.
+         Always seek the advice of your physician or other qualified health provider with any questions
+         you may have regarding a medical condition. Never disregard professional medical advice or
+         delay in seeking it because of something you have read here.</p>
+     </div>
+     """)
+
+     # Chatbot Interface
+     chatbot = gr.Chatbot(
+         height=500,
+         placeholder="<div style='text-align: center; padding: 40px;'><h3>👋 Welcome to ChatDoctor!</h3><p>I'm here to discuss your health concerns. How can I assist you today?</p></div>",
+         show_label=False,
+         avatar_images=(None, "🤖"),
+     )
+
+     with gr.Row():
+         msg = gr.Textbox(
+             placeholder="Type your message here... (e.g., 'I have a headache')",
+             show_label=False,
+             scale=9,
+             container=False
+         )
+         submit_btn = gr.Button("Send 📤", scale=1, variant="primary")
+
+     with gr.Row():
+         clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
+         retry_btn = gr.Button("🔄 Retry", scale=1)
+
+     # Examples
+     gr.Examples(
+         examples=[
+             "I have a persistent headache for 3 days. What should I do?",
+             "What are the symptoms of diabetes?",
+             "How can I improve my sleep quality?",
+             "I have a fever and sore throat. Should I be concerned?",
+             "What are some natural ways to reduce stress?",
+         ],
+         inputs=msg,
+         label="💡 Example Questions"
+     )
+
+     # Settings (collapsed by default)
+     with gr.Accordion("⚙️ Advanced Settings", open=False):
+         temperature_slider = gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=TEMPERATURE,
+             step=0.1,
+             label="Temperature (Creativity)",
+             info="Higher values make responses more creative but less focused"
+         )
+         max_tokens_slider = gr.Slider(
+             minimum=50,
+             maximum=500,
+             value=MAX_NEW_TOKENS,
+             step=50,
+             label="Max Response Length",
+             info="Maximum number of tokens in response"
+         )
+         top_k_slider = gr.Slider(
+             minimum=1,
+             maximum=100,
+             value=TOP_K,
+             step=1,
+             label="Top K",
+             info="Limits vocabulary selection"
+         )
+
+     # Footer
+     gr.HTML(f"""
+     <footer>
+         <p>Powered by ChatDoctor Model | Built with Gradio</p>
+         <p>Device: {device.upper()} | Model: LLaMA-based Medical AI</p>
+     </footer>
+     """)
+
+     # Event handlers
+     def user_message(user_msg, history):
+         return "", history + [[user_msg, None]]
+
+     def bot_response(history, temp, max_tok, top_k_val):
+         global TEMPERATURE, MAX_NEW_TOKENS, TOP_K
+         TEMPERATURE = temp
+         MAX_NEW_TOKENS = int(max_tok)
+         TOP_K = int(top_k_val)
+
+         user_msg = history[-1][0]
+         bot_msg = chat_function(user_msg, history[:-1])
+         history[-1][1] = bot_msg
+         return history
+
+     # Connect events
+     msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
+         bot_response, [chatbot, temperature_slider, max_tokens_slider, top_k_slider], chatbot
+     )
+
+     submit_btn.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
+         bot_response, [chatbot, temperature_slider, max_tokens_slider, top_k_slider], chatbot
+     )
+
+     clear_btn.click(lambda: None, None, chatbot, queue=False)
+
+     # Retry currently just clears the chat; regenerating the last answer is left as a TODO
+     def retry_last():
+         return None
+
+     retry_btn.click(retry_last, None, chatbot, queue=False)
+
+ # =============================
+ # Launch Interface
+ # =============================
+ if __name__ == "__main__":
+     print("\n🚀 Launching ChatDoctor Gradio Interface...")
+     demo.queue()
+     demo.launch(
+         server_name="0.0.0.0",  # Accessible from network
+         server_port=7860,
+         share=False,  # Set to True to create public link
+         show_error=True
+     )
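Note: chat_function is a plain Python function, so the model can be exercised without the UI. A quick smoke test (a sketch, run in the same interpreter after the model has loaded; the symptom text is illustrative):

# history is a list of [user, assistant] pairs; an empty list starts a fresh conversation
reply = chat_function("I have had a mild fever since yesterday.", [])
print(reply)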
train.py ADDED
@@ -0,0 +1,231 @@
+ # Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import copy
+ import logging
+ from dataclasses import dataclass, field
+ from typing import Optional, Dict, Sequence
+
+ import torch
+ import transformers
+ from torch.utils.data import Dataset
+ from transformers import Trainer
+
+ import utils
+
+ IGNORE_INDEX = -100
+ DEFAULT_PAD_TOKEN = "[PAD]"
+ DEFAULT_EOS_TOKEN = "</s>"
+ # BOS and UNK must differ from EOS for LLaMA tokenizers; the original upload
+ # set all three to "</s>".
+ DEFAULT_BOS_TOKEN = "<s>"
+ DEFAULT_UNK_TOKEN = "<unk>"
+ PROMPT_DICT = {
+     "prompt_input": (
+         "Below is an instruction that describes a task, paired with an input that provides further context. "
+         "Write a response that appropriately completes the request.\n\n"
+         "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
+     ),
+     "prompt_no_input": (
+         "Below is an instruction that describes a task. "
+         "Write a response that appropriately completes the request.\n\n"
+         "### Instruction:\n{instruction}\n\n### Response:"
+     ),
+ }
+
+
+ @dataclass
+ class ModelArguments:
+     model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
+
+
+ @dataclass
+ class DataArguments:
+     data_path: str = field(default=None, metadata={"help": "Path to the training data."})
+
+
+ @dataclass
+ class TrainingArguments(transformers.TrainingArguments):
+     cache_dir: Optional[str] = field(default=None)
+     optim: str = field(default="adamw_torch")
+     model_max_length: int = field(
+         default=512,
+         metadata={"help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."},
+     )
+
+
+ def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output_dir: str):
+     """Collect the state dict and dump it to disk."""
+     state_dict = trainer.model.state_dict()
+     if trainer.args.should_save:
+         cpu_state_dict = {key: value.cpu() for key, value in state_dict.items()}
+         del state_dict
+         trainer._save(output_dir, state_dict=cpu_state_dict)  # noqa
+
+
+ def smart_tokenizer_and_embedding_resize(
+     special_tokens_dict: Dict,
+     tokenizer: transformers.PreTrainedTokenizer,
+     model: transformers.PreTrainedModel,
+ ):
+     """Resize tokenizer and embedding.
+
+     Note: This is the unoptimized version that may make your embedding size not be divisible by 64.
+     """
+     num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
+     model.resize_token_embeddings(len(tokenizer))
+
+     if num_new_tokens > 0:
+         input_embeddings = model.get_input_embeddings().weight.data
+         output_embeddings = model.get_output_embeddings().weight.data
+
+         input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
+         output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
+
+         input_embeddings[-num_new_tokens:] = input_embeddings_avg
+         output_embeddings[-num_new_tokens:] = output_embeddings_avg
+
+
+ def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict:
+     """Tokenize a list of strings."""
+     tokenized_list = [
+         tokenizer(
+             text,
+             return_tensors="pt",
+             padding="longest",
+             max_length=tokenizer.model_max_length,
+             truncation=True,
+         )
+         for text in strings
+     ]
+     input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
+     input_ids_lens = labels_lens = [
+         tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list
+     ]
+     return dict(
+         input_ids=input_ids,
+         labels=labels,
+         input_ids_lens=input_ids_lens,
+         labels_lens=labels_lens,
+     )
+
+
+ def preprocess(
+     sources: Sequence[str],
+     targets: Sequence[str],
+     tokenizer: transformers.PreTrainedTokenizer,
+ ) -> Dict:
+     """Preprocess the data by tokenizing."""
+     examples = [s + t for s, t in zip(sources, targets)]
+     examples_tokenized, sources_tokenized = [_tokenize_fn(strings, tokenizer) for strings in (examples, sources)]
+     input_ids = examples_tokenized["input_ids"]
+     labels = copy.deepcopy(input_ids)
+     for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]):
+         # Mask out the prompt tokens so the loss is computed on the response only
+         label[:source_len] = IGNORE_INDEX
+     return dict(input_ids=input_ids, labels=labels)
+
+
+ class SupervisedDataset(Dataset):
+     """Dataset for supervised fine-tuning."""
+
+     def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer):
+         super(SupervisedDataset, self).__init__()
+         logging.warning("Loading data...")
+         list_data_dict = utils.jload(data_path)
+
+         logging.warning("Formatting inputs...")
+         prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"]
+         sources = [
+             prompt_input.format_map(example) if example.get("input", "") != "" else prompt_no_input.format_map(example)
+             for example in list_data_dict
+         ]
+         targets = [f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict]
+
+         logging.warning("Tokenizing inputs... This may take some time...")
+         data_dict = preprocess(sources, targets, tokenizer)
+
+         self.input_ids = data_dict["input_ids"]
+         self.labels = data_dict["labels"]
+
+     def __len__(self):
+         return len(self.input_ids)
+
+     def __getitem__(self, i) -> Dict[str, torch.Tensor]:
+         return dict(input_ids=self.input_ids[i], labels=self.labels[i])
+
+
+ @dataclass
+ class DataCollatorForSupervisedDataset(object):
+     """Collate examples for supervised fine-tuning."""
+
+     tokenizer: transformers.PreTrainedTokenizer
+
+     def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
+         input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels"))
+         input_ids = torch.nn.utils.rnn.pad_sequence(
+             input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
+         )
+         labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)
+         return dict(
+             input_ids=input_ids,
+             labels=labels,
+             attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
+         )
+
+
+ def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict:
+     """Make dataset and collator for supervised fine-tuning."""
+     train_dataset = SupervisedDataset(tokenizer=tokenizer, data_path=data_args.data_path)
+     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
+     return dict(train_dataset=train_dataset, eval_dataset=None, data_collator=data_collator)
+
+
+ def train():
+     parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
+     model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+     model = transformers.AutoModelForCausalLM.from_pretrained(
+         model_args.model_name_or_path,
+         cache_dir=training_args.cache_dir,
+     )
+
+     tokenizer = transformers.AutoTokenizer.from_pretrained(
+         model_args.model_name_or_path,
+         cache_dir=training_args.cache_dir,
+         model_max_length=training_args.model_max_length,
+         padding_side="right",
+         use_fast=False,
+     )
+     if tokenizer.pad_token is None:
+         smart_tokenizer_and_embedding_resize(
+             special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
+             tokenizer=tokenizer,
+             model=model,
+         )
+     if "llama" in model_args.model_name_or_path:
+         tokenizer.add_special_tokens(
+             {
+                 "eos_token": DEFAULT_EOS_TOKEN,
+                 "bos_token": DEFAULT_BOS_TOKEN,
+                 "unk_token": DEFAULT_UNK_TOKEN,
+             }
+         )
+
+     data_module = make_supervised_data_module(tokenizer=tokenizer, data_args=data_args)
+     trainer = Trainer(model=model, tokenizer=tokenizer, args=training_args, **data_module)
+     trainer.train()
+     trainer.save_state()
+     safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir)
+
+
+ if __name__ == "__main__":
+     train()
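Note: for reference, this is how PROMPT_DICT renders one training record (the record below is illustrative; alpaca_data.json and chatdoctor5k.json follow this instruction/input/output schema):

example = {
    "instruction": "If you are a doctor, please answer the medical questions based on the patient's description.",
    "input": "I have a sore throat and a mild fever.",
    "output": "...",
}
prompt = PROMPT_DICT["prompt_input"].format_map(example)
# SupervisedDataset then appends example["output"] + tokenizer.eos_token as the target,
# and preprocess() masks the prompt tokens with IGNORE_INDEX so loss covers only the response.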
train_lora.py ADDED
@@ -0,0 +1,321 @@
+ import os
+ import sys
+ from typing import List, Union
+
+ import fire
+ import torch
+ import transformers
+ from datasets import load_dataset
+
+ """
+ Unused imports:
+ import torch.nn as nn
+ import bitsandbytes as bnb
+ """
+ # Note: BottleneckConfig and the bottleneck adapter path are not part of
+ # mainline peft; these imports assume a fork such as the one shipped with
+ # LLM-Adapters.
+ from peft import (  # noqa: E402
+     LoraConfig,
+     BottleneckConfig,
+     get_peft_model,
+     get_peft_model_state_dict,
+     prepare_model_for_int8_training,
+     set_peft_model_state_dict,
+ )
+ from transformers import AutoModelForCausalLM, AutoTokenizer, LLaMATokenizer  # noqa: F402
+
+
+ def train(
+     # model/data params
+     base_model: str = "",  # the only required argument
+     data_path: str = "yahma/alpaca-cleaned",
+     output_dir: str = "./lora-alpaca",
+     adapter_name: str = "lora",
+     # training hyperparams
+     batch_size: int = 128,
+     micro_batch_size: int = 4,
+     num_epochs: int = 3,
+     learning_rate: float = 3e-4,
+     cutoff_len: int = 256,
+     val_set_size: int = 2000,
+     use_gradient_checkpointing: bool = False,
+     eval_step: int = 200,
+     save_step: int = 200,
+     # lora hyperparams
+     lora_r: int = 8,
+     lora_alpha: int = 16,
+     lora_dropout: float = 0.05,
+     lora_target_modules: List[str] = None,
+     # bottleneck adapter hyperparams
+     bottleneck_size: int = 256,
+     non_linearity: str = "tanh",
+     adapter_dropout: float = 0.0,
+     use_parallel_adapter: bool = False,
+     use_adapterp: bool = False,
+     target_modules: List[str] = None,
+     scaling: Union[float, str] = 1.0,
+     # llm hyperparams
+     train_on_inputs: bool = True,  # if False, masks out inputs in loss
+     group_by_length: bool = False,  # faster, but produces an odd training loss curve
+     # wandb params
+     wandb_project: str = "",
+     wandb_run_name: str = "",
+     wandb_watch: str = "",  # options: false | gradients | all
+     wandb_log_model: str = "",  # options: false | true
+     resume_from_checkpoint: str = None,  # either training checkpoint or final adapter
+ ):
+     print(
+         f"Finetuning model with params:\n"
+         f"base_model: {base_model}\n"
+         f"data_path: {data_path}\n"
+         f"output_dir: {output_dir}\n"
+         f"batch_size: {batch_size}\n"
+         f"micro_batch_size: {micro_batch_size}\n"
+         f"num_epochs: {num_epochs}\n"
+         f"learning_rate: {learning_rate}\n"
+         f"cutoff_len: {cutoff_len}\n"
+         f"val_set_size: {val_set_size}\n"
+         f"use_gradient_checkpointing: {use_gradient_checkpointing}\n"
+         f"lora_r: {lora_r}\n"
+         f"lora_alpha: {lora_alpha}\n"
+         f"lora_dropout: {lora_dropout}\n"
+         f"lora_target_modules: {lora_target_modules}\n"
+         f"bottleneck_size: {bottleneck_size}\n"
+         f"non_linearity: {non_linearity}\n"
+         f"adapter_dropout: {adapter_dropout}\n"
+         f"use_parallel_adapter: {use_parallel_adapter}\n"
+         f"use_adapterp: {use_adapterp}\n"
+         f"train_on_inputs: {train_on_inputs}\n"
+         f"scaling: {scaling}\n"
+         f"adapter_name: {adapter_name}\n"
+         f"target_modules: {target_modules}\n"
+         f"group_by_length: {group_by_length}\n"
+         f"wandb_project: {wandb_project}\n"
+         f"wandb_run_name: {wandb_run_name}\n"
+         f"wandb_watch: {wandb_watch}\n"
+         f"wandb_log_model: {wandb_log_model}\n"
+         f"resume_from_checkpoint: {resume_from_checkpoint}\n"
+     )
+     assert (
+         base_model
+     ), "Please specify a --base_model, e.g. --base_model='decapoda-research/LLaMA-7b-hf'"
+     gradient_accumulation_steps = batch_size // micro_batch_size
+
+     device_map = "auto"
+     world_size = int(os.environ.get("WORLD_SIZE", 1))
+     ddp = world_size != 1
+     if ddp:
+         device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
+         gradient_accumulation_steps = gradient_accumulation_steps // world_size
+
+     # Check if parameter passed or if set within environ
+     use_wandb = len(wandb_project) > 0 or (
+         "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0
+     )
+     # Only overwrite environ if wandb param passed
+     if len(wandb_project) > 0:
+         os.environ["WANDB_PROJECT"] = wandb_project
+     if len(wandb_watch) > 0:
+         os.environ["WANDB_WATCH"] = wandb_watch
+     if len(wandb_log_model) > 0:
+         os.environ["WANDB_LOG_MODEL"] = wandb_log_model
+
+     model = AutoModelForCausalLM.from_pretrained(
+         base_model,
+         load_in_8bit=True,
+         torch_dtype=torch.float16,
+         device_map=device_map,
+     )
+
+     if model.config.model_type == "LLaMA":
+         # Due to the name of transformers' LLaMATokenizer, we have to do this
+         # (released transformers versions report model_type as lowercase "llama")
+         tokenizer = LLaMATokenizer.from_pretrained(base_model)
+     else:
+         tokenizer = AutoTokenizer.from_pretrained(base_model)
+
+     tokenizer.pad_token_id = (
+         0  # unk. we want this to be different from the eos token
+     )
+     tokenizer.padding_side = "left"  # Allow batched inference
+
+     def tokenize(prompt, add_eos_token=True):
+         # there's probably a way to do this with the tokenizer settings
+         # but again, gotta move fast
+         result = tokenizer(
+             prompt,
+             truncation=True,
+             max_length=cutoff_len,
+             padding=False,
+             return_tensors=None,
+         )
+         if (
+             result["input_ids"][-1] != tokenizer.eos_token_id
+             and len(result["input_ids"]) < cutoff_len
+             and add_eos_token
+         ):
+             result["input_ids"].append(tokenizer.eos_token_id)
+             result["attention_mask"].append(1)
+
+         result["labels"] = result["input_ids"].copy()
+
+         return result
+
+     def generate_and_tokenize_prompt(data_point):
+         full_prompt = generate_prompt(data_point)
+         tokenized_full_prompt = tokenize(full_prompt)
+         if not train_on_inputs:
+             user_prompt = generate_prompt({**data_point, "output": ""})
+             tokenized_user_prompt = tokenize(user_prompt, add_eos_token=False)
+             user_prompt_len = len(tokenized_user_prompt["input_ids"])
+
+             tokenized_full_prompt["labels"] = [
+                 -100
+             ] * user_prompt_len + tokenized_full_prompt["labels"][
+                 user_prompt_len:
+             ]  # could be sped up, probably
+         return tokenized_full_prompt
+
+     model = prepare_model_for_int8_training(model, use_gradient_checkpointing=use_gradient_checkpointing)
+     if adapter_name == "lora":
+         config = LoraConfig(
+             r=lora_r,
+             lora_alpha=lora_alpha,
+             target_modules=lora_target_modules,
+             lora_dropout=lora_dropout,
+             bias="none",
+             task_type="CAUSAL_LM",
+         )
+     elif adapter_name == "bottleneck":
+         config = BottleneckConfig(
+             bottleneck_size=bottleneck_size,
+             non_linearity=non_linearity,
+             adapter_dropout=adapter_dropout,
+             use_parallel_adapter=use_parallel_adapter,
+             use_adapterp=use_adapterp,
+             target_modules=target_modules,
+             scaling=scaling,
+             bias="none",
+             task_type="CAUSAL_LM",
+         )
+     model = get_peft_model(model, config)
+
+     if data_path.endswith(".json"):  # todo: support jsonl
+         data = load_dataset("json", data_files=data_path)
+     else:
+         data = load_dataset(data_path)
+
+     if resume_from_checkpoint:
+         # Check the available weights and load them
+         checkpoint_name = os.path.join(
+             resume_from_checkpoint, "pytorch_model.bin"
+         )  # Full checkpoint
+         if not os.path.exists(checkpoint_name):
+             checkpoint_name = os.path.join(
+                 resume_from_checkpoint, "adapter_model.bin"
+             )  # only LoRA model - LoRA config above has to fit
+             resume_from_checkpoint = (
+                 False  # So the trainer won't try loading its state
+             )
+         # The two files above have a different name depending on how they were saved, but are actually the same.
+         if os.path.exists(checkpoint_name):
+             print(f"Restarting from {checkpoint_name}")
+             adapters_weights = torch.load(checkpoint_name)
+             model = set_peft_model_state_dict(model, adapters_weights)
+         else:
+             print(f"Checkpoint {checkpoint_name} not found")
+
+     model.print_trainable_parameters()  # Be more transparent about the % of trainable params.
+
+     if val_set_size > 0:
+         train_val = data["train"].train_test_split(
+             test_size=val_set_size, shuffle=True, seed=42
+         )
+         train_data = (
+             train_val["train"].shuffle().map(generate_and_tokenize_prompt)
+         )
+         val_data = (
+             train_val["test"].shuffle().map(generate_and_tokenize_prompt)
+         )
+     else:
+         train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
+         val_data = None
+
+     if not ddp and torch.cuda.device_count() > 1:
+         # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
+         model.is_parallelizable = True
+         model.model_parallel = True
+
+     trainer = transformers.Trainer(
+         model=model,
+         train_dataset=train_data,
+         eval_dataset=val_data,
+         args=transformers.TrainingArguments(
+             per_device_train_batch_size=micro_batch_size,
+             gradient_accumulation_steps=gradient_accumulation_steps,
+             warmup_steps=100,
+             num_train_epochs=num_epochs,
+             learning_rate=learning_rate,
+             fp16=True,
+             logging_steps=10,
+             optim="adamw_torch",
+             evaluation_strategy="steps" if val_set_size > 0 else "no",
+             save_strategy="steps",
+             eval_steps=eval_step if val_set_size > 0 else None,
+             save_steps=save_step,
+             output_dir=output_dir,
+             save_total_limit=3,
+             load_best_model_at_end=True if val_set_size > 0 else False,
+             ddp_find_unused_parameters=False if ddp else None,
+             group_by_length=group_by_length,
+             report_to="wandb" if use_wandb else None,
+             run_name=wandb_run_name if use_wandb else None,
+         ),
+         data_collator=transformers.DataCollatorForSeq2Seq(
+             tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
+         ),
+     )
+     model.config.use_cache = False
+
+     old_state_dict = model.state_dict
+     model.state_dict = (
+         lambda self, *_, **__: get_peft_model_state_dict(
+             self, old_state_dict()
+         )
+     ).__get__(model, type(model))
+
+     if torch.__version__ >= "2" and sys.platform != "win32":
+         model = torch.compile(model)
+
+     trainer.train(resume_from_checkpoint=resume_from_checkpoint)
+
+     model.save_pretrained(output_dir)
+
+     print(
+         "\n If there's a warning about missing keys above, please disregard :)"
+     )
+
+
+ def generate_prompt(data_point):
+     # sorry about the formatting disaster gotta move fast
+     if data_point["input"]:
+         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {data_point["instruction"]}
+
+ ### Input:
+ {data_point["input"]}
+
+ ### Response:
+ {data_point["output"]}"""  # noqa: E501
+     else:
+         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {data_point["instruction"]}
+
+ ### Response:
+ {data_point["output"]}"""  # noqa: E501
+
+
+ if __name__ == "__main__":
+     fire.Fire(train)
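Note: an example invocation (a sketch; the data file and target modules are illustrative choices, not defaults of the script, and the base model name is taken from the assert message above):

from train_lora import train

train(
    base_model="decapoda-research/LLaMA-7b-hf",
    data_path="chatdoctor5k.json",
    output_dir="./lora-chatdoctor",
    adapter_name="lora",
    lora_target_modules=["q_proj", "v_proj"],  # a common choice for LLaMA; the script sets no default
)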
utils.py ADDED
@@ -0,0 +1,174 @@
+ import dataclasses
+ import logging
+ import math
+ import os
+ import io
+ import sys
+ import time
+ import json
+ import copy
+ from typing import Optional, Sequence, Union
+
+ import openai
+ import tqdm
+ from openai import openai_object
+
+ StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
+
+ # Read the API key from the environment rather than hard-coding it here.
+ openai.api_key = os.getenv("OPENAI_API_KEY", "")
+ openai_org = os.getenv("OPENAI_ORG")
+ if openai_org is not None:
+     openai.organization = openai_org
+     logging.warning(f"Switching to organization: {openai_org} for OAI API key.")
+
+
+ @dataclasses.dataclass
+ class OpenAIDecodingArguments(object):
+     max_tokens: int = 1800
+     temperature: float = 0.2
+     top_p: float = 1.0
+     n: int = 1
+     stream: bool = False
+     stop: Optional[Sequence[str]] = None
+     presence_penalty: float = 0.0
+     frequency_penalty: float = 0.0
+     suffix: Optional[str] = None
+     logprobs: Optional[int] = None
+     echo: bool = False
+
+
+ def openai_completion(
+     prompts: Union[str, Sequence[str], Sequence[dict[str, str]], dict[str, str]],
+     decoding_args: OpenAIDecodingArguments,
+     model_name="text-davinci-003",
+     sleep_time=2,
+     batch_size=1,
+     max_instances=sys.maxsize,
+     max_batches=sys.maxsize,
+     return_text=False,
+     **decoding_kwargs,
+ ) -> Union[StrOrOpenAIObject, Sequence[StrOrOpenAIObject], Sequence[Sequence[StrOrOpenAIObject]]]:
+     """Decode with OpenAI API.
+
+     Args:
+         prompts: A string or a list of strings to complete. If it is a chat model the strings should be formatted
+             as explained here: https://github.com/openai/openai-python/blob/main/chatml.md. If it is a chat model
+             it can also be a dictionary (or list thereof) as explained here:
+             https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
+         decoding_args: Decoding arguments.
+         model_name: Model name. Can be either in the format of "org/model" or just "model".
+         sleep_time: Time to sleep once the rate-limit is hit.
+         batch_size: Number of prompts to send in a single request. Only for non chat model.
+         max_instances: Maximum number of prompts to decode.
+         max_batches: Maximum number of batches to decode. This argument will be deprecated in the future.
+         return_text: If True, return text instead of full completion object (which contains things like logprob).
+         decoding_kwargs: Additional decoding arguments. Pass in `best_of` and `logit_bias` if you need them.
+
+     Returns:
+         A completion or a list of completions.
+         Depending on return_text and decoding_args.n, the completion type can be one of
+             - a string (if return_text is True)
+             - an openai_object.OpenAIObject object (if return_text is False)
+             - a list of objects of the above types (if decoding_args.n > 1)
+     """
+     is_single_prompt = isinstance(prompts, (str, dict))
+     if is_single_prompt:
+         prompts = [prompts]
+
+     if max_batches < sys.maxsize:
+         logging.warning(
+             "`max_batches` will be deprecated in the future, please use `max_instances` instead. "
+             "Setting `max_instances` to `max_batches * batch_size` for now."
+         )
+         max_instances = max_batches * batch_size
+
+     prompts = prompts[:max_instances]
+     num_prompts = len(prompts)
+     prompt_batches = [
+         prompts[batch_id * batch_size : (batch_id + 1) * batch_size]
+         for batch_id in range(int(math.ceil(num_prompts / batch_size)))
+     ]
+
+     completions = []
+     for batch_id, prompt_batch in tqdm.tqdm(
+         enumerate(prompt_batches),
+         desc="prompt_batches",
+         total=len(prompt_batches),
+     ):
+         batch_decoding_args = copy.deepcopy(decoding_args)  # cloning the decoding_args
+
+         while True:
+             try:
+                 shared_kwargs = dict(
+                     model=model_name,
+                     **batch_decoding_args.__dict__,
+                     **decoding_kwargs,
+                 )
+                 completion_batch = openai.Completion.create(prompt=prompt_batch, **shared_kwargs)
+                 choices = completion_batch.choices
+
+                 for choice in choices:
+                     choice["total_tokens"] = completion_batch.usage.total_tokens
+                 completions.extend(choices)
+                 break
+             except openai.error.OpenAIError as e:
+                 logging.warning(f"OpenAIError: {e}.")
+                 if "Please reduce your prompt" in str(e):
+                     batch_decoding_args.max_tokens = int(batch_decoding_args.max_tokens * 0.8)
+                     logging.warning(f"Reducing target length to {batch_decoding_args.max_tokens}, Retrying...")
+                 else:
+                     logging.warning("Hit request rate limit; retrying...")
+                     time.sleep(sleep_time)  # Annoying rate limit on requests.
+
+     if return_text:
+         completions = [completion.text for completion in completions]
+     if decoding_args.n > 1:
+         # make completions a nested list, where each entry is a consecutive decoding_args.n of original entries.
+         completions = [completions[i : i + decoding_args.n] for i in range(0, len(completions), decoding_args.n)]
+     if is_single_prompt:
+         # Return non-tuple if only 1 input and 1 generation.
+         (completions,) = completions
+     return completions
+
+
+ def _make_w_io_base(f, mode: str):
+     if not isinstance(f, io.IOBase):
+         f_dirname = os.path.dirname(f)
+         if f_dirname != "":
+             os.makedirs(f_dirname, exist_ok=True)
+         f = open(f, mode=mode)
+     return f
+
+
+ def _make_r_io_base(f, mode: str):
+     if not isinstance(f, io.IOBase):
+         f = open(f, mode=mode)
+     return f
+
+
+ def jdump(obj, f, mode="w", indent=4, default=str):
+     """Dump a str or dictionary to a file in json format.
+
+     Args:
+         obj: An object to be written.
+         f: A string path to the location on disk.
+         mode: Mode for opening the file.
+         indent: Indent for storing json dictionaries.
+         default: A function to handle non-serializable entries; defaults to `str`.
+     """
+     f = _make_w_io_base(f, mode)
+     if isinstance(obj, (dict, list)):
+         json.dump(obj, f, indent=indent, default=default)
+     elif isinstance(obj, str):
+         f.write(obj)
+     else:
+         raise ValueError(f"Unexpected type: {type(obj)}")
+     f.close()
+
+
+ def jload(f, mode="r"):
+     """Load a .json file into a dictionary (or whatever object the file contains)."""
+     f = _make_r_io_base(f, mode)
+     jdict = json.load(f)
+     f.close()
+     return jdict
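Note: a round-trip example for the JSON helpers at the bottom of utils.py (paths and record contents are illustrative):

import utils

record = {"instruction": "Summarize the symptoms.", "input": "fever, cough", "output": "..."}
utils.jdump([record], "data/sample.json")  # jdump creates the data/ directory if needed
loaded = utils.jload("data/sample.json")
assert loaded[0]["instruction"] == record["instruction"]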