Spaces:
Sleeping
Sleeping
import os

# Route all Hugging Face / Torch cache traffic to a writable location
# (needed on read-only deployments such as Hugging Face Spaces).
# Set the env vars BEFORE importing any library that may pull in
# transformers, so the settings actually take effect.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer transformers
# releases in favour of HF_HOME — kept for backward compatibility.
_CACHE_DIRS = {
    "HF_HOME": "/tmp/cache",
    "TRANSFORMERS_CACHE": "/tmp/cache/transformers",
    "SENTENCE_TRANSFORMERS_HOME": "/tmp/cache/sentence_transformers",
    "HF_DATASETS_CACHE": "/tmp/cache/hf_datasets",
    "TORCH_HOME": "/tmp/cache/torch",
}
for _env_var, _cache_path in _CACHE_DIRS.items():
    os.environ[_env_var] = _cache_path
    # Create each directory up front; exist_ok makes reruns safe.
    os.makedirs(_cache_path, exist_ok=True)

import comet_llm
from opik import track

import json

import openai
import streamlit as st
import torch
from pathlib import Path
from sentence_transformers import SentenceTransformer, util
# === CONFIG ===
# OpenAI client; OPENAI_API_KEY must be provided via the environment.
# If the variable is unset the client is created with api_key=None and
# every API call will fail — the per-call try/except blocks surface that.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# ReMeDi dialogue dataset shipped alongside this script.
BASE_DIR = Path(__file__).parent
REMEDI_PATH = BASE_DIR / "ReMeDi-base.json"

# Fail fast with a clear message if the dataset was not deployed with the app.
# (The file itself is parsed lazily in load_data(); loading it here as well
# was redundant and the result was never used.)
if not REMEDI_PATH.exists():
    raise FileNotFoundError(f"❌ File not found: {REMEDI_PATH}")
# === LOAD EMBEDDING MODEL ===
def load_model():
    """Load the multilingual sentence-embedding model used for retrieval.

    The model embeds both English queries and the dataset's Chinese
    patient utterances into the same vector space.
    """
    embedder = SentenceTransformer(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return embedder
def load_data():
    """Parse the ReMeDi dataset into patient→doctor utterance pairs.

    Returns a list of ``{"patient": ..., "doctor": ...}`` dicts, one for
    every adjacent (patient turn, doctor turn) pair in each conversation.
    """
    with open(REMEDI_PATH, "r", encoding="utf-8") as fh:
        conversations = json.load(fh)

    pairs = []
    for conversation in conversations:
        turns = conversation["information"]
        # Walk adjacent turns; keep only patient-then-doctor exchanges.
        for current, following in zip(turns, turns[1:]):
            if current["role"] == "patient" and following["role"] == "doctor":
                pairs.append({
                    "patient": current["sentence"],
                    "doctor": following["sentence"],
                })
    return pairs
# -------------------------
# Build cached embeddings
# -------------------------
def build_embeddings(dialogue_pairs, _model):
    """Encode every patient utterance into one tensor of embeddings.

    The leading underscore on ``_model`` is the Streamlit convention for
    excluding an unhashable argument from a cache key — presumably this
    function was once decorated with @st.cache_data; TODO confirm.
    """
    sentences = [entry["patient"] for entry in dialogue_pairs]
    return _model.encode(sentences, convert_to_tensor=True)
| # === TRANSLATE USING GPT === | |
| #@track | |
| #def translate_to_chinese(english_text: str) -> str: | |
| # """Translate English -> Chinese using OpenAI. Returns Chinese string or raises.""" | |
| # if client is None: | |
| # return english_text # fallback: no translation possible | |
| # prompt = f"Translate the following medical symptom description into natural Chinese (simplified):\n\n{english_text}" | |
| #try: | |
| # resp = client.chat.completions.create( | |
| # model="gpt-3.5-turbo", # using cheaper model for quick response, | |
| # messages=[{"role": "user", "content": prompt}], | |
| # temperature=0.0, | |
| # ) | |
| # return resp.choices[0].message.content.strip() | |
| # except Exception as e: | |
| # st.error(f"Translation (EN->ZH) failed: {e}") | |
| # return english_text | |
def translate_to_english(chinese_text):
    """Translate a Chinese doctor reply into English via GPT-4.

    On any API failure the error is rendered into the returned string so
    the Streamlit UI can display it instead of crashing.
    """
    prompt = f"Translate the following Chinese medical response to English:\n\n{chinese_text}"
    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Best-effort: surface the failure as text rather than raising.
        return f"Translation failed: {str(exc)}"
def gpt_direct_response(user_input):
    """Ask GPT-4 to answer the patient's question directly.

    Returns the model's answer, or a failure message string if the API
    call raises (keeps the UI responsive instead of crashing).
    """
    prompt = f"You are a knowledgeable and compassionate medical assistant. Answer the following patient question clearly and concisely:\n\n{user_input}"
    try:
        completion = client.chat.completions.create(
            model="gpt-4",  # or "gpt-3.5-turbo" to save credits
            messages=[{"role": "user", "content": prompt}],
            temperature=0.5,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"GPT response failed: {str(exc)}"
# === CHATBOT FUNCTION ===
# -------------------------
# Retrieval function: embed the query and search the patient-utterance index
# -------------------------
def chatbot_response(user_input, _model, dialogue_pairs, patient_embeddings, top_k=1):
    """Retrieve the closest historical dialogue for an English query.

    The multilingual embedding model handles the English query directly,
    so the input does not need to be translated before searching.

    Returns a dict with the matched Chinese patient question, the Chinese
    doctor reply, English translations of both, and the cosine-similarity
    score of the best match.
    """
    query_vec = _model.encode(user_input, convert_to_tensor=True)

    # Cosine similarity against every stored patient utterance,
    # then keep the top_k highest-scoring candidates.
    scores = util.cos_sim(query_vec, patient_embeddings)[0]
    best_scores, best_indices = torch.topk(scores, k=top_k)

    best = dialogue_pairs[best_indices[0].item()]
    return {
        "matched_question": best["patient"],
        "translated_patient_question": translate_to_english(best["patient"]),
        "original_response": best["doctor"],
        "translated_response": translate_to_english(best["doctor"]),
        "similarity_score": best_scores[0].item(),
    }
# === MAIN APP ===
st.set_page_config(page_title="Dr_Q_bot", layout="centered")
st.title("🩺 Dr_Q_bot - Medical Chatbot")
st.write("Ask about a symptom and get an example doctor response and enhanced GPT-4 LLM Doctor's response")

# Add author info in the sidebar
with st.sidebar:
    st.markdown("## 👤👤Authors")
    st.markdown("**Vasan Iyer**")
    st.markdown("**Eric J Giacomucci**")
    st.markdown("[GitHub](https://github.com/Vaiy108)")
    st.markdown("[LinkedIn](https://linkedin.com/in/vasan-iyer)")

# Load resources: embedding model, dataset pairs, precomputed embeddings.
# NOTE(review): these run on every Streamlit rerun — they look like they
# were meant to be cached (@st.cache_resource / @st.cache_data); confirm
# whether decorators were lost upstream.
model = load_model()
dialogue_pairs = load_data()
patient_embeddings = build_embeddings(dialogue_pairs, model)

# Chat UI
user_input = st.text_input("Describe your symptom or question in English:")

if st.button("Submit") and user_input:
    with st.spinner("Thinking..."):
        # Retrieval (direct multilingual search)
        result = chatbot_response(user_input, model, dialogue_pairs, patient_embeddings)
        # GPT direct answer
        gpt_response = gpt_direct_response(user_input)

    # Display GPT response
    st.markdown("## ✅ GPT-4 Doctor's Response")
    st.success(gpt_response)

    st.markdown("---")
    st.markdown("## Retrieval Example (from ReMeDi dataset)")
    # Check similarity threshold before showing retrieval
    # (0.4 cosine similarity is the cutoff for a "close enough" match).
    if result.get("similarity_score", 0) >= 0.4:
        st.markdown("## Example Historical Dialogue")
        st.markdown("### 🧑⚕️ Matched or Closest Patient Question")
        st.write(result["matched_question"])
        st.markdown("### 🌐 Translated Patient Question (English)")
        st.success(result["translated_patient_question"])
        st.markdown("### 🩺 🇨🇳 Original Doctor Response (Chinese)")
        st.write(result["original_response"])
        st.markdown("### 🌐 Translated Doctor Response (English)")
        st.success(result["translated_response"])
    else:
        st.warning("No close match found in dataset. Using GPT response only.")

# Always-visible disclaimer footer.
st.markdown("---")
st.warning("This chatbot uses real dialogue data for research and educational use only. Not a substitute for professional medical advice.")

# Small debug / utility
#st.caption("Model: all-MiniLM-L6-v2 · Retrieval threshold: {:.2f}".format(SIMILARITY_THRESHOLD))