Spaces:

dedp
/

harbor

Sleeping

App Files Files Community

amitashukla committed on Mar 14

Commit

e169c4c

1 Parent(s): 6b71f6c

use merged finetuned Qwen 2.5 model

Browse files

Files changed (4) hide show

app.py +8 -51
requirements.txt +6 -5
src/chat.py +74 -71
src/config.py +5 -1

app.py CHANGED Viewed

@@ -1,16 +1,5 @@
 """
 Gradio Web Interface for Harbor Treatment Navigation Chatbot
-Landing page offers three paths:
-1. Quick Recommendations — enter a zip code, get nearby options inline
-2. Talk to a Human — compact crisis callout with phone number
-3. Get Personalized Advice — leads to the AI chatbot
-Run locally:
-    python app.py
-Access in browser:
-    http://localhost:7860
 """
 import os
@@ -240,12 +229,10 @@ ZIPCODE_RE = re.compile(r"^\d{5}$")
 def is_valid_zip(zipcode: str) -> bool:
-    """Return True if zipcode is exactly 5 digits."""
     return bool(ZIPCODE_RE.match(zipcode.strip()))
 def _load_resources_once():
-    """Load resource CSVs once and cache."""
     if not hasattr(_load_resources_once, "_cache"):
         current_dir = os.path.dirname(os.path.abspath(__file__))
         paths = [
@@ -257,23 +244,14 @@ def _load_resources_once():
 def get_recommendations(zipcode: str) -> list[dict]:
-    """
-    Return a list of treatment recommendations for the given zip code.
-    Uses the same filter/score logic as the chatbot, but with a minimal
-    profile containing only the zipcode.
-    """
     profile = create_empty_profile()
     profile["logistics"]["zipcode"] = zipcode.strip()
     resources = _load_resources_once()
     filtered = filter_resources(resources, profile)
-    top = score_resources(filtered, profile)
-    return top
 def format_recommendations(zipcode: str, results: list[dict]) -> str:
-    """Render recommendations as an HTML snippet for display."""
     if not results:
         return (
             f"<div class='harbor-results'>"
@@ -287,12 +265,10 @@ def format_recommendations(zipcode: str, results: list[dict]) -> str:
     items_html = ""
     for r in results:
         name = r.get("name", "Unknown Facility")
-        # Build address from parts
         addr_parts = [r.get("address", ""), r.get("city", ""),
                       r.get("state", ""), r.get("zip", "")]
         address = ", ".join(p.strip() for p in addr_parts if p.strip())
         phone = r.get("phone", "").strip()
-        # Type from primary_focus
         focus = r.get("primary_focus", "").strip()
         type_label = ", ".join(
             v.strip().replace("_", " ").title() for v in focus.split("|")
@@ -324,25 +300,13 @@ def format_recommendations(zipcode: str, results: list[dict]) -> str:
 # ── App ───────────────────────────────────────────────────────────────────────
 def create_chatbot():
-    """Creates the Harbor interface with a landing page and chatbot."""
-    _load_resources_once()          # pre-load CSVs so first zip lookup is fast
     chatbot = Chatbot()
     def chat(message, history):
-        """
-        Generate a response for the current message.
-        Args:
-            message (str): The current message from the user
-            history (list): List of previous [user, assistant] message pairs
-        Returns:
-            str: The assistant's response
-        """
         return chatbot.get_response(message)
     def handle_zip_submit(zipcode: str):
-        """Validate zip and return inline results HTML."""
         zipcode = zipcode.strip()
         if not is_valid_zip(zipcode):
             return gr.update(
@@ -350,15 +314,12 @@ def create_chatbot():
                 visible=True,
             )
         results = get_recommendations(zipcode)
-        # Log recommendations to console
         if results:
             print(f"[Harbor] Zip lookup ({zipcode}) — {len(results)} recommendation(s):")
             for i, r in enumerate(results, 1):
                 print(f"  {i}. {r.get('name', 'Unknown')} — {r.get('city', '')}, {r.get('state', '')} {r.get('zip', '')}")
         else:
             print(f"[Harbor] Zip lookup ({zipcode}) — no results found.")
         return gr.update(value=format_recommendations(zipcode, results), visible=True)
     def show_chat():
@@ -369,12 +330,10 @@ def create_chatbot():
     with gr.Blocks(title="Harbor", theme=THEME, css=CSS) as demo:
-        # ── Landing Page ──────────────────────────────────────────────
         with gr.Column(visible=True) as landing_page:
             with gr.Column(elem_classes="harbor-wrap"):
                 gr.HTML(HEADER_MD)
-                # Card 1 — Quick Recommendations (featured)
                 with gr.Group(elem_classes="harbor-card harbor-card-featured"):
                     gr.HTML("<div class='harbor-card-title'>📍 Find Options Near You</div>")
                     gr.HTML(
@@ -395,14 +354,10 @@ def create_chatbot():
                             scale=1,
                             elem_classes="harbor-zip-btn",
                         )
-                # Results rendered outside the card so the loading spinner
-                # does not overlay the input card above.
                 results_html = gr.HTML(visible=False, elem_id="zip-results")
-                # Card 2 — Crisis callout (compact)
                 gr.HTML(CRISIS_CALLOUT_HTML)
-                # Card 3 — Chatbot
                 with gr.Group(elem_classes="harbor-card"):
                     gr.HTML(CHATBOT_CARD_MD)
                     start_chat_btn = gr.Button(
@@ -414,7 +369,6 @@ def create_chatbot():
                 gr.HTML(FOOTER_MD)
-        # ── Chat Page ─────────────────────────────────────────────────
         with gr.Column(visible=False) as chat_page:
             with gr.Column(elem_classes="chat-header"):
                 back_btn = gr.Button(
@@ -438,7 +392,6 @@ def create_chatbot():
                 ],
             )
-        # ── Events ────────────────────────────────────────────────────
         zip_btn.click(handle_zip_submit, inputs=zip_input, outputs=results_html)
         zip_input.submit(handle_zip_submit, inputs=zip_input, outputs=results_html)
         start_chat_btn.click(show_chat, outputs=[landing_page, chat_page])
@@ -448,5 +401,9 @@ def create_chatbot():
 if __name__ == "__main__":
-    demo = create_chatbot()
-    demo.launch()

 """
 Gradio Web Interface for Harbor Treatment Navigation Chatbot
 """
 import os
 def is_valid_zip(zipcode: str) -> bool:
     # True when the whitespace-stripped input matches ZIPCODE_RE (the five-digit pattern compiled at module level).
     return bool(ZIPCODE_RE.match(zipcode.strip()))
 def _load_resources_once():
     if not hasattr(_load_resources_once, "_cache"):
         current_dir = os.path.dirname(os.path.abspath(__file__))
         paths = [
 def get_recommendations(zipcode: str) -> list[dict]:
     # Zip-only lookup: start from an empty profile, set just the stripped zip code,
     # then filter and score the cached resources against that minimal profile.
     profile = create_empty_profile()
     profile["logistics"]["zipcode"] = zipcode.strip()
     resources = _load_resources_once()
     filtered = filter_resources(resources, profile)
     # The scored list is returned as-is; any ranking or truncation happens inside score_resources.
+    return score_resources(filtered, profile)
 def format_recommendations(zipcode: str, results: list[dict]) -> str:
     if not results:
         return (
             f"<div class='harbor-results'>"
     items_html = ""
     for r in results:
         name = r.get("name", "Unknown Facility")
         addr_parts = [r.get("address", ""), r.get("city", ""),
                       r.get("state", ""), r.get("zip", "")]
         address = ", ".join(p.strip() for p in addr_parts if p.strip())
         phone = r.get("phone", "").strip()
         focus = r.get("primary_focus", "").strip()
         type_label = ", ".join(
             v.strip().replace("_", " ").title() for v in focus.split("|")
 # ── App ───────────────────────────────────────────────────────────────────────
 def create_chatbot():
+    _load_resources_once()
     chatbot = Chatbot()
     def chat(message, history):
         return chatbot.get_response(message)
     def handle_zip_submit(zipcode: str):
         zipcode = zipcode.strip()
         if not is_valid_zip(zipcode):
             return gr.update(
                 visible=True,
             )
         results = get_recommendations(zipcode)
         if results:
             print(f"[Harbor] Zip lookup ({zipcode}) — {len(results)} recommendation(s):")
             for i, r in enumerate(results, 1):
                 print(f"  {i}. {r.get('name', 'Unknown')} — {r.get('city', '')}, {r.get('state', '')} {r.get('zip', '')}")
         else:
             print(f"[Harbor] Zip lookup ({zipcode}) — no results found.")
         return gr.update(value=format_recommendations(zipcode, results), visible=True)
     def show_chat():
     with gr.Blocks(title="Harbor", theme=THEME, css=CSS) as demo:
         with gr.Column(visible=True) as landing_page:
             with gr.Column(elem_classes="harbor-wrap"):
                 gr.HTML(HEADER_MD)
                 with gr.Group(elem_classes="harbor-card harbor-card-featured"):
                     gr.HTML("<div class='harbor-card-title'>📍 Find Options Near You</div>")
                     gr.HTML(
                             scale=1,
                             elem_classes="harbor-zip-btn",
                         )
                 results_html = gr.HTML(visible=False, elem_id="zip-results")
                 gr.HTML(CRISIS_CALLOUT_HTML)
                 with gr.Group(elem_classes="harbor-card"):
                     gr.HTML(CHATBOT_CARD_MD)
                     start_chat_btn = gr.Button(
                 gr.HTML(FOOTER_MD)
         with gr.Column(visible=False) as chat_page:
             with gr.Column(elem_classes="chat-header"):
                 back_btn = gr.Button(
                 ],
             )
         zip_btn.click(handle_zip_submit, inputs=zip_input, outputs=results_html)
         zip_input.submit(handle_zip_submit, inputs=zip_input, outputs=results_html)
         start_chat_btn.click(show_chat, outputs=[landing_page, chat_page])
 if __name__ == "__main__":
+    try:
+        demo = create_chatbot()
+        demo.launch()
+    except Exception as e:
+        import traceback
+        traceback.print_exc()

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
-gradio==5.23.3
-huggingface_hub
-python-dotenv
-pandas
-requests

+gradio>=4.0.0
+transformers>=5.0.0
+torch>=2.0.0
+accelerate>=0.26.0
+huggingface_hub>=0.20.0
+python-dotenv

src/chat.py CHANGED Viewed

@@ -1,121 +1,124 @@
-from huggingface_hub import InferenceClient
-from src.config import BASE_MODEL, MY_MODEL, HF_TOKEN
 import os
-from src.utils.profile import load_schema, create_empty_profile, extract_profile_updates, merge_profile, profile_to_summary
 from src.utils.resources import load_resources, filter_resources, score_resources, format_recommendations
 class Chatbot:
     def __init__(self):
-        """
-        Initialize the chatbot with a HF model ID
-        """
-        model_id = MY_MODEL if MY_MODEL else BASE_MODEL # define MY_MODEL in config.py if you create a new model in the HuggingFace Hub
-        self.client = InferenceClient(model=model_id, token=HF_TOKEN)
-        # Initialize user profile
         current_dir = os.path.dirname(os.path.abspath(__file__))
         data_dir = os.path.join(current_dir, '..', 'data')
         self.profile_schema = load_schema(os.path.join(data_dir, 'user_profile_schema.json'))
         self.user_profile = create_empty_profile()
-        # Load treatment resources once
         knowledge_dir = os.path.join(data_dir, '..', 'references', 'knowledge')
-        resources_paths = [
             os.path.join(knowledge_dir, 'ma_resources.csv'),
             os.path.join(knowledge_dir, 'boston_resources.csv'),
-        ]
-        self.resources = load_resources(resources_paths)
-    def update_profile(self, user_input):
-        """
-        Scan user input for profile-relevant information and merge it
-        into the running user profile.
-        Args:
-            user_input (str): The user's message text.
-        """
         updates = extract_profile_updates(self.profile_schema, user_input)
         merge_profile(self.user_profile, updates)
-    def format_prompt(self, user_input):
-        """
-        Format the user's input into a list of chat messages with system context.
-        Updates the user profile with any new information detected.
-        This method:
-        1. Loads system prompt from system_prompt.md
-        2. Updates user profile from schema-based keyword matching
-        3. Injects profile summary into the system prompt so the model knows what's been gathered
-        4. Returns a list of message dicts for the chat completion API
-        Args:
-            user_input (str): The user's question
-        Returns:
-            list[dict]: A list of message dicts with 'role' and 'content' keys
-        """
-        # Get the directory where this file is located
         current_dir = os.path.dirname(os.path.abspath(__file__))
-        # Load system prompt
         system_prompt_path = os.path.join(current_dir, '../data/system_prompt.md')
         with open(system_prompt_path, 'r', encoding='utf-8') as f:
             system_prompt = f.read().strip()
-        # Update user profile from this message
         self.update_profile(user_input)
-        # Build profile summary for the prompt
         profile_summary = profile_to_summary(self.user_profile)
-        # Build system message with profile context
         system_content = system_prompt
         if profile_summary:
-            system_content = system_content + "\n\n" + profile_summary
-        # Return structured messages for chat completion API
-        messages = [
             {"role": "system", "content": system_content},
             {"role": "user", "content": user_input},
         ]
-        return messages
-    def get_response(self, user_input):
-        """
-        Generate a response to the user's question, with resource recommendations
-        appended when the user profile contains enough information to match.
-        Args:
-            user_input (str): The user's question
-        Returns:
-            str: The chatbot's response, optionally followed by top 3 resources
-        """
-        # 1. Format messages (also updates profile)
         messages = self.format_prompt(user_input)
-        # 2. Generate LLM response via chat completion API
-        result = self.client.chat_completion(
-            messages=messages,
-            max_tokens=512,
             temperature=0.7,
         )
-        response = result.choices[0].message.content.strip()
-        # 3. Filter resources by profile, score, and append top 3
         filtered = filter_resources(self.resources, self.user_profile)
         top_resources = score_resources(filtered, self.user_profile)
         recommendations = format_recommendations(top_resources)
-        # Log recommendations to console
         if top_resources:
-            print(f"[Harbor] Chat recommendations ({len(top_resources)}) for profile:")
             for i, r in enumerate(top_resources, 1):
                 print(f"  {i}. {r.get('name', 'Unknown')} — {r.get('city', '')}, {r.get('state', '')} {r.get('zip', '')}")
         else:
             print("[Harbor] No recommendations matched current profile.")
         if recommendations:
-            response = response + "\n\n" + recommendations
         return response

 import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from src.config import BASE_MODEL, MY_MODEL, LOCAL_MODEL, HF_TOKEN
+from src.utils.profile import (
+    load_schema, create_empty_profile,
+    extract_profile_updates, merge_profile, profile_to_summary,
+)
 from src.utils.resources import load_resources, filter_resources, score_resources, format_recommendations
+def _load_pipeline(model_id: str):
+    """Load a text-generation pipeline, using the best available device."""
     # Returns a (pipeline, tokenizer) pair. HF_TOKEN is passed to both from_pretrained calls.
+    print(f"[Harbor] Loading model: {model_id}")
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
     # Choose dtype and placement: bfloat16 with device_map="auto" on CUDA, float32 with device=-1 otherwise.
+    if torch.cuda.is_available():
+        dtype = torch.bfloat16
+        device_map = "auto"
+        device = None
+        device_label = "CUDA"
+    elif torch.backends.mps.is_available():
+        # bitsandbytes does not support MPS; float16 on 18 GB can OOM.
+        # Fall back to CPU with float32.
+        dtype = torch.float32
+        device_map = None
+        device = -1
+        device_label = "CPU"
+    else:
         # NOTE(review): these are the same four values as the MPS branch above;
         # the two branches differ only in the explanatory comment.
+        dtype = torch.float32
+        device_map = None
+        device = -1
+        device_label = "CPU"
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        dtype=dtype,
+        device_map=device_map,
+        token=HF_TOKEN,
+    )
     # On CUDA, device is None here because placement was already requested through device_map.
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device=device,
+    )
+    print(f"[Harbor] Model ready on {device_label}: {model_id}")
+    return pipe, tokenizer
 class Chatbot:
     # Chat helper that runs a locally loaded text-generation pipeline, tracks a user
     # profile built from incoming messages, and appends matching resource
     # recommendations to each reply.
     # NOTE(review): user_profile is stored on the instance, so every caller of the
     # same Chatbot object shares one profile; confirm this is intended if a single
     # instance serves more than one session.
     def __init__(self):
+        # LOCAL_MODEL is used for local development to avoid OOM on 18 GB machines.
+        # On HF Spaces (CUDA), MY_MODEL (the merged finetuned model) is used.
         # The first truthy value wins: LOCAL_MODEL, then MY_MODEL, then BASE_MODEL.
+        model_id = LOCAL_MODEL or MY_MODEL or BASE_MODEL
+        self.pipe, self.tokenizer = _load_pipeline(model_id)
         # Data files are resolved relative to this module's own directory.
         current_dir = os.path.dirname(os.path.abspath(__file__))
         data_dir = os.path.join(current_dir, '..', 'data')
         self.profile_schema = load_schema(os.path.join(data_dir, 'user_profile_schema.json'))
         self.user_profile = create_empty_profile()
         # The resource CSVs are loaded once here and reused by every get_response call.
         knowledge_dir = os.path.join(data_dir, '..', 'references', 'knowledge')
+        self.resources = load_resources([
             os.path.join(knowledge_dir, 'ma_resources.csv'),
             os.path.join(knowledge_dir, 'boston_resources.csv'),
+        ])
+    def update_profile(self, user_input: str):
         # Derive updates from the message using the loaded schema and merge them into
         # self.user_profile. The return value of merge_profile is not used, so it
         # presumably mutates the profile in place; verify in src/utils/profile.
         updates = extract_profile_updates(self.profile_schema, user_input)
         merge_profile(self.user_profile, updates)
+    def format_prompt(self, user_input: str) -> list[dict]:
         # Builds the [system, user] message list for one turn.
         # The system prompt file is re-read from disk on every call.
         current_dir = os.path.dirname(os.path.abspath(__file__))
         system_prompt_path = os.path.join(current_dir, '../data/system_prompt.md')
         with open(system_prompt_path, 'r', encoding='utf-8') as f:
             system_prompt = f.read().strip()
         # Side effect: the profile is updated from this message before it is summarized.
         self.update_profile(user_input)
         profile_summary = profile_to_summary(self.user_profile)
         system_content = system_prompt
         # The summary is appended to the system prompt only when it is non-empty.
         if profile_summary:
+            system_content += "\n\n" + profile_summary
+        return [
             {"role": "system", "content": system_content},
             {"role": "user", "content": user_input},
         ]
+    def get_response(self, user_input: str) -> str:
         # Only the system prompt and the current message are sent to the model;
         # no earlier turns are included in the prompt.
         messages = self.format_prompt(user_input)
         # Render the messages to a single prompt string (tokenize=False) ending with the
         # assistant generation prompt.
+        prompt = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
         # Sampling is enabled, so replies are not deterministic. return_full_text=False
         # asks the pipeline for the completion without the prompt text.
+        output = self.pipe(
+            prompt,
+            max_new_tokens=512,
             temperature=0.7,
+            do_sample=True,
+            return_full_text=False,
         )
+        response = output[0]["generated_text"].strip()
         # Match resources against the profile as it stands after this message.
         filtered = filter_resources(self.resources, self.user_profile)
         top_resources = score_resources(filtered, self.user_profile)
         recommendations = format_recommendations(top_resources)
         # Console logging of what was matched; this does not affect the returned text.
         if top_resources:
+            print(f"[Harbor] {len(top_resources)} recommendation(s) for current profile:")
             for i, r in enumerate(top_resources, 1):
                 print(f"  {i}. {r.get('name', 'Unknown')} — {r.get('city', '')}, {r.get('state', '')} {r.get('zip', '')}")
         else:
             print("[Harbor] No recommendations matched current profile.")
         # Recommendations are appended after the model text whenever the formatted string is non-empty.
         if recommendations:
+            response += "\n\n" + recommendations
         return response

src/config.py CHANGED Viewed

@@ -12,6 +12,10 @@ BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
 # BASE_MODEL = "HuggingFaceH4/zephyr-7b-beta"       # ungated
 # If you finetune the model or change it in any way, save it to huggingface hub, then set MY_MODEL to your model ID. The model ID is in the format "your-username/your-model-name".
-MY_MODEL = "" #"amitashukla/harbor-qwn25-lora"
 HF_TOKEN = os.getenv("HF_TOKEN")

 # BASE_MODEL = "HuggingFaceH4/zephyr-7b-beta"       # ungated
 # If you finetune the model or change it in any way, save it to huggingface hub, then set MY_MODEL to your model ID. The model ID is in the format "your-username/your-model-name".
+MY_MODEL = "amitashukla/harbor-qwn25-merged"
+# Used locally to avoid OOM on 18 GB unified memory.
+# Set to None when deploying to HF Spaces; keep the name defined, because src/chat.py imports it.
+LOCAL_MODEL = None #"Qwen/Qwen2.5-1.5B-Instruct"
 HF_TOKEN = os.getenv("HF_TOKEN")