Spaces:
Runtime error
Runtime error
AsherKnight committed on
Commit ·
bddd1de
1
Parent(s): bebed16
Initial commit
Browse files- .huggingface/spaces.yaml +2 -0
- agents/__pycache__/agent1_image_issue.cpython-311.pyc +0 -0
- agents/__pycache__/agent2_tenancy_faq.cpython-311.pyc +0 -0
- agents/agent1_image_issue.py +61 -0
- agents/agent2_tenancy_faq.py +52 -0
- app.py +137 -0
- config/city_law_data.json +3 -0
- requirements.txt +8 -0
- utils/__pycache__/captioning.cpython-311.pyc +0 -0
- utils/__pycache__/llm_utils.cpython-311.pyc +0 -0
- utils/__pycache__/routing.cpython-311.pyc +0 -0
- utils/captioning.py +107 -0
- utils/llm_utils.py +45 -0
- utils/routing.py +32 -0
.huggingface/spaces.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sdk: streamlit
|
| 2 |
+
hardware: gpu
|
agents/__pycache__/agent1_image_issue.cpython-311.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
agents/__pycache__/agent2_tenancy_faq.cpython-311.pyc
ADDED
|
Binary file (2.85 kB). View file
|
|
|
agents/agent1_image_issue.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .agent2_tenancy_faq import handle_tenancy_query

# Image analysis + troubleshooting agent

# Minimum caption confidence before we act on the caption; below this we ask
# the user to clarify instead of guessing.
# NOTE(review): confidence comes from ImageCaptioning.get_best_caption (a CLIP
# softmax score or a negative LM loss) — confirm 0.1 is meaningful on that scale.
CLARITY_THRESHOLD = 0.1

# Canned fallback reply used when the caption confidence is too low.
CLARIFYING_QUESTION_PROMPT = (
    "I observed something in the image, but I'm not entirely sure what the issue is. "
    "Could you tell me more about what concerns you in this image?"
)

from utils.captioning import ImageCaptioning
from utils.llm_utils import LLaMAHelper

# Module-level singletons: the captioning models and the LLM are loaded once
# at import time and shared by every call to handle_image_issue().
captioner = ImageCaptioning()
llm = LLaMAHelper()
+
def handle_image_issue(user_input, image, history=None, context=None):
    """Diagnose a property issue from an image plus the user's message.

    Args:
        user_input: The user's current free-text message.
        image: PIL image to analyze; only captioned the first time it is seen.
        history: List of (question, answer) pairs from earlier turns.
        context: Mutable per-session dict; ``last_caption_data`` caches the
            (caption, confidence) pair so the image is captioned only once.

    Returns:
        The LLM's reply, or the canned clarifying question when the caption
        confidence is below CLARITY_THRESHOLD.
    """
    # Bug fix: literal [] / {} defaults are created once and shared across
    # calls, silently leaking caption/conversation state between sessions.
    if history is None:
        history = []
    if context is None:
        context = {}

    if context.get("last_caption_data"):
        caption, confidence = context["last_caption_data"]
        include_caption = False  # image was already processed
    else:
        caption, confidence = captioner.get_best_caption(image)
        context["last_caption_data"] = (caption, confidence)
        include_caption = True  # this is the first time we're seeing this image

    # Too unsure about what the image shows — ask instead of guessing.
    if confidence < CLARITY_THRESHOLD:
        return CLARIFYING_QUESTION_PROMPT

    user_context = "\n".join(f"User: {q}\nBot: {a}" for q, a in history)

    full_input = ""
    if include_caption:
        full_input += f"Possible Image description: {caption}\n"
    full_input += (
        f"User Input: {user_input}\n"
        f"Previous context of the conversation (keep it in hindsight): {user_context}\n"
    )

    system_prompt = """You are a property expert who analyzes property images, user inputs, and context to identify visible issues and suggest practical fixes immediately.

Your goals:
- Identify any clear issue from the image or the current user input.
- Suggest practical, actionable steps to fix or investigate the issue — **as soon as it's identifiable**.
- Use the previous conversation context **to support your understanding**, but **always prioritize the most recent user input if it contradicts earlier context**.
- Ask a follow-up question **only if you absolutely need more detail to provide a helpful or safe recommendation**.

IMPORTANT:
- You are speaking directly to the user — do not use third-person language.
- Assume the user is always concerned with knowing the fixes to the problem being discussed or diagnosed. That’s what you should stay focused on.
- Do not get carried away — focus on diagnosing the issue and providing clear fixes.
- Previous conversation context is provided **only** to help you make better suggestions — **do not reference the "previous conversation" explicitly to the user**.
- Your primary task is to help the user by giving practical suggestions, solutions, and fixes.
- **Do not delay suggestions** if you already have enough information to make a confident recommendation.
- If the user's latest input contradicts earlier context, **trust the current input and clarify only if needed**.
- Avoid unnecessary follow-up questions — ask only if you truly need more details to help effectively.

Now, analyze the image, user input, and context. Suggest a fix immediately if you can. Use context to support your response, but **always prioritize the user's most recent input**. Ask follow-up questions only if absolutely necessary and only when required to provide a helpful recommendation.
"""

    reply = llm.chat(system_prompt, full_input, temperature=0.46)
    return reply
agents/agent2_tenancy_faq.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from utils.llm_utils import LLaMAHelper
import spacy
from geotext import GeoText

# Initialize LLaMA and spaCy
# Module-level singletons shared by every tenancy query in this process.
llm = LLaMAHelper()
# Small English pipeline; used only for GPE (geo-political entity) detection.
nlp = spacy.load("en_core_web_sm")
| 8 |
+
|
| 9 |
+
def extract_location(text, method="spacy"):
    """Pull the first place name out of *text*.

    method: "spacy" uses GPE named-entity tags, "geotext" matches known
    country/city names; any other value yields "".
    """
    if method == "spacy":
        parsed = nlp(text)
        found = [entity.text for entity in parsed.ents if entity.label_ == "GPE"]
    elif method == "geotext":
        matches = GeoText(text)
        found = matches.countries + matches.cities
    else:
        return ""
    return found[0] if found else ""
| 19 |
+
|
| 20 |
+
def get_cached_location_from_history(history, method="spacy"):
    """Scan past (question, answer) turns, newest first, for a place name."""
    for past_question, _answer in reversed(history):
        found = extract_location(past_question, method)
        if found:
            return found
    return ""
| 26 |
+
|
| 27 |
+
def handle_tenancy_query(user_query, user_context, history=None, location_method="spacy"):
    """Answer a tenancy-law question, localized to the user's city/country when known.

    Args:
        user_query: The user's current question.
        user_context: Mutable session dict; ``location`` is read and, when a
            location is discovered, written back so later turns reuse it.
        history: List of (question, answer) pairs from earlier turns.
        location_method: Extractor passed through to extract_location().

    Returns:
        The LLM's reply string.
    """
    # Bug fix: a literal [] default is shared across calls; use None sentinel.
    if history is None:
        history = []

    # Use stored location if available
    location = user_context.get("location", "")

    # Otherwise, extract from current or previous queries
    if not location:
        location = extract_location(user_query, location_method)
        if not location:
            location = get_cached_location_from_history(history, location_method)

    # Persist the discovered location for later turns.
    if location:
        user_context["location"] = location

    system_prompt = "You are a legal assistant specializing in tenancy laws."
    prompt = ""
    if location:
        prompt += f" The user is from {location}."

    if history:
        chat_context = "\n".join(f"User: {q}\nBot: {a}" for q, a in history)
        prompt += f"\n\nPrevious conversation:\n{chat_context}"

    prompt += f"\n\nUser's current question: {user_query}\n\nGive a concise and helpful answer. If needed, ask a follow-up question to clarify."
    # Debug trace of the assembled prompt (typo "tenacy" fixed).
    print(f'prompt for tenancy faq is {prompt}')
    reply = llm.chat(system_prompt, prompt, temperature=0.7)
    return reply
app.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from utils.routing import route_agent
|
| 3 |
+
from agents.agent1_image_issue import handle_image_issue
|
| 4 |
+
from agents.agent2_tenancy_faq import handle_tenancy_query
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import torch
|
| 7 |
+
import hashlib
|
| 8 |
+
|
| 9 |
+
# Helper to generate MD5 hash from image
|
| 10 |
+
def get_image_hash(image):
    """Hex MD5 digest of the image's raw pixel bytes (cheap change detector)."""
    raw = image.tobytes()
    digest = hashlib.md5(raw)
    return digest.hexdigest()
| 12 |
+
|
| 13 |
+
# Main query handler
|
| 14 |
+
def handle_query(user_input, image=None, location='', history=None, context=None):
    """Route one chat turn to the right agent and update session state.

    Args:
        user_input: User's text message.
        image: Optional PIL image from the upload widget.
        location: Optional city/country string from the UI.
        history: List of (question, answer) pairs (Gradio State).
        context: Per-session dict tracking images, hashes, last agent,
            cached caption, and location (Gradio State).

    Returns:
        Tuple of (reply text, updated history, updated context, status label).
    """
    # Bug fix: mutable [] / {} defaults are shared across every call that
    # omits them, leaking one session's state into another.
    if history is None:
        history = []
    if context is None:
        context = {}
    try:
        response_ui_msg = ""

        # Initialize context if missing
        context.setdefault("images", [])
        context.setdefault("image_hashes", [])
        context.setdefault("last_agent", None)
        context.setdefault("last_caption_data", None)

        # If there's a new image (hash comparison dedupes re-submissions)
        if image is not None:
            new_hash = get_image_hash(image)
            if len(context["image_hashes"]) == 0 or new_hash != context["image_hashes"][-1]:
                context["images"].append(image)
                context["image_hashes"].append(new_hash)
                context["location"] = ""
                context["last_caption_data"] = None  # Reset cached caption
                response_ui_msg = "(New image attached. Starting image-related discussion.)"

        # If image is removed mid-chat
        if image is None and context["images"]:
            response_ui_msg = "(Image removed. Continuing as text-only query.)"

        # Use location if no image context
        if not context["images"] and location:
            context["location"] = location

        # Determine which agent should handle the query
        is_image_context = bool(context["images"])
        agent = route_agent(user_input, is_image_context)

        # Agent switch handling
        if context["last_agent"] == 'agent1' and agent == 'agent2':
            response_ui_msg += "\n(Switching to tenancy discussion based on your query...)"

        elif context["last_agent"] == 'agent2' and agent == 'agent1':
            response_ui_msg += "\n(Detected switch to image-based issue. Starting a new conversation...)"
            history.clear()
            context.clear()
            # Bug fix: `if image` triggers PIL Image truthiness; compare to
            # None explicitly, matching the checks above.
            context["images"] = [image] if image is not None else []
            context["image_hashes"] = [get_image_hash(image)] if image is not None else []
            context["last_caption_data"] = None
            context["last_agent"] = None
            context["location"] = location or ""

        # Update current agent
        context["last_agent"] = agent

        # Run the correct agent
        if agent == 'agent1':
            if context["images"]:
                result = handle_image_issue(user_input, context["images"][-1], history, context)
            else:
                result = "No image found to analyze."
        else:
            result = handle_tenancy_query(user_input, {"location": context.get("location")}, history)

        # Add message to response
        if response_ui_msg:
            result = f"{response_ui_msg}\n\n{result}"

        history.append((user_input, result))
        return result, history, context, "🟢 Chat Ongoing"

    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            error_msg = "⚠️ CUDA Out of Memory! Please try again later or reduce the image size."
            return error_msg, history, context, "🔴 Error"
        # Bare `raise` preserves the original traceback (`raise e` resets it).
        raise
| 85 |
+
|
| 86 |
+
# Reset function
|
| 87 |
+
def reset_chat():
    """Return fresh values for every UI component and state object."""
    empty_context = {"location": "", "images": [], "image_hashes": []}
    return "", "", None, [], empty_context, "🟡 New Chat Started", ""
| 89 |
+
|
| 90 |
+
# Clear just the conversation history
|
| 91 |
+
def clear_chat_history():
    """Drop the stored conversation turns; leave other session state untouched."""
    cleared = ([], "", "🧹 Chat history cleared")
    return cleared
| 93 |
+
|
| 94 |
+
# Build the Gradio interface
with gr.Blocks() as demo:
    # Per-session state objects (Gradio keeps a separate copy per browser session).
    conversation_history = gr.State([])
    user_context = gr.State({"location": "", "images": [], "image_hashes": []})
    # NOTE(review): session_state is created but never wired to any event —
    # session_indicator below is used instead; confirm whether this is dead state.
    session_state = gr.State("🟡 New Chat Started")

    gr.Markdown("# 🏠 Multi-Agent Real Estate Chatbot")
    gr.Markdown("Ask about property issues (with images) or tenancy questions!")

    with gr.Row():
        with gr.Column():
            # Left column: user inputs and action buttons.
            user_input = gr.Textbox(label="Enter your question:")
            location_input = gr.Textbox(label="Enter your city or country (optional):")
            image_input = gr.Image(type="pil", label="Upload an image (optional):")

            submit_btn = gr.Button("Submit")
            new_chat_btn = gr.Button("🔁 Start New Chat")
            clear_history_btn = gr.Button("🧹 Clear Chat History")

        with gr.Column():
            # Right column: chatbot reply and session status display.
            chatbot_output = gr.Textbox(label="Chatbot Response:", interactive=False, lines=8)
            session_indicator = gr.Textbox(label="Session Status", interactive=False)

    # Hook button logic
    # Submit runs the router; it reads and writes both State objects.
    submit_btn.click(
        handle_query,
        inputs=[user_input, image_input, location_input, conversation_history, user_context],
        outputs=[chatbot_output, conversation_history, user_context, session_indicator]
    )

    # New chat resets every input, both states, and the output panes.
    new_chat_btn.click(
        reset_chat,
        inputs=[],
        outputs=[user_input, location_input, image_input, conversation_history, user_context, session_indicator, chatbot_output]
    )

    # Clear history drops the turn list but keeps the image/location context.
    clear_history_btn.click(
        clear_chat_history,
        inputs=[],
        outputs=[conversation_history, chatbot_output, session_indicator]
    )

# Launch app
# NOTE(review): share=True opens a public tunnel; confirm that's intended
# when deployed on Spaces (Spaces serves the app without it).
demo.launch(share=True)
config/city_law_data.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"New York": {"notice_period": "30 days"}
|
| 3 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
torch
|
| 3 |
+
transformers
|
| 4 |
+
Pillow
|
| 5 |
+
gradio
|
| 6 |
+
ultralytics
|
| 7 |
+
spacy
|
| 8 |
+
geotext
|
utils/__pycache__/captioning.cpython-311.pyc
ADDED
|
Binary file (6.7 kB). View file
|
|
|
utils/__pycache__/llm_utils.cpython-311.pyc
ADDED
|
Binary file (2.31 kB). View file
|
|
|
utils/__pycache__/routing.cpython-311.pyc
ADDED
|
Binary file (845 Bytes). View file
|
|
|
utils/captioning.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoProcessor, AutoModelForCausalLM
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import torch
|
| 4 |
+
from ultralytics import YOLO # You need to install: pip install ultralytics
|
| 5 |
+
from transformers import CLIPProcessor, CLIPModel
|
| 6 |
+
|
| 7 |
+
class ImageCaptioning:
    """Generates image captions with BLIP and GIT, then uses CLIP to rank them.

    All three models are loaded eagerly in __init__ and moved to GPU when
    available; an instance is intended to be created once and reused.
    """

    def __init__(self):
        # Prefer CUDA when present; every model below is moved to this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load BLIP
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(self.device)

        # Load GIT
        self.git_processor = AutoProcessor.from_pretrained("microsoft/git-base")
        self.git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-base").to(self.device)

        # CLIP acts as a judge that scores caption/image similarity.
        self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)

        # # Load YOLO
        # self.yolo_model = YOLO("yolov8n.pt")  # You can use yolov8s.pt or others

    def generate_caption_blip(self, image):
        """Caption *image* with BLIP; returns (caption, log-prob score)."""
        inputs = self.blip_processor(images=image, return_tensors="pt").to(self.device)
        print(f"Inputs keys: {inputs.keys()}")
        with torch.no_grad():
            output = self.blip_model.generate(**inputs)
        caption = self.blip_processor.decode(output[0], skip_special_tokens=True)
        return caption, self.compute_logprob(self.blip_model, inputs, output, self.blip_processor)

    def generate_caption_git(self, image):
        """Caption *image* with GIT; returns (caption, log-prob score)."""
        inputs = self.git_processor(images=image, return_tensors="pt").to(self.device)
        print(f"Inputs keys: {inputs.keys()}")
        with torch.no_grad():
            generated_ids = self.git_model.generate(**inputs)
        caption = self.git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return caption, self.compute_logprob(self.git_model, inputs, generated_ids, self.git_processor)

    # def generate_caption_yolo(self, image):
    #     # Run YOLO detection
    #     results = self.yolo_model(image)
    #     detections = results[0].boxes.data  # [x1, y1, x2, y2, conf, class]
    #     names = results[0].names

    #     if len(detections) == 0:
    #         return "No objects detected", 0.0

    #     # Get top class labels with confidence
    #     label_conf_pairs = [(names[int(cls)], float(conf)) for *_, conf, cls in detections]
    #     label_conf_pairs.sort(key=lambda x: x[1], reverse=True)

    #     top_labels = list({label for label, _ in label_conf_pairs[:5]})  # top 5 unique labels
    #     avg_conf = sum([conf for _, conf in label_conf_pairs[:5]]) / len(top_labels)

    #     caption = "Image contains: " + ", ".join(top_labels)
    #     return caption, avg_conf

    def generate_caption_clip(self, image):
        """Generate BLIP and GIT candidate captions and pick the best via CLIP.

        Returns:
            (caption_text, clip_score) where clip_score is the softmax-normalized
            image-text similarity for the winning caption.
        """
        # Step 1: Generate caption candidates
        # NOTE(review): each candidate is a (caption, logprob) tuple — only the
        # text (index 0) is handed to CLIP below.
        caption_blip = self.generate_caption_blip(image)
        caption_git = self.generate_caption_git(image)
        candidates = [caption_blip, caption_git]

        # Extract text-only for CLIP scoring
        captions_only = [c[0] for c in candidates]

        # Step 2: Score them with CLIP
        inputs = self.clip_processor(text=captions_only, images=image, return_tensors="pt", padding=True).to(self.device)
        with torch.no_grad():
            outputs = self.clip_model(**inputs)
            scores = outputs.logits_per_image[0]  # shape: (num_captions,)
            scores = scores.softmax(dim=0)  # optional: normalize scores

        best_idx = scores.argmax().item()
        best_caption = candidates[best_idx]
        best_score = scores[best_idx].item()

        return best_caption[0], best_score  # returning the caption text and score

    def compute_logprob(self, model, inputs, generated_ids, processor):
        """Score a generated caption as the negative LM loss (higher is better)."""
        # Decode the generated tokens to text
        caption_text = processor.decode(generated_ids[0], skip_special_tokens=True)

        # Tokenize the caption (text) to get labels and input_ids
        text_inputs = processor(text=caption_text, return_tensors="pt").to(self.device)
        labels = text_inputs["input_ids"]

        # Combine image inputs with the new input_ids (needed for loss computation)
        # NOTE(review): input_ids is passed both inside model_inputs and as
        # labels= — confirm both BLIP and GIT accept this calling convention.
        model_inputs = {**inputs, "input_ids": text_inputs["input_ids"]}

        # Compute the loss
        with torch.no_grad():
            outputs = model(**model_inputs, labels=labels)

        return -outputs.loss.item()  # Higher is better

    def get_best_caption(self, image):
        """Public entry point: caption *image* and return (caption, confidence)."""
        # This runs BLIP and GIT, then scores both with CLIP to pick the best caption
        caption, score = self.generate_caption_clip(image)
        print(f"Selected Caption: {caption} | Confidence: {score}")
        return caption, score
| 107 |
+
|
utils/llm_utils.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 2 |
+
import torch
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
class LLaMAHelper:
    """Thin wrapper around a chat-tuned LLaMA text-generation pipeline."""

    def __init__(self, hf_token=None):
        """Load tokenizer/model and build the generation pipeline.

        Args:
            hf_token: Hugging Face access token for the gated model; falls back
                to the HUGGINGFACE_TOKEN environment variable.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model_id = "meta-llama/Llama-3.2-3B-Instruct"

        hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN")

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            token=hf_token,
            # fp16 only where CUDA is available; fp16 on CPU is slow/unsupported.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        ).to(self.device)

        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=0 if torch.cuda.is_available() else -1
        )

        # self.text_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")

    # def classifier(self, text, candidate_labels):
    #     return self.text_classifier(text, candidate_labels)

    def chat(self, system_prompt, prompt, max_new_tokens=300, temperature=0.5):
        """Send one system+user exchange to the model and return the reply.

        Returns:
            The lowercased reply with any echo of the prompt removed, or ""
            when the pipeline produces no output.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        outputs = self.pipe(messages, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)

        # Bug fix: the original referenced full_response even when `outputs`
        # was empty, raising UnboundLocalError at the print below.
        if not outputs:
            return ""

        last_message = outputs[0]["generated_text"][-1]
        # Chat pipelines return a list of message dicts; fall back to plain
        # text output if the last element is a string instead.
        # NOTE(review): lowercasing destroys capitalization in user-facing
        # answers — confirm this is intentional.
        if "content" in last_message:
            full_response = last_message["content"].lower()
        else:
            full_response = last_message.lower()

        print('response from LLM is', full_response)
        # Strip any echo of the prompt from the reply.
        return full_response.replace(prompt, "").strip()
| 45 |
+
|
utils/routing.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# agent_router.py
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
# Router to manage multi-agent classification
|
| 5 |
+
# agent_router.py
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
def route_agent(text, has_image):
    """
    Determines which agent should handle the query based on image presence and content type.
    """
    # Any image in play always goes to the image-issue agent.
    if has_image:
        return "agent1"  # Image-based input => Image issue agent

    # Tenancy-related keywords; the scan currently agrees with the fallback
    # but is kept so routing can later be refined per keyword.
    tenancy_keywords = (
        "rent", "lease", "tenant", "landlord", "agreement", "deposit",
        "eviction", "notice", "contract", "housing law", "tenancy", "sublet",
    )

    lowered = text.lower()
    if any(keyword in lowered for keyword in tenancy_keywords):
        return "agent2"

    # Fallback — you could route this to a clarification step instead
    return "agent2"
| 27 |
+
|
| 28 |
+
def clarify_prompt():
    """Canned question asking the user which kind of help they need."""
    first_part = "Just to clarify, are you asking about a visible issue with a property (you can also upload an image),\n"
    second_part = "or is this a general question about renting, laws, or agreements?"
    return first_part + second_part
|