AsherKnight committed on
Commit
bddd1de
·
1 Parent(s): bebed16

Initial commit

Browse files
.huggingface/spaces.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sdk: streamlit
2
+ hardware: gpu
agents/__pycache__/agent1_image_issue.cpython-311.pyc ADDED
Binary file (1.67 kB). View file
 
agents/__pycache__/agent2_tenancy_faq.cpython-311.pyc ADDED
Binary file (2.85 kB). View file
 
agents/agent1_image_issue.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image analysis + troubleshooting agent.
#
# Module setup: all imports are grouped at the top (they were previously
# scattered between the constants, violating PEP 8), and the heavyweight
# captioning / LLM helpers are instantiated once at import time so every
# call to handle_image_issue() shares them.
from .agent2_tenancy_faq import handle_tenancy_query  # noqa: F401 — kept for backward compatibility
from utils.captioning import ImageCaptioning
from utils.llm_utils import LLaMAHelper

# Minimum caption confidence; below this we ask the user to clarify
# instead of guessing at the issue.
CLARITY_THRESHOLD = 0.1

CLARIFYING_QUESTION_PROMPT = (
    "I observed something in the image, but I'm not entirely sure what the issue is. "
    "Could you tell me more about what concerns you in this image?"
)

# Shared singletons — model loading is expensive, so do it once per process.
captioner = ImageCaptioning()
llm = LLaMAHelper()
15
+
16
def handle_image_issue(user_input, image, history=None, context=None):
    """Diagnose a property issue from an image plus the user's message.

    Args:
        user_input: The user's latest message.
        image: PIL image to analyze. It is only captioned the first time;
            the (caption, confidence) pair is cached in
            ``context["last_caption_data"]`` for later turns.
        history: Optional list of (question, answer) pairs from earlier turns.
        context: Optional mutable session dict used to cache caption data.

    Returns:
        The LLM's reply, or a canned clarifying question when the caption
        confidence falls below CLARITY_THRESHOLD.
    """
    # Fix the shared-mutable-default pitfall: literal [] / {} defaults are
    # created once at definition time and would leak state across calls.
    if history is None:
        history = []
    if context is None:
        context = {}

    if context.get("last_caption_data"):
        caption, confidence = context["last_caption_data"]
        include_caption = False  # image was already processed
    else:
        caption, confidence = captioner.get_best_caption(image)
        context["last_caption_data"] = (caption, confidence)
        include_caption = True  # this is the first time we're seeing this image

    # Low-confidence caption: ask the user rather than hallucinate an issue.
    if confidence < CLARITY_THRESHOLD:
        return CLARIFYING_QUESTION_PROMPT

    user_context = "\n".join(f"User: {q}\nBot: {a}" for q, a in history)

    full_input = ""
    if include_caption:
        full_input += f"Possible Image description: {caption}\n"
    full_input += (
        f"User Input: {user_input}\n"
        f"Previous context of the conversation (keep it in hindsight): {user_context}\n"
    )

    system_prompt = """You are a property expert who analyzes property images, user inputs, and context to identify visible issues and suggest practical fixes immediately.

Your goals:
- Identify any clear issue from the image or the current user input.
- Suggest practical, actionable steps to fix or investigate the issue — **as soon as it's identifiable**.
- Use the previous conversation context **to support your understanding**, but **always prioritize the most recent user input if it contradicts earlier context**.
- Ask a follow-up question **only if you absolutely need more detail to provide a helpful or safe recommendation**.

IMPORTANT:
- You are speaking directly to the user — do not use third-person language.
- Assume the user is always concerned with knowing the fixes to the problem being discussed or diagnosed. That’s what you should stay focused on.
- Do not get carried away — focus on diagnosing the issue and providing clear fixes.
- Previous conversation context is provided **only** to help you make better suggestions — **do not reference the "previous conversation" explicitly to the user**.
- Your primary task is to help the user by giving practical suggestions, solutions, and fixes.
- **Do not delay suggestions** if you already have enough information to make a confident recommendation.
- If the user's latest input contradicts earlier context, **trust the current input and clarify only if needed**.
- Avoid unnecessary follow-up questions — ask only if you truly need more details to help effectively.

Now, analyze the image, user input, and context. Suggest a fix immediately if you can. Use context to support your response, but **always prioritize the user's most recent input**. Ask follow-up questions only if absolutely necessary and only when required to provide a helpful recommendation.
"""

    reply = llm.chat(system_prompt, full_input, temperature=0.46)
    return reply
agents/agent2_tenancy_faq.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from utils.llm_utils import LLaMAHelper
import spacy
from geotext import GeoText

# Initialize LLaMA and spaCy once at import time: both are expensive to
# load, so a single shared instance serves every call in this module.
llm = LLaMAHelper()
# Small English pipeline, used for GPE (geo-political entity) extraction.
nlp = spacy.load("en_core_web_sm")
8
+
9
def extract_location(text, method="spacy"):
    """Return the first place name found in *text*, or "" when none is found.

    *method* selects the backend: "spacy" (GPE named entities from the
    shared ``nlp`` pipeline) or "geotext" (country/city gazetteer lookup).
    Any other value yields "".
    """
    candidates = []
    if method == "spacy":
        parsed = nlp(text)
        candidates = [entity.text for entity in parsed.ents if entity.label_ == "GPE"]
    elif method == "geotext":
        matches = GeoText(text)
        candidates = matches.countries + matches.cities
    return candidates[0] if candidates else ""
19
+
20
def get_cached_location_from_history(history, method="spacy"):
    """Scan past user questions, newest first, for a mentioned location.

    Returns the first location found, or "" when no earlier question
    names one.
    """
    for past_question, _answer in reversed(history):
        found = extract_location(past_question, method)
        if found:
            return found
    return ""
26
+
27
def handle_tenancy_query(user_query, user_context, history=None, location_method="spacy"):
    """Answer a tenancy-law question, localizing the advice when possible.

    Args:
        user_query: The user's current question.
        user_context: Mutable dict; ``user_context["location"]`` is read
            first and, when a location is discovered, written back so later
            turns can reuse it.
        history: Optional list of (question, answer) pairs from earlier turns.
        location_method: Backend passed to extract_location
            ("spacy" or "geotext").

    Returns:
        The LLM's reply string.
    """
    # Fix the shared-mutable-default pitfall: history=[] would be a single
    # list reused across every call.
    if history is None:
        history = []

    # Use stored location if available
    location = user_context.get("location", "")

    # Otherwise, extract from current or previous queries
    if not location:
        location = extract_location(user_query, location_method)
        if not location:
            location = get_cached_location_from_history(history, location_method)

    if location:
        user_context["location"] = location

    system_prompt = "You are a legal assistant specializing in tenancy laws."
    prompt = ""
    if location:
        prompt += f" The user is from {location}."

    if history:
        chat_context = "\n".join(f"User: {q}\nBot: {a}" for q, a in history)
        prompt += f"\n\nPrevious conversation:\n{chat_context}"

    prompt += f"\n\nUser's current question: {user_query}\n\nGive a concise and helpful answer. If needed, ask a follow-up question to clarify."
    # Debug trace of the final prompt (typo "tenacy" fixed).
    print(f'prompt for tenancy faq is {prompt}')
    reply = llm.chat(system_prompt, prompt, temperature=0.7)
    return reply
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from utils.routing import route_agent
3
+ from agents.agent1_image_issue import handle_image_issue
4
+ from agents.agent2_tenancy_faq import handle_tenancy_query
5
+ from PIL import Image
6
+ import torch
7
+ import hashlib
8
+
9
+ # Helper to generate MD5 hash from image
10
+ def get_image_hash(image):
11
+ return hashlib.md5(image.tobytes()).hexdigest()
12
+
13
+ # Main query handler
14
+ def handle_query(user_input, image=None, location='', history=[], context={}):
15
+ try:
16
+ response_ui_msg = ""
17
+
18
+ # Initialize context if missing
19
+ context.setdefault("images", [])
20
+ context.setdefault("image_hashes", [])
21
+ context.setdefault("last_agent", None)
22
+ context.setdefault("last_caption_data", None)
23
+
24
+ # If there's a new image
25
+ if image is not None:
26
+ new_hash = get_image_hash(image)
27
+ if len(context["image_hashes"]) == 0 or new_hash != context["image_hashes"][-1]:
28
+ context["images"].append(image)
29
+ context["image_hashes"].append(new_hash)
30
+ context["location"] = ""
31
+ context["last_caption_data"] = None # Reset cached caption
32
+ response_ui_msg = "(New image attached. Starting image-related discussion.)"
33
+
34
+ # If image is removed mid-chat
35
+ if image is None and context["images"]:
36
+ response_ui_msg = "(Image removed. Continuing as text-only query.)"
37
+
38
+ # Use location if no image context
39
+ if not context["images"] and location:
40
+ context["location"] = location
41
+
42
+ # Determine which agent should handle the query
43
+ is_image_context = bool(context["images"])
44
+ agent = route_agent(user_input, is_image_context)
45
+
46
+ # Agent switch handling
47
+ if context["last_agent"] == 'agent1' and agent == 'agent2':
48
+ response_ui_msg += "\n(Switching to tenancy discussion based on your query...)"
49
+
50
+ elif context["last_agent"] == 'agent2' and agent == 'agent1':
51
+ response_ui_msg += "\n(Detected switch to image-based issue. Starting a new conversation...)"
52
+ history.clear()
53
+ context.clear()
54
+ context["images"] = [image] if image else []
55
+ context["image_hashes"] = [get_image_hash(image)] if image else []
56
+ context["last_caption_data"] = None
57
+ context["last_agent"] = None
58
+ context["location"] = location or ""
59
+
60
+ # Update current agent
61
+ context["last_agent"] = agent
62
+
63
+ # Run the correct agent
64
+ if agent == 'agent1':
65
+ if context["images"]:
66
+ result = handle_image_issue(user_input, context["images"][-1], history, context)
67
+ else:
68
+ result = "No image found to analyze."
69
+ else:
70
+ result = handle_tenancy_query(user_input, {"location": context.get("location")}, history)
71
+
72
+ # Add message to response
73
+ if response_ui_msg:
74
+ result = f"{response_ui_msg}\n\n{result}"
75
+
76
+ history.append((user_input, result))
77
+ return result, history, context, "🟢 Chat Ongoing"
78
+
79
+ except RuntimeError as e:
80
+ if "CUDA out of memory" in str(e):
81
+ error_msg = "⚠️ CUDA Out of Memory! Please try again later or reduce the image size."
82
+ return error_msg, history, context, "🔴 Error"
83
+ else:
84
+ raise e
85
+
86
def reset_chat():
    """Return fresh values for every UI component and state object.

    Order matches the `new_chat_btn.click` outputs: user_input,
    location_input, image_input, conversation_history, user_context,
    session_indicator, chatbot_output.
    """
    fresh_context = {"location": "", "images": [], "image_hashes": []}
    return "", "", None, [], fresh_context, "🟡 New Chat Started", ""
89
+
90
def clear_chat_history():
    """Empty the conversation history and chatbot output, leaving session state alone.

    Order matches the `clear_history_btn.click` outputs:
    conversation_history, chatbot_output, session_indicator.
    """
    cleared_history = []
    cleared_output = ""
    return cleared_history, cleared_output, "🧹 Chat history cleared"
93
+
94
# Build the Gradio interface
with gr.Blocks() as demo:
    # Per-session state objects shared by all callbacks below.
    conversation_history = gr.State([])  # list of (question, answer) pairs
    user_context = gr.State({"location": "", "images": [], "image_hashes": []})
    # NOTE(review): session_state is never wired to any callback — confirm it is needed.
    session_state = gr.State("🟡 New Chat Started")

    gr.Markdown("# 🏠 Multi-Agent Real Estate Chatbot")
    gr.Markdown("Ask about property issues (with images) or tenancy questions!")

    with gr.Row():
        with gr.Column():
            # Input controls.
            user_input = gr.Textbox(label="Enter your question:")
            location_input = gr.Textbox(label="Enter your city or country (optional):")
            image_input = gr.Image(type="pil", label="Upload an image (optional):")

            submit_btn = gr.Button("Submit")
            new_chat_btn = gr.Button("🔁 Start New Chat")
            clear_history_btn = gr.Button("🧹 Clear Chat History")

        with gr.Column():
            # Read-only output displays.
            chatbot_output = gr.Textbox(label="Chatbot Response:", interactive=False, lines=8)
            session_indicator = gr.Textbox(label="Session Status", interactive=False)

    # Hook button logic: submit routes the turn through handle_query.
    submit_btn.click(
        handle_query,
        inputs=[user_input, image_input, location_input, conversation_history, user_context],
        outputs=[chatbot_output, conversation_history, user_context, session_indicator]
    )

    # Full reset: clears all inputs, state, and outputs.
    new_chat_btn.click(
        reset_chat,
        inputs=[],
        outputs=[user_input, location_input, image_input, conversation_history, user_context, session_indicator, chatbot_output]
    )

    # Partial reset: clears only the transcript, keeping session context.
    clear_history_btn.click(
        clear_chat_history,
        inputs=[],
        outputs=[conversation_history, chatbot_output, session_indicator]
    )

# Launch app
# NOTE(review): share=True opens a public tunnel link — confirm this is intended for deployment.
demo.launch(share=True)
config/city_law_data.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "New York": {"notice_period": "30 days"}
3
+ }
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ Pillow
5
+ gradio
6
+ ultralytics
7
+ spacy
8
+ geotext
utils/__pycache__/captioning.cpython-311.pyc ADDED
Binary file (6.7 kB). View file
 
utils/__pycache__/llm_utils.cpython-311.pyc ADDED
Binary file (2.31 kB). View file
 
utils/__pycache__/routing.cpython-311.pyc ADDED
Binary file (845 Bytes). View file
 
utils/captioning.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import BlipProcessor, BlipForConditionalGeneration, AutoProcessor, AutoModelForCausalLM
2
+ from PIL import Image
3
+ import torch
4
+ from ultralytics import YOLO # You need to install: pip install ultralytics
5
+ from transformers import CLIPProcessor, CLIPModel
6
+
7
class ImageCaptioning:
    """Ensemble image captioner: BLIP and GIT candidates re-ranked by CLIP.

    Loads three Hugging Face models at construction time (expensive), so
    this class is intended to be instantiated once and reused.
    """

    def __init__(self):
        # Prefer GPU when available; every model is moved to this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load BLIP — caption candidate generator #1.
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(self.device)

        # Load GIT — caption candidate generator #2.
        self.git_processor = AutoProcessor.from_pretrained("microsoft/git-base")
        self.git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-base").to(self.device)

        # CLIP is used only to score/re-rank candidate captions against the image.
        self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)

        # # Load YOLO
        # self.yolo_model = YOLO("yolov8n.pt")  # You can use yolov8s.pt or others

    def generate_caption_blip(self, image):
        """Caption *image* with BLIP; return (caption_text, logprob_score)."""
        inputs = self.blip_processor(images=image, return_tensors="pt").to(self.device)
        print(f"Inputs keys: {inputs.keys()}")
        with torch.no_grad():
            output = self.blip_model.generate(**inputs)
        caption = self.blip_processor.decode(output[0], skip_special_tokens=True)
        return caption, self.compute_logprob(self.blip_model, inputs, output, self.blip_processor)

    def generate_caption_git(self, image):
        """Caption *image* with GIT; return (caption_text, logprob_score)."""
        inputs = self.git_processor(images=image, return_tensors="pt").to(self.device)
        print(f"Inputs keys: {inputs.keys()}")
        with torch.no_grad():
            generated_ids = self.git_model.generate(**inputs)
        caption = self.git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return caption, self.compute_logprob(self.git_model, inputs, generated_ids, self.git_processor)

    # def generate_caption_yolo(self, image):
    #     # Run YOLO detection
    #     results = self.yolo_model(image)
    #     detections = results[0].boxes.data  # [x1, y1, x2, y2, conf, class]
    #     names = results[0].names

    #     if len(detections) == 0:
    #         return "No objects detected", 0.0

    #     # Get top class labels with confidence
    #     label_conf_pairs = [(names[int(cls)], float(conf)) for *_, conf, cls in detections]
    #     label_conf_pairs.sort(key=lambda x: x[1], reverse=True)

    #     top_labels = list({label for label, _ in label_conf_pairs[:5]})  # top 5 unique labels
    #     avg_conf = sum([conf for _, conf in label_conf_pairs[:5]]) / len(top_labels)

    #     caption = "Image contains: " + ", ".join(top_labels)
    #     return caption, avg_conf

    def generate_caption_clip(self, image):
        """Generate BLIP and GIT captions, then return the CLIP-preferred one.

        Returns (best_caption_text, softmax_normalized_clip_score). Note the
        score is a softmax over only the two candidates, not an absolute
        image-text similarity.
        """
        # Step 1: Generate caption candidates (each is a (text, logprob) pair)
        caption_blip = self.generate_caption_blip(image)
        caption_git = self.generate_caption_git(image)
        candidates = [caption_blip, caption_git]

        # Extract text-only for CLIP scoring
        captions_only = [c[0] for c in candidates]

        # Step 2: Score them with CLIP
        inputs = self.clip_processor(text=captions_only, images=image, return_tensors="pt", padding=True).to(self.device)
        with torch.no_grad():
            outputs = self.clip_model(**inputs)
            scores = outputs.logits_per_image[0]  # shape: (num_captions,)
            scores = scores.softmax(dim=0)  # optional: normalize scores

        best_idx = scores.argmax().item()
        best_caption = candidates[best_idx]
        best_score = scores[best_idx].item()

        return best_caption[0], best_score  # returning the caption text and score

    def compute_logprob(self, model, inputs, generated_ids, processor):
        """Score a generated caption by the model's own negative loss.

        Higher is better (it is minus the mean cross-entropy over the
        caption tokens).
        NOTE(review): this re-tokenizes the decoded text and splices the
        new input_ids into the original image inputs — assumes both the
        BLIP and GIT processors/models accept that combination; confirm.
        """
        # Decode the generated tokens to text
        caption_text = processor.decode(generated_ids[0], skip_special_tokens=True)

        # Tokenize the caption (text) to get labels and input_ids
        text_inputs = processor(text=caption_text, return_tensors="pt").to(self.device)
        labels = text_inputs["input_ids"]

        # Combine image inputs with the new input_ids (needed for loss computation)
        model_inputs = {**inputs, "input_ids": text_inputs["input_ids"]}

        # Compute the loss
        with torch.no_grad():
            outputs = model(**model_inputs, labels=labels)

        return -outputs.loss.item()  # Higher is better

    def get_best_caption(self, image):
        """Public entry point: return (caption, confidence) for *image*.

        Runs BLIP and GIT, then scores both with CLIP to pick the best caption.
        """
        caption, score = self.generate_caption_clip(image)
        print(f"Selected Caption: {caption} | Confidence: {score}")
        return caption, score
107
+
utils/llm_utils.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
+ import torch
3
+ import os
4
+
5
class LLaMAHelper:
    """Thin wrapper around a Llama-3.2-3B-Instruct text-generation pipeline."""

    def __init__(self, hf_token=None):
        # GPU when available; fp16 on GPU halves memory, fp32 on CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model_id = "meta-llama/Llama-3.2-3B-Instruct"

        # Gated model: explicit token argument wins over the env var.
        hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN")

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            token=hf_token,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        ).to(self.device)

        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=0 if torch.cuda.is_available() else -1
        )

        # self.text_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")

    # def classifier(self, text, candidate_labels):
    #     return self.text_classifier(text, candidate_labels)

    def chat(self, system_prompt, prompt, max_new_tokens=300, temperature=0.5):
        """Send one system+user exchange to the model and return its reply.

        Returns the assistant text, lowercased, with any literal echo of
        *prompt* removed and surrounding whitespace stripped.
        NOTE(review): if the pipeline returns no outputs this method falls
        through and implicitly returns None — confirm callers tolerate that.
        NOTE(review): the unconditional .lower() changes user-visible
        casing — confirm this is intentional.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        outputs = self.pipe(messages, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)
        if outputs:
            # The last generated_text entry is the assistant turn: a dict
            # with "content" on chat-aware transformers versions, else a
            # plain string — handle both shapes.
            if "content" in outputs[0]["generated_text"][-1]:
                full_response = outputs[0]["generated_text"][-1]["content"].lower()
            else:
                full_response = outputs[0]["generated_text"][-1].lower()

            print('response from LLM is', full_response)
            return full_response.replace(prompt, "").strip()
45
+
utils/routing.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agent_router.py
2
+ import re
3
+
4
+ # Router to manage multi-agent classification
5
+ # agent_router.py
6
+ import re
7
+
8
def route_agent(text, has_image):
    """Pick the agent for a query.

    "agent1" handles image-based property issues; "agent2" handles
    tenancy questions. With the current fallback, every text-only query
    ends up at agent2 whether or not it matches a tenancy keyword.
    """
    # Image-based input => Image issue agent
    if has_image:
        return "agent1"

    # Tenancy-related keywords to look for (case-insensitive).
    tenancy_keywords = (
        "rent", "lease", "tenant", "landlord", "agreement", "deposit",
        "eviction", "notice", "contract", "housing law", "tenancy", "sublet",
    )

    lowered = text.lower()
    if any(keyword in lowered for keyword in tenancy_keywords):
        return "agent2"

    # Fallback — could be routed to a clarification step instead.
    return "agent2"
27
+
28
def clarify_prompt():
    """Return the question asked when a query's intent is ambiguous."""
    message = (
        "Just to clarify, are you asking about a visible issue with a property "
        "(you can also upload an image),\n"
        "or is this a general question about renting, laws, or agreements?"
    )
    return message