Spaces:
Runtime error
Runtime error
AsherKnight committed on
Commit ·
c2c5906
1
Parent(s): ad2bdfb
changes
Browse files
- agents/agent2_tenancy_faq.py +1 -3
- app.py +7 -0
- utils/captioning.py +0 -28
- utils/llm_utils.py +1 -8
- utils/routing.py +1 -7
agents/agent2_tenancy_faq.py
CHANGED
|
@@ -4,7 +4,6 @@ from geotext import GeoText
|
|
| 4 |
import spacy
|
| 5 |
import spacy.cli
|
| 6 |
|
| 7 |
-
# Initialize LLaMA and spaCy
|
| 8 |
llm = LLaMAHelper()
|
| 9 |
|
| 10 |
try:
|
|
@@ -60,6 +59,5 @@ def handle_tenancy_query(user_query, user_context, history=[], location_method="
|
|
| 60 |
prompt += f"\n\nPrevious conversation:\n{chat_context}"
|
| 61 |
|
| 62 |
prompt += f"\n\nUser's current question: {user_query}\n\nGive a concise and helpful answer. If needed, ask a follow-up question to clarify."
|
| 63 |
-
|
| 64 |
-
reply = llm.chat(system_prompt, prompt, temperature=0.7)
|
| 65 |
return reply
|
|
|
|
| 4 |
import spacy
|
| 5 |
import spacy.cli
|
| 6 |
|
|
|
|
| 7 |
llm = LLaMAHelper()
|
| 8 |
|
| 9 |
try:
|
|
|
|
| 59 |
prompt += f"\n\nPrevious conversation:\n{chat_context}"
|
| 60 |
|
| 61 |
prompt += f"\n\nUser's current question: {user_query}\n\nGive a concise and helpful answer. If needed, ask a follow-up question to clarify."
|
| 62 |
+
reply = llm.chat(system_prompt, prompt, temperature=0.6)
|
|
|
|
| 63 |
return reply
|
app.py
CHANGED
|
@@ -5,6 +5,13 @@ from agents.agent2_tenancy_faq import handle_tenancy_query
|
|
| 5 |
from PIL import Image
|
| 6 |
import torch
|
| 7 |
import hashlib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Helper to generate MD5 hash from image
|
| 10 |
def get_image_hash(image):
|
|
|
|
| 5 |
from PIL import Image
|
| 6 |
import torch
|
| 7 |
import hashlib
|
| 8 |
+
import subprocess
|
| 9 |
+
import sys
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
# Install requirements if needed
|
| 13 |
+
requirements_file = os.path.join(os.path.dirname(__file__), "requirements.txt")
|
| 14 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", requirements_file])
|
| 15 |
|
| 16 |
# Helper to generate MD5 hash from image
|
| 17 |
def get_image_hash(image):
|
utils/captioning.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoProcessor, AutoModelForCausalLM
|
| 2 |
from PIL import Image
|
| 3 |
import torch
|
| 4 |
-
from ultralytics import YOLO # You need to install: pip install ultralytics
|
| 5 |
from transformers import CLIPProcessor, CLIPModel
|
| 6 |
|
| 7 |
class ImageCaptioning:
|
|
@@ -19,12 +18,8 @@ class ImageCaptioning:
|
|
| 19 |
self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 20 |
self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
|
| 21 |
|
| 22 |
-
# # Load YOLO
|
| 23 |
-
# self.yolo_model = YOLO("yolov8n.pt") # You can use yolov8s.pt or others
|
| 24 |
-
|
| 25 |
def generate_caption_blip(self, image):
|
| 26 |
inputs = self.blip_processor(images=image, return_tensors="pt").to(self.device)
|
| 27 |
-
print(f"Inputs keys: {inputs.keys()}")
|
| 28 |
with torch.no_grad():
|
| 29 |
output = self.blip_model.generate(**inputs)
|
| 30 |
caption = self.blip_processor.decode(output[0], skip_special_tokens=True)
|
|
@@ -32,30 +27,10 @@ class ImageCaptioning:
|
|
| 32 |
|
| 33 |
def generate_caption_git(self, image):
|
| 34 |
inputs = self.git_processor(images=image, return_tensors="pt").to(self.device)
|
| 35 |
-
print(f"Inputs keys: {inputs.keys()}")
|
| 36 |
with torch.no_grad():
|
| 37 |
generated_ids = self.git_model.generate(**inputs)
|
| 38 |
caption = self.git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 39 |
return caption, self.compute_logprob(self.git_model, inputs, generated_ids, self.git_processor)
|
| 40 |
-
|
| 41 |
-
# def generate_caption_yolo(self, image):
|
| 42 |
-
# # Run YOLO detection
|
| 43 |
-
# results = self.yolo_model(image)
|
| 44 |
-
# detections = results[0].boxes.data # [x1, y1, x2, y2, conf, class]
|
| 45 |
-
# names = results[0].names
|
| 46 |
-
|
| 47 |
-
# if len(detections) == 0:
|
| 48 |
-
# return "No objects detected", 0.0
|
| 49 |
-
|
| 50 |
-
# # Get top class labels with confidence
|
| 51 |
-
# label_conf_pairs = [(names[int(cls)], float(conf)) for *_, conf, cls in detections]
|
| 52 |
-
# label_conf_pairs.sort(key=lambda x: x[1], reverse=True)
|
| 53 |
-
|
| 54 |
-
# top_labels = list({label for label, _ in label_conf_pairs[:5]}) # top 5 unique labels
|
| 55 |
-
# avg_conf = sum([conf for _, conf in label_conf_pairs[:5]]) / len(top_labels)
|
| 56 |
-
|
| 57 |
-
# caption = "Image contains: " + ", ".join(top_labels)
|
| 58 |
-
# return caption, avg_conf
|
| 59 |
|
| 60 |
def generate_caption_clip(self, image):
|
| 61 |
# Step 1: Generate caption candidates
|
|
@@ -97,11 +72,8 @@ class ImageCaptioning:
|
|
| 97 |
|
| 98 |
return -outputs.loss.item() # Higher is better
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
def get_best_caption(self, image):
|
| 103 |
# This runs BLIP and GIT, then scores both with CLIP to pick the best caption
|
| 104 |
caption, score = self.generate_caption_clip(image)
|
| 105 |
-
print(f"Selected Caption: {caption} | Confidence: {score}")
|
| 106 |
return caption, score
|
| 107 |
|
|
|
|
| 1 |
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoProcessor, AutoModelForCausalLM
|
| 2 |
from PIL import Image
|
| 3 |
import torch
|
|
|
|
| 4 |
from transformers import CLIPProcessor, CLIPModel
|
| 5 |
|
| 6 |
class ImageCaptioning:
|
|
|
|
| 18 |
self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 19 |
self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
def generate_caption_blip(self, image):
|
| 22 |
inputs = self.blip_processor(images=image, return_tensors="pt").to(self.device)
|
|
|
|
| 23 |
with torch.no_grad():
|
| 24 |
output = self.blip_model.generate(**inputs)
|
| 25 |
caption = self.blip_processor.decode(output[0], skip_special_tokens=True)
|
|
|
|
| 27 |
|
| 28 |
def generate_caption_git(self, image):
|
| 29 |
inputs = self.git_processor(images=image, return_tensors="pt").to(self.device)
|
|
|
|
| 30 |
with torch.no_grad():
|
| 31 |
generated_ids = self.git_model.generate(**inputs)
|
| 32 |
caption = self.git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 33 |
return caption, self.compute_logprob(self.git_model, inputs, generated_ids, self.git_processor)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def generate_caption_clip(self, image):
|
| 36 |
# Step 1: Generate caption candidates
|
|
|
|
| 72 |
|
| 73 |
return -outputs.loss.item() # Higher is better
|
| 74 |
|
|
|
|
|
|
|
| 75 |
def get_best_caption(self, image):
|
| 76 |
# This runs BLIP and GIT, then scores both with CLIP to pick the best caption
|
| 77 |
caption, score = self.generate_caption_clip(image)
|
|
|
|
| 78 |
return caption, score
|
| 79 |
|
utils/llm_utils.py
CHANGED
|
@@ -22,13 +22,8 @@ class LLaMAHelper:
|
|
| 22 |
tokenizer=self.tokenizer,
|
| 23 |
device=0 if torch.cuda.is_available() else -1
|
| 24 |
)
|
| 25 |
-
|
| 26 |
-
# self.text_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
|
| 27 |
-
|
| 28 |
-
# def classifier(self, text, candidate_labels):
|
| 29 |
-
# return self.text_classifier(text, candidate_labels)
|
| 30 |
|
| 31 |
-
def chat(self, system_prompt, prompt, max_new_tokens=
|
| 32 |
messages = [
|
| 33 |
{"role": "system", "content": system_prompt},
|
| 34 |
{"role": "user", "content": prompt},
|
|
@@ -39,7 +34,5 @@ class LLaMAHelper:
|
|
| 39 |
full_response = outputs[0]["generated_text"][-1]["content"].lower()
|
| 40 |
else:
|
| 41 |
full_response = outputs[0]["generated_text"][-1].lower()
|
| 42 |
-
|
| 43 |
-
print('response from LLM is', full_response)
|
| 44 |
return full_response.replace(prompt, "").strip()
|
| 45 |
|
|
|
|
| 22 |
tokenizer=self.tokenizer,
|
| 23 |
device=0 if torch.cuda.is_available() else -1
|
| 24 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
def chat(self, system_prompt, prompt, max_new_tokens=1200, temperature=0.5):
|
| 27 |
messages = [
|
| 28 |
{"role": "system", "content": system_prompt},
|
| 29 |
{"role": "user", "content": prompt},
|
|
|
|
| 34 |
full_response = outputs[0]["generated_text"][-1]["content"].lower()
|
| 35 |
else:
|
| 36 |
full_response = outputs[0]["generated_text"][-1].lower()
|
|
|
|
|
|
|
| 37 |
return full_response.replace(prompt, "").strip()
|
| 38 |
|
utils/routing.py
CHANGED
|
@@ -1,28 +1,22 @@
|
|
| 1 |
# agent_router.py
|
| 2 |
import re
|
| 3 |
|
| 4 |
-
# Router to manage multi-agent classification
|
| 5 |
-
# agent_router.py
|
| 6 |
-
import re
|
| 7 |
|
| 8 |
def route_agent(text, has_image):
|
| 9 |
"""
|
| 10 |
Determines which agent should handle the query based on image presence and content type.
|
| 11 |
"""
|
| 12 |
if has_image:
|
| 13 |
-
return "agent1"
|
| 14 |
|
| 15 |
-
# Check for tenancy-related keywords
|
| 16 |
tenancy_keywords = [
|
| 17 |
"rent", "lease", "tenant", "landlord", "agreement", "deposit",
|
| 18 |
"eviction", "notice", "contract", "housing law", "tenancy", "sublet"
|
| 19 |
]
|
| 20 |
|
| 21 |
-
# Lowercase and look for keywords
|
| 22 |
if any(word in text.lower() for word in tenancy_keywords):
|
| 23 |
return "agent2"
|
| 24 |
|
| 25 |
-
# Fallback — you could route this to a clarification step instead
|
| 26 |
return "agent2"
|
| 27 |
|
| 28 |
def clarify_prompt():
|
|
|
|
| 1 |
# agent_router.py
|
| 2 |
import re
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
def route_agent(text, has_image):
|
| 6 |
"""
|
| 7 |
Determines which agent should handle the query based on image presence and content type.
|
| 8 |
"""
|
| 9 |
if has_image:
|
| 10 |
+
return "agent1"
|
| 11 |
|
|
|
|
| 12 |
tenancy_keywords = [
|
| 13 |
"rent", "lease", "tenant", "landlord", "agreement", "deposit",
|
| 14 |
"eviction", "notice", "contract", "housing law", "tenancy", "sublet"
|
| 15 |
]
|
| 16 |
|
|
|
|
| 17 |
if any(word in text.lower() for word in tenancy_keywords):
|
| 18 |
return "agent2"
|
| 19 |
|
|
|
|
| 20 |
return "agent2"
|
| 21 |
|
| 22 |
def clarify_prompt():
|