# NOTE: extraction residue from the hosting page ("Spaces: Runtime error"); not part of the program.
| import os | |
| import faiss | |
| import torch | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from flask import Flask, request, jsonify, render_template | |
| from flask_cors import CORS | |
| from pyngrok import ngrok | |
| import requests | |
| import cloudinary | |
| import cloudinary.uploader | |
| import cloudinary.api | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig | |
| from peft import PeftModel, PeftConfig | |
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| from happytransformer import HappyTextToText, TTSettings | |
| import io | |
| import logging | |
| import geocoder | |
| from geopy.distance import geodesic | |
| import webrtcvad | |
| import collections | |
| import time | |
| from werkzeug.utils import secure_filename | |
| from geopy.geocoders import Nominatim | |
| import pickle | |
| import numpy as np | |
| import tempfile | |
| from pathlib import Path | |
# Warn (never fail) when the installed numpy differs from the version this
# app expects. The comparison is done on the version *string*: parsing each
# component with int() raises ValueError on pre-release or local versions
# such as "1.26.0rc1", which would abort the import of this module.
EXPECTED_NUMPY_VERSION = "1.23.5"
INSTALLED_NUMPY_VERSION = getattr(np, "__version__", "unknown")
if INSTALLED_NUMPY_VERSION != EXPECTED_NUMPY_VERSION:
    print(f"Warning: Using numpy version {INSTALLED_NUMPY_VERSION}. Expected version 1.23.5")

# Configure logging
logging.basicConfig(level=logging.INFO)
# Load credentials from the environment.
#
# os.getenv() takes the NAME of an environment variable. The previous code
# passed the secret values themselves as names, so every setting resolved to
# None and the secrets were committed to source control.
# SECURITY: the values that used to be embedded here must be treated as
# leaked and rotated with the respective providers.
API_KEY = os.getenv("GOOGLE_API_KEY")
CSE_ID = os.getenv("GOOGLE_CSE_ID")
CLOUDINARY_CLOUD_NAME = os.getenv("CLOUDINARY_CLOUD_NAME")
CLOUDINARY_API_KEY = os.getenv("CLOUDINARY_API_KEY")
CLOUDINARY_API_SECRET = os.getenv("CLOUDINARY_API_SECRET")

# Surface missing configuration early instead of failing deep inside a request.
for _setting_name, _setting_value in (
    ("GOOGLE_API_KEY", API_KEY),
    ("GOOGLE_CSE_ID", CSE_ID),
    ("CLOUDINARY_CLOUD_NAME", CLOUDINARY_CLOUD_NAME),
    ("CLOUDINARY_API_KEY", CLOUDINARY_API_KEY),
    ("CLOUDINARY_API_SECRET", CLOUDINARY_API_SECRET),
):
    if not _setting_value:
        logging.warning("Environment variable %s is not set", _setting_name)
# Locations of the retrieval artifacts (embedding weights, FAISS index, CSV).
load_dir = "./models/new_rag_model/"
model_path, faiss_index_path, dataset_path = (
    os.path.join(load_dir, artifact_name)
    for artifact_name in (
        "model_state_dict.pth",
        "property_faiss.index",
        "property_data.csv",
    )
)

# Directory holding the PEFT adapter for the LLM.
model_dir = "./models/llm_model"
# model_dir = "/content/drive/MyDrive/newllmmodel/final_model"
# model_dir = "/content/drive/MyDrive/real_estate_model/final_model"
# model_dir = "/content/drive/MyDrive/rag"

# Prefer the GPU whenever torch can see one.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Point every Hugging Face / XDG cache at the same directory.
# NOTE(review): these are assigned after the transformers imports at the top
# of the file; libraries that read them at import time may not pick them up
# -- confirm.
os.environ.update({
    'TRANSFORMERS_CACHE': '/cache',
    'HF_HOME': '/cache',
    'XDG_CACHE_HOME': '/cache',
})
| # Load SentenceTransformer model | |
def load_sentence_transformer():
    """Create the Jina v3 embedding model and overlay locally stored weights.

    The base model is fetched into ``/cache`` and moved to ``device``. When
    ``model_path`` exists, its state dict is converted to float32 and loaded
    into the model; otherwise a warning is printed and the base model is
    returned unchanged.

    Returns:
        The SentenceTransformer instance placed on ``device``.

    Raises:
        ImportError: if ``einops`` is not installed.
        Exception: any other loading error is printed and re-raised.
    """
    print("Loading SentenceTransformer model...")
    try:
        # The download cache must exist before the model is requested.
        hf_cache = Path('/cache')
        hf_cache.mkdir(parents=True, exist_ok=True)

        # Probe for einops up front so a missing install yields a clear message.
        try:
            import einops
        except ImportError:
            raise ImportError("einops is required. Please install it with 'pip install einops'")

        embedder = SentenceTransformer(
            "jinaai/jina-embeddings-v3",
            trust_remote_code=True,
            cache_folder=str(hf_cache),
        )
        embedder = embedder.to(device)

        if not os.path.exists(model_path):
            print(f"Warning: Model file not found at {model_path}")
            return embedder

        weights = torch.load(model_path, map_location=device)
        # Normalise entries to float32 in place so the mapping object returned
        # by torch.load is the one handed to load_state_dict.
        for name in list(weights.keys()):
            value = weights[name]
            if hasattr(value, 'dequantize'):
                weights[name] = value.dequantize().to(dtype=torch.float32)
            elif value.dtype == torch.bfloat16:
                weights[name] = value.to(dtype=torch.float32)
        embedder.load_state_dict(weights)
        print("SentenceTransformer model loaded successfully.")
        return embedder
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise
| # Load FAISS index | |
def load_faiss_index():
    """Read the FAISS index from ``faiss_index_path`` and return it as a CPU index.

    Raises:
        Exception: any failure is printed and re-raised to the caller.
    """
    print("Loading FAISS index...")
    try:
        loaded_index = faiss.read_index(faiss_index_path)
        # Only GPU-enabled faiss builds expose StandardGpuResources; on those
        # builds the index is passed through the GPU->CPU conversion.
        gpu_build = hasattr(faiss, 'StandardGpuResources')
        if gpu_build:
            loaded_index = faiss.index_gpu_to_cpu(loaded_index)
        print("FAISS index loaded successfully.")
        return loaded_index
    except Exception as e:
        print(f"Error loading FAISS index: {str(e)}")
        raise
| # Load dataset | |
def load_dataset():
    """Read the property CSV at ``dataset_path`` into a pandas DataFrame."""
    print("Loading dataset...")
    properties = pd.read_csv(dataset_path)
    print("Dataset loaded successfully.")
    return properties
| # Custom Retriever Class | |
class CustomRagRetriever:
    """Nearest-neighbour property lookup over a FAISS index.

    Queries are embedded with ``model``, optionally projected through a PCA
    model stored next to the embedding weights, and searched in the FAISS
    index. Hits are resolved to rows of the module-level ``df`` DataFrame.
    """

    def __init__(self, faiss_index, model):
        """Store the index and encoder, and load the optional PCA projection.

        Args:
            faiss_index: index searched by :meth:`retrieve`.
            model: encoder exposing ``encode(...)``.
        """
        self.index = faiss_index
        self.model = model
        self.pca = None
        # Load PCA if it exists
        pca_path = os.path.join(os.path.dirname(model_path), "pca_model.pkl")
        if os.path.exists(pca_path):
            try:
                # SECURITY: unpickling executes arbitrary code. This is only
                # acceptable because the file is a local artifact shipped with
                # the model; never point this at user-supplied data.
                with open(pca_path, 'rb') as f:
                    self.pca = pickle.load(f)
            except ModuleNotFoundError:
                print("Warning: Could not load PCA model due to numpy version mismatch. Continuing without PCA.")
                self.pca = None
            except Exception as e:
                print(f"Warning: Error loading PCA model: {str(e)}. Continuing without PCA.")
                self.pca = None

    def retrieve(self, query, top_k=10):
        """Return up to ``top_k`` properties closest to ``query``.

        Args:
            query: free-text search string.
            top_k: maximum number of hits; values <= 0 yield an empty list.

        Returns:
            A list of dicts with keys ``property`` (the DataFrame row),
            ``image_url`` and ``distance``.

        Raises:
            Exception: any encoding or search failure is printed and re-raised.
        """
        print(f"Retrieving properties for query: {query}")
        if top_k <= 0:
            # Nothing was asked for; skip the encoder and the index entirely.
            print("Retrieved 0 properties")
            return []
        try:
            # Get query embedding with optimizations
            with torch.no_grad():
                query_embedding = self.model.encode(
                    [query],
                    convert_to_numpy=True,
                    device=device,
                    normalize_embeddings=True
                )
            # Convert to FP32
            query_embedding = query_embedding.astype(np.float32)
            # Only apply PCA if it was successfully loaded
            if self.pca is not None:
                try:
                    query_embedding = self.pca.transform(query_embedding)
                except Exception as e:
                    print(f"Warning: Error applying PCA transformation: {str(e)}")
            distances, indices = self.index.search(query_embedding, top_k)

            total_rows = len(df)
            retrieved_properties = []
            for idx, dist in zip(indices[0], distances[0]):
                row_number = int(idx)
                # FAISS pads the result with -1 when fewer than top_k vectors
                # match; df.iloc[-1] would silently return the LAST row. Also
                # skip labels beyond the dataset (index and CSV out of sync).
                if row_number < 0 or row_number >= total_rows:
                    continue
                property_data = df.iloc[row_number]
                retrieved_properties.append({
                    "property": property_data,
                    "image_url": property_data["property_image"],
                    "distance": float(dist)
                })
            print(f"Retrieved {len(retrieved_properties)} properties")
            return retrieved_properties
        except Exception as e:
            print(f"Error in retrieve: {str(e)}")
            raise
# Build the retrieval stack once at import time: dataset, encoder, index,
# and the retriever that ties the index and the encoder together.
df = load_dataset()
model_embedding = load_sentence_transformer()
index = load_faiss_index()
retriever = CustomRagRetriever(faiss_index=index, model=model_embedding)
| # Load tokenizer and LLM model | |
def load_tokenizer_and_model():
    """Load the tokenizer, the 4-bit base LLM, and the PEFT adapter from ``model_dir``.

    Returns:
        A ``(tokenizer, model)`` pair where ``model`` is the base model wrapped
        with the adapter in inference mode.

    Raises:
        Exception: any loading error is printed and re-raised.
    """
    print("Loading tokenizer...")
    try:
        base_model_name = "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit"

        loaded_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
        print("Tokenizer loaded successfully.")

        print("Loading LLM model...")
        # Options for loading the base model with 4-bit quantization.
        base_model_options = {
            "trust_remote_code": True,
            "load_in_4bit": True,
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_compute_dtype": torch.float16,
            "device_map": "auto",
        }
        quantized_base = AutoModelForCausalLM.from_pretrained(base_model_name, **base_model_options)

        # Attach the fine-tuned adapter on top of the frozen base model.
        adapted_model = PeftModel.from_pretrained(
            quantized_base,
            model_dir,
            device_map="auto",
            is_trainable=False,
        )
        print("LLM model loaded successfully.")
        return loaded_tokenizer, adapted_model
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise


tokenizer, model_llm = load_tokenizer_and_model()
| # Configure Cloudinary | |
def configure_cloudinary():
    """Apply the module-level Cloudinary credentials to the cloudinary SDK."""
    print("Configuring Cloudinary...")
    credentials = {
        "cloud_name": CLOUDINARY_CLOUD_NAME,
        "api_key": CLOUDINARY_API_KEY,
        "api_secret": CLOUDINARY_API_SECRET,
    }
    cloudinary.config(**credentials)
    print("Cloudinary configured successfully.")


configure_cloudinary()
| # Search real estate properties | |
def _is_missing(value):
    """Return True for None and float NaN (how pandas marks empty CSV cells)."""
    return value is None or (isinstance(value, float) and value != value)


def _coerce_int(value, default=0):
    """Convert ``value`` to int via float; fall back to ``default`` when impossible."""
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        # int(float('nan')) raises ValueError, int(float('inf')) OverflowError.
        return default


def _coerce_float(value, default=0.0):
    """Convert ``value`` to a real float; NaN and unparsable input give ``default``."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    return default if number != number else number


def _coerce_text(value, default='N/A'):
    """Return ``value`` unchanged unless it is missing, in which case ``default``."""
    return default if _is_missing(value) else value


def search_real_estate(query, retriever, top_k=10, raw_results=False):
    """Search properties and return them either raw or flattened for display.

    Args:
        query: free-text search string.
        retriever: object exposing ``retrieve(query, top_k)`` that returns
            dicts with ``property``, ``image_url`` and ``distance``.
        top_k: maximum number of hits to request.
        raw_results: when True, return the retriever output untouched.

    Returns:
        The retriever output when ``raw_results`` is True, otherwise a list of
        plain dicts. Missing or non-numeric cells (for example NaN from an
        empty CSV field) become ``0`` / ``0.0`` / ``'N/A'`` / ``False``
        instead of raising or leaking NaN into the payload.
    """
    print(f"Searching real estate properties for query: {query}")
    search_results = retriever.retrieve(query, top_k)
    if raw_results:
        return search_results

    formatted_results = []
    for result in search_results:
        property_info = result['property']
        get = property_info.get
        formatted_results.append({
            "Property Name": _coerce_text(get('PropertyName', 'N/A')),
            "Address": _coerce_text(get('Address', 'N/A')),
            "ZipCode": _coerce_int(get('ZipCode', 0)),
            "LeasableSquareFeet": _coerce_int(get('LeasableSquareFeet', 0)),
            "YearBuilt": _coerce_int(get('YearBuilt', 0)),
            "NumberOfRooms": _coerce_int(get('NumberOfRooms', 0)),
            "ParkingSpaces": _coerce_int(get('ParkingSpaces', 0)),
            "PropertyManager": _coerce_text(get('PropertyManager', 'N/A')),
            "MarketValue": _coerce_float(get('MarketValue', 0)),
            "TaxAssessmentNumber": _coerce_text(get('TaxAssessmentNumber', 'N/A')),
            "Latitude": _coerce_float(get('Latitude', 0)),
            "Longitude": _coerce_float(get('Longitude', 0)),
            "CreateDate": _coerce_text(get('CreateDate', 'N/A')),
            "LastModifiedDate": _coerce_text(get('LastModifiedDate', 'N/A')),
            "City": _coerce_text(get('City', 'N/A')),
            "State": _coerce_text(get('State', 'N/A')),
            "Country": _coerce_text(get('Country', 'N/A')),
            "PropertyType": _coerce_text(get('PropertyType', 'N/A')),
            "PropertyStatus": _coerce_text(get('PropertyStatus', 'N/A')),
            "Description": _coerce_text(get('Description', 'N/A')),
            "ViewNumber": _coerce_int(get('ViewNumber', 0)),
            "Contact": _coerce_int(get('Contact', 0)),
            "TotalSquareFeet": _coerce_int(get('TotalSquareFeet', 0)),
            # bool(nan) is True, so a missing flag must be mapped explicitly.
            "IsDeleted": bool(_coerce_text(get('IsDeleted', False), False)),
            "Beds": _coerce_int(get('Beds', 0)),
            "Baths": _coerce_int(get('Baths', 0)),
            "AgentName": _coerce_text(get('AgentName', 'N/A')),
            "AgentPhoneNumber": _coerce_text(get('AgentPhoneNumber', 'N/A')),
            "AgentEmail": _coerce_text(get('AgentEmail', 'N/A')),
            "KeyFeatures": _coerce_text(get('KeyFeatures', 'N/A')),
            "NearbyAmenities": _coerce_text(get('NearbyAmenities', 'N/A')),
            "Property Image": result['image_url'],
            "Distance": result['distance']
        })
    print(f"Found {len(formatted_results)} matching properties")
    return formatted_results
| # Generate response with optimized parameters | |
def generate_response(query, max_new_tokens=100, temperature=0.7, top_k=30, top_p=0.8, repetition_penalty=1.05):
    """Sample a completion from the LLM for ``query``.

    Args:
        query: user text; wrapped as ``"User: ...\\nAssistant:"`` before generation.
        max_new_tokens: upper bound on generated tokens.
        temperature, top_k, top_p, repetition_penalty: sampling controls
            forwarded to ``model_llm.generate``.

    Returns:
        ``(response_text, seconds)`` on success, or
        ``("An error occurred while generating the response.", None)`` when
        tokenisation or generation fails.
    """
    print(f"\nGenerating response for query: {query}\n")
    # Print parameter settings
    print("Generation Parameters:")
    print(f"- Max New Tokens: {max_new_tokens}")
    print(f"- Temperature: {temperature}")
    print(f"- Top-K Sampling: {top_k}")
    print(f"- Top-P Sampling: {top_p}")
    print(f"- Repetition Penalty: {repetition_penalty}")
    print(f"- Sampling Enabled: True (do_sample=True)\n")

    input_text = f"User: {query}\nAssistant:"
    start_time = time.time()  # Record start time
    try:
        # Tokenise inside the try so a tokenizer failure is reported the same
        # way as a generation failure.
        inputs = tokenizer(input_text, return_tensors="pt").to(device)
        input_ids = inputs["input_ids"]

        # Fall back to EOS when the tokenizer defines no pad token.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        with torch.no_grad():
            outputs = model_llm.generate(
                input_ids=input_ids,
                # Pass the mask explicitly so it never has to be inferred from
                # pad tokens.
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=pad_token_id
            )

        # A causal LM returns prompt + continuation. Slice the prompt off by
        # token count; removing it by string replacement fails whenever
        # decoding does not reproduce the prompt text exactly.
        prompt_length = input_ids.shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

        end_time = time.time()  # Record end time
        duration = end_time - start_time  # Calculate duration
        print(f"\nGenerated Response:\n{response}\n")
        print(f"Time taken to generate response: {duration:.2f} seconds\n")
        return response, duration
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        return "An error occurred while generating the response.", None
| # Combined model response with optimized parameters | |
def _describe_property(position, property_info):
    """Render one retrieved property as the text block used in the LLM prompt."""
    # Tax assessment number, coordinates, view count and the deleted flag are
    # deliberately left out of the prompt.
    return (
        f"Property {position}:\n"
        f"Property Name: {property_info['PropertyName']}\n"
        f"Address: {property_info['Address']}, {property_info['City']}, {property_info['State']}, {property_info['ZipCode']}, {property_info['Country']}\n"
        f"Leasable Area: {property_info['LeasableSquareFeet']} sqft\n"
        f"Year Built: {property_info['YearBuilt']}\n"
        f"Beds: {property_info['Beds']} Baths: {property_info['Baths']}\n"
        f"Parking Spaces: {property_info['ParkingSpaces']}\n"
        f"Market Value: {property_info['MarketValue']}\n"
        f"Property Type: {property_info['PropertyType']}\n"
        f"Property Status: {property_info['PropertyStatus']}\n"
        f"Description: {property_info['Description']}\n"
        f"Contact: {property_info['Contact']}\n"
        f"Total Square Feet: {property_info['TotalSquareFeet']} sqft\n"
        f"Agent Name: {property_info['AgentName']}\n"
        f"Agent Phone Number: {property_info['AgentPhoneNumber']}\n"
        f"Agent Email: {property_info['AgentEmail']}\n"
        f"Key Features: {property_info['KeyFeatures']}\n"
        f"Nearby Amenities: {property_info['NearbyAmenities']}\n"
        f"Created Date: {property_info['CreateDate']}\n"
        f"Last Modified Date: {property_info['LastModifiedDate']}\n"
    )


def combined_model_response(query, retriever, top_k=5, max_new_tokens=512, temperature=0.5, top_k_sampling=30, repetition_penalty=1.0):
    """Retrieve matching properties and ask the LLM to answer ``query`` from them.

    Args:
        query: user question.
        retriever: retriever passed through to ``search_real_estate``.
        top_k: number of properties to retrieve.
        max_new_tokens, temperature, top_k_sampling, repetition_penalty:
            generation settings forwarded to ``generate_response``
            (previously all but ``max_new_tokens`` were silently ignored).

    Returns:
        Always a ``(response, duration)`` tuple. When nothing is retrieved the
        response is ``"No relevant properties found."`` with a duration of
        ``0.0`` (previously a bare string, which broke tuple-unpacking
        callers). ``duration`` is ``None`` when generation failed.
    """
    print(f"Generating combined model response for query: {query}")
    retrieved_results = search_real_estate(query, retriever, top_k, raw_results=True)
    if not retrieved_results:
        return "No relevant properties found.", 0.0

    combined_property_details = [
        _describe_property(position, result['property'])
        for position, result in enumerate(retrieved_results, 1)
    ]
    prompt = f"User Query: {query}\nProperty Details:\n" + "\n".join(combined_property_details) + "\nGenerate a concise response based on the user's query and retrieved property details."
    print(f"User Query: {query}")

    response, duration = generate_response(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k_sampling,
        repetition_penalty=repetition_penalty,
    )
    print(f"Combined model response: {response}")
    # generate_response reports failure with duration=None; formatting that
    # with :.2f would raise TypeError.
    if duration is not None:
        print(f"Time taken to generate combined model response: {duration:.2f} seconds\n")
    return response, duration
| # VAD Audio Class | |
class VADAudio:
    """Voice-activity helper that splits raw PCM audio into voiced segments."""

    def __init__(self, aggressiveness=3):
        """Create the detector with 16 kHz audio and 30 ms frames as defaults."""
        self.vad = webrtcvad.Vad(aggressiveness)
        self.sample_rate = 16000
        self.frame_duration_ms = 30

    def frame_generator(self, audio, frame_duration_ms, sample_rate, sample_width=2):
        """Yield consecutive fixed-duration frames from raw PCM bytes.

        Args:
            audio: raw PCM byte string.
            frame_duration_ms: frame length in milliseconds.
            sample_rate: samples per second.
            sample_width: bytes per sample (2 for 16-bit PCM).

        The frame size is computed in BYTES. The previous code used the sample
        count as a byte count, which produced half-length (15 ms) frames for
        16-bit audio. A trailing partial frame is dropped; an exact final
        frame is kept.
        """
        samples_per_frame = int(sample_rate * frame_duration_ms / 1000)
        frame_bytes = samples_per_frame * sample_width
        if frame_bytes <= 0:
            return
        offset = 0
        while offset + frame_bytes <= len(audio):
            yield audio[offset:offset + frame_bytes]
            offset += frame_bytes

    def vad_collector(self, audio, sample_rate, frame_duration_ms, padding_duration_ms=300, aggressiveness=3):
        """Yield one bytes object per detected stretch of speech.

        A sliding window of ``padding_duration_ms`` decides when speech starts
        (more than 90% of the window voiced) and ends (more than 90%
        unvoiced). The frames of each voiced stretch, including the window
        that triggered it, are joined and yielded once the stretch ends or the
        audio runs out.

        Args:
            audio: raw 16-bit mono PCM bytes.
            sample_rate: samples per second.
            frame_duration_ms: frame length in milliseconds.
            padding_duration_ms: length of the decision window.
            aggressiveness: mode passed to a fresh ``webrtcvad.Vad``.
        """
        vad = webrtcvad.Vad(aggressiveness)
        # At least one frame of padding; a zero-length window can never trigger.
        num_padding_frames = max(1, int(padding_duration_ms / frame_duration_ms))
        ring_buffer = collections.deque(maxlen=num_padding_frames)
        triggered = False
        voiced_frames = []
        for frame in self.frame_generator(audio, frame_duration_ms, sample_rate):
            is_speech = vad.is_speech(frame, sample_rate)
            ring_buffer.append((frame, is_speech))
            if not triggered:
                num_voiced = sum(1 for _, speech in ring_buffer if speech)
                if num_voiced > 0.9 * ring_buffer.maxlen:
                    triggered = True
                    # Keep the window that led up to the trigger so the start
                    # of the utterance is not clipped.
                    voiced_frames.extend(buffered for buffered, _ in ring_buffer)
                    ring_buffer.clear()
            else:
                voiced_frames.append(frame)
                num_unvoiced = sum(1 for _, speech in ring_buffer if not speech)
                if num_unvoiced > 0.9 * ring_buffer.maxlen:
                    triggered = False
                    # The ring buffer holds (frame, flag) tuples and cannot be
                    # joined directly; emit the collected frames instead.
                    yield b''.join(voiced_frames)
                    ring_buffer.clear()
                    voiced_frames = []
        # Flush speech that was still in progress when the audio ended.
        if voiced_frames:
            yield b''.join(voiced_frames)
| # Transcribe with VAD | |
def transcribe_with_vad(audio_file):
    """Transcribe the first recognisable voiced chunk of ``audio_file``.

    The audio is converted to 16 kHz, mono, 16-bit PCM, split by
    ``VADAudio.vad_collector``, and each non-empty chunk is sent to Google
    Speech Recognition until one yields text.

    Args:
        audio_file: path or file-like object accepted by ``AudioSegment.from_file``.

    Returns:
        The first successful transcription, or ``""`` when no chunk could be
        recognised.
    """
    vad_audio = VADAudio()
    audio = AudioSegment.from_file(audio_file)
    # Force 16-bit samples as well as rate/channels: the frame sizes and the
    # AudioData below are only consistent for 16-bit PCM.
    audio = (
        audio.set_frame_rate(vad_audio.sample_rate)
        .set_channels(1)
        .set_sample_width(2)
    )
    raw_audio = audio.raw_data
    frames = vad_audio.vad_collector(raw_audio, vad_audio.sample_rate, vad_audio.frame_duration_ms)

    # One recognizer serves every chunk; nothing about it is per-chunk.
    recognizer = sr.Recognizer()
    for frame in frames:
        if not frame:
            continue
        audio_data = sr.AudioData(frame, vad_audio.sample_rate, audio.sample_width)
        try:
            text = recognizer.recognize_google(audio_data)
            print(f"Transcription: {text}")
            return text
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand the audio")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
    return ""
# Flask application; templates are served from sample_data/templates.
app = Flask(__name__, template_folder="sample_data/templates")

# In-memory per-session state keyed by session_id. The search, generate,
# recommend and set_location handlers all write to it.
conversation_context = {}

# Allow the local front-end (port 4200, http and https) to call every route.
_cors_policy = {
    "origins": ["http://localhost:4200", "https://localhost:4200"],
    "methods": ["GET", "POST", "OPTIONS"],
    "allow_headers": ["Content-Type", "X-Session-ID"],
}
CORS(app, resources={r"/*": _cors_policy})
def handle_preflight():
    """Answer CORS preflight (OPTIONS) requests; return None for any other method.

    NOTE(review): no registration decorator (for example a before-request
    hook) is visible on this function in this view -- confirm that it is
    actually attached to the app.
    """
    if request.method != 'OPTIONS':
        return None
    preflight = app.make_default_options_response()
    preflight.headers.add('Access-Control-Allow-Headers', 'Content-Type, X-Session-ID')
    preflight.headers.add('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
    return preflight
def index():
    """Render the landing page template.

    NOTE(review): this definition rebinds the module-level name ``index``,
    which earlier holds the loaded FAISS index. The retriever keeps its own
    reference, but later code must not expect ``index`` to be the FAISS
    object. No route decorator is visible in this view -- confirm.
    """
    print("Rendering index page")
    page = render_template('index.html')
    return page
def _format_search_hit(result):
    """Flatten one retriever hit into a JSON-safe dict for the search endpoint."""

    def missing(value):
        # pandas represents empty CSV cells as float NaN.
        return value is None or (isinstance(value, float) and value != value)

    def as_int(value):
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    def as_float(value):
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        return 0.0 if number != number else number

    def as_text(value):
        return 'N/A' if missing(value) else value

    get = result['property'].get
    is_deleted = get('IsDeleted', False)
    return {
        "Property Name": as_text(get('PropertyName', 'N/A')),
        "Address": as_text(get('Address', 'N/A')),
        "ZipCode": as_int(get('ZipCode', 0)),
        "LeasableSquareFeet": as_int(get('LeasableSquareFeet', 0)),
        "YearBuilt": as_int(get('YearBuilt', 0)),
        "NumberOfRooms": as_int(get('NumberOfRooms', 0)),
        "ParkingSpaces": as_int(get('ParkingSpaces', 0)),
        "PropertyManager": as_text(get('PropertyManager', 'N/A')),
        "MarketValue": as_float(get('MarketValue', 0)),
        "TaxAssessmentNumber": as_text(get('TaxAssessmentNumber', 'N/A')),
        "City": as_text(get('City', 'N/A')),
        "State": as_text(get('State', 'N/A')),
        "Country": as_text(get('Country', 'N/A')),
        "PropertyType": as_text(get('PropertyType', 'N/A')),
        "PropertyStatus": as_text(get('PropertyStatus', 'N/A')),
        "Description": as_text(get('Description', 'N/A')),
        "ViewNumber": as_int(get('ViewNumber', 0)),
        "Contact": as_int(get('Contact', 0)),
        "TotalSquareFeet": as_int(get('TotalSquareFeet', 0)),
        # bool(nan) is True, so a missing flag is mapped to False explicitly.
        "IsDeleted": False if missing(is_deleted) else bool(is_deleted),
        "Beds": as_int(get('Beds', 0)),
        "Baths": as_int(get('Baths', 0)),
        "AgentName": as_text(get('AgentName', 'N/A')),
        "AgentPhoneNumber": as_text(get('AgentPhoneNumber', 'N/A')),
        "AgentEmail": as_text(get('AgentEmail', 'N/A')),
        "KeyFeatures": as_text(get('KeyFeatures', 'N/A')),
        "NearbyAmenities": as_text(get('NearbyAmenities', 'N/A')),
        "Property Image": result['image_url'],
        "Distance": float(result['distance'])
    }


def search():
    """Search endpoint: return matching properties as a JSON list.

    Expects a JSON body with ``query`` and optionally ``session_id`` and
    ``continue``. With ``continue`` set and a cached result list for the
    session, the cached list is returned; otherwise a fresh search runs and
    is cached for the session.

    Returns:
        A JSON list of property dicts; 400 when ``query`` is missing; 500 with
        an error message on unexpected failures.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered.
    """
    try:
        # silent=True: a missing or invalid JSON body becomes a clean 400
        # below instead of an exception that ends up as a 500.
        data = request.get_json(silent=True) or {}
        query = data.get('query')
        session_id = data.get('session_id')
        continue_conversation = data.get('continue', False)
        if not query:
            return jsonify({"error": "Query parameter is missing"}), 400

        cached = conversation_context.get(session_id)
        # Other handlers store strings or dicts under the same session key;
        # only a list is a reusable search result.
        if continue_conversation and isinstance(cached, list):
            formatted_results = cached
        else:
            formatted_results = [_format_search_hit(hit) for hit in retriever.retrieve(query)]
            # Never cache under a missing session id: it would be shared by
            # every caller that sends no session.
            if session_id is not None:
                conversation_context[session_id] = formatted_results

        print(f"Returning {len(formatted_results)} search results")
        return jsonify(formatted_results)
    except Exception as e:
        logging.error(f"Error in search endpoint: {str(e)}")
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
# Grammar-correction model, created on first use and then reused. Building it
# inside the request handler reloaded the T5 weights on every call.
_GRAMMAR_MODEL = None


def _get_grammar_model():
    """Return the shared grammar-correction model, loading it on first use."""
    global _GRAMMAR_MODEL
    if _GRAMMAR_MODEL is None:
        _GRAMMAR_MODEL = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
    return _GRAMMAR_MODEL


def transcribe():
    """Transcription endpoint: speech-to-text followed by grammar correction.

    Expects a multipart upload named ``audio`` with a wav/mp3/ogg/webm
    extension. The file is converted to 16 kHz mono WAV, sent to Google
    Speech Recognition, and the text is passed through the grammar model.

    Returns:
        JSON with ``transcription`` (corrected) and ``original``; 400 for a
        missing/invalid file or unintelligible audio; 500 for recognition
        service or processing errors.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files['audio']

    # Ensure the file has an allowed extension (filename may be empty/None).
    allowed_extensions = {'wav', 'mp3', 'ogg', 'webm'}
    filename = audio_file.filename or ''
    extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
    if extension not in allowed_extensions:
        return jsonify({"error": "Invalid audio file format"}), 400

    try:
        # A private temporary directory per request: fixed file names in a
        # shared folder let concurrent uploads overwrite each other, and for
        # .wav uploads the source and converted paths used to be the same
        # file, so the second cleanup call raised and produced a 500.
        with tempfile.TemporaryDirectory() as temp_dir:
            upload_path = os.path.join(temp_dir, 'upload.' + extension)
            audio_file.save(upload_path)

            # Convert audio to the format expected by the recognizer.
            audio = AudioSegment.from_file(upload_path)
            audio = audio.set_channels(1)  # Convert to mono
            audio = audio.set_frame_rate(16000)  # Set sample rate to 16kHz

            # Save as WAV for speech recognition
            wav_path = os.path.join(temp_dir, 'converted.wav')
            audio.export(wav_path, format="wav")

            # Perform speech recognition
            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
        # Temporary files are removed when the directory context exits,
        # whether or not an exception was raised.

        # Grammar correction
        settings = TTSettings(do_sample=True, top_k=50, temperature=0.7)
        corrected_text = _get_grammar_model().generate_text(f"grammar: {text}", args=settings)
        print(f"Original Transcription: {text}")
        print(f"Corrected Transcription: {corrected_text.text}")
        return jsonify({
            "transcription": corrected_text.text,
            "original": text
        })
    except sr.UnknownValueError:
        return jsonify({"error": "Could not understand audio"}), 400
    except sr.RequestError as e:
        return jsonify({"error": f"Google Speech Recognition error: {str(e)}"}), 500
    except Exception as e:
        logging.error(f"Error processing audio: {str(e)}")
        return jsonify({"error": f"Audio processing error: {str(e)}"}), 500
def generate():
    """Generation endpoint: answer ``query`` with the LLM alone (no retrieval).

    Expects a JSON body with ``query`` and optionally ``session_id`` and
    ``continue``. With ``continue`` set and stored context for the session,
    the stored context is prepended to the query.

    Returns:
        JSON with ``response`` and ``duration`` (``null`` when generation
        failed); 400 when ``query`` is missing.

    NOTE(review): the response is stored under the session id and replaces
    whatever another handler stored there (for example location data) --
    confirm this is intended. No route decorator is visible in this view.
    """
    # silent=True: a missing or invalid JSON body yields the 400 below.
    data = request.get_json(silent=True) or {}
    query = data.get('query')
    session_id = data.get('session_id')
    continue_conversation = data.get('continue', False)
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    if session_id in conversation_context and continue_conversation:
        previous_results = conversation_context[session_id]
        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
        response, duration = generate_response(combined_query)
    else:
        response, duration = generate_response(query)

    conversation_context[session_id] = response
    print(f"Generated response: {response}")
    # generate_response signals failure with duration=None; formatting None
    # with :.2f would raise TypeError and turn the error message into a 500.
    if duration is not None:
        print(f"Time taken to generate response: {duration:.2f} seconds\n")
    return jsonify({"response": response, "duration": duration})
def recommend():
    """Recommendation endpoint: nearby-property flow or retrieval-augmented answer.

    Expects a JSON body with ``query`` and optionally ``session_id`` and
    ``continue``.

    * ``"hi"`` returns the prompt asking whether to list nearby properties.
    * ``"yes"`` returns the 5 properties nearest to the location stored for
      the session (400 when no location is stored).
    * Anything else is answered by ``combined_model_response``.

    Returns:
        A JSON payload as described above; 400 when ``query`` is missing.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered.
    """
    # silent=True: a missing or invalid JSON body yields the 400 below.
    data = request.get_json(silent=True) or {}
    query = data.get('query')
    session_id = data.get('session_id')
    continue_conversation = data.get('continue', False)
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    # str() guards against non-string JSON values such as numbers.
    normalized_query = str(query).strip().lower()
    if normalized_query == 'hi':
        return jsonify({"response": "Do you want to know the properties located near you? (yes/no):"})

    if normalized_query == 'yes':
        session_state = conversation_context.get(session_id)
        # The session slot may hold a string or a list written by other
        # handlers; `'location' in <str>` would be a substring test.
        if not (isinstance(session_state, dict) and 'location' in session_state):
            return jsonify({"error": "Location not available. Please try again."}), 400
        latitude, longitude = session_state['location']
        my_location = (latitude, longitude)

        # Keep only rows with usable coordinates. to_numeric handles negative
        # values and numeric strings; between() drops NaN and out-of-range
        # values, both of which make geodesic raise.
        latitudes = pd.to_numeric(df['Latitude'], errors='coerce')
        longitudes = pd.to_numeric(df['Longitude'], errors='coerce')
        usable = latitudes.between(-90, 90) & longitudes.between(-180, 180)
        valid_properties = df[usable].copy()
        if valid_properties.empty:
            return jsonify({"response": "No valid properties found near your location."})
        valid_properties['Latitude'] = latitudes[usable]
        valid_properties['Longitude'] = longitudes[usable]

        # Calculate distances for valid properties
        valid_properties['Distance'] = valid_properties.apply(
            lambda row: geodesic(my_location, (row['Latitude'], row['Longitude'])).miles,
            axis=1
        )

        # Get 5 nearest properties
        nearest_properties = valid_properties.nsmallest(5, 'Distance')[[
            'PropertyName', 'Address', 'City', 'Distance',
            'PropertyType', 'AgentPhoneNumber'
        ]]
        # NaN is not valid JSON; send missing cells as null.
        nearest_properties = nearest_properties.astype(object).where(nearest_properties.notna(), None)
        nearest_properties_list = nearest_properties.to_dict(orient='records')
        if not nearest_properties_list:
            return jsonify({"response": "No valid properties found near your location."})
        return jsonify({
            "response": "Here are the 5 nearest properties to your location:",
            "properties": nearest_properties_list
        })

    if session_id in conversation_context and continue_conversation:
        previous_results = conversation_context[session_id]
        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
        outcome = combined_model_response(combined_query, retriever)
    else:
        outcome = combined_model_response(query, retriever)

    # combined_model_response may hand back a bare message instead of a
    # (response, duration) pair when nothing was retrieved.
    if isinstance(outcome, tuple):
        response, duration = outcome
    else:
        response, duration = outcome, None

    conversation_context[session_id] = response
    print(f"Recommended response: {response}")
    # duration is None when generation failed or nothing was retrieved.
    if duration is not None:
        print(f"Time taken to generate recommended response: {duration:.2f} seconds\n")
    return jsonify({"response": response, "duration": duration})
def set_location():
    """Location endpoint: store the caller's coordinates and resolved place names.

    Expects a JSON body with ``latitude``, ``longitude`` and ``session_id``.
    The coordinates are reverse-geocoded with Nominatim and stored, together
    with city/state/country, under the session id in ``conversation_context``.

    Returns:
        JSON with a success message and ``city``/``state``/``country``; 400
        for missing, non-numeric or out-of-range coordinates, or when no
        address is found; 500 when the lookup fails.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered.
    """
    # silent=True: a missing or invalid JSON body yields the 400 below.
    data = request.get_json(silent=True) or {}
    latitude = data.get('latitude')
    longitude = data.get('longitude')
    session_id = data.get('session_id')
    if latitude is None or longitude is None:
        return jsonify({"error": "Location parameters are missing"}), 400

    # Validate before calling the geocoder or storing anything: the stored
    # pair is later fed straight into distance calculations.
    try:
        latitude = float(latitude)
        longitude = float(longitude)
    except (TypeError, ValueError):
        return jsonify({"error": "Location parameters must be numeric"}), 400
    # Range comparisons are False for NaN, so NaN/inf are rejected here too.
    if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
        return jsonify({"error": "Location parameters are out of range"}), 400

    try:
        # Initialize the geolocator
        geolocator = Nominatim(user_agent="hive_prop")
        # Get location details from coordinates
        location = geolocator.reverse(f"{latitude}, {longitude}", language='en')
        if not (location and location.raw.get('address')):
            return jsonify({"error": "Could not determine city from coordinates"}), 400

        address = location.raw['address']
        # Fall back through progressively coarser place types.
        city = address.get('city') or address.get('town') or address.get('suburb') or address.get('county')
        state = address.get('state')
        country = address.get('country')

        # Store location data in conversation context
        conversation_context[session_id] = {
            'location': (latitude, longitude),
            'city': city,
            'state': state,
            'country': country
        }
        return jsonify({
            "message": "Location set successfully.",
            "city": city,
            "state": state,
            "country": country
        })
    except Exception as e:
        logging.error(f"Error getting location details: {str(e)}")
        return jsonify({"error": f"Error processing location: {str(e)}"}), 500
if __name__ == '__main__':
    # The ngrok tunnel is no longer used:
    # public_url = ngrok.connect(5000)
    # print(f' * ngrok tunnel "http://127.0.0.1:5000" -> "{public_url}"')
    # Listen on all interfaces on port 7860 (standard for Spaces).
    server_options = {"host": '0.0.0.0', "port": 7860}
    app.run(**server_options)