Spaces:

sksameermujahid
/

testingnewcode

Runtime error

App Files Files Community

sksameermujahid committed on Mar 24, 2025

Commit

0a1a1bb

verified ·

1 Parent(s): 0c872bc

Upload 4 files

Browse files

Files changed (4) hide show

.gitignore +29 -0
Dockerfile +26 -0
app.py +634 -0
requirements.txt +16 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,29 @@

+# Byte-compiled / optimized Python files
+__pycache__/
+*.py[cod]
+*$py.class
+# Compiled extension modules
+*.so
+# Distribution / packaging artifacts
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# Environment files and virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# macOS Finder metadata
+.DS_Store

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+FROM python:3.9-slim
+WORKDIR /code
+# Install system dependencies
+# ffmpeg is needed by pydub to decode the non-WAV uploads (mp3/ogg/webm)
+# that the /transcribe endpoint in app.py accepts.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    software-properties-common \
+    git \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application
+COPY . .
+# Make port 7860 available (app.py binds to 0.0.0.0:7860)
+EXPOSE 7860
+# Run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,634 @@

+!pip install flask-cors
+!pip install Flask pyngrok requests cloudinary SpeechRecognition pydub happytransformer transformers torch faiss-cpu sentence-transformers pandas unsloth bitsandbytes webrtcvad
+!ngrok config add-authtoken 2nFD4jJkAN642UzGI86nDsSC4qs_2cDEGBUFVpbQ5KaDuu4ys
+import os
+import faiss
+import torch
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from flask import Flask, request, jsonify, render_template
+from flask_cors import CORS
+from pyngrok import ngrok
+import requests
+import cloudinary
+import cloudinary.uploader
+import cloudinary.api
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import speech_recognition as sr
+from pydub import AudioSegment
+from happytransformer import HappyTextToText, TTSettings
+import io
+import logging
+import geocoder
+from geopy.distance import geodesic
+import webrtcvad
+import collections
+import time
+from werkzeug.utils import secure_filename
+from geopy.geocoders import Nominatim
+import pickle
+import numpy as np
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+# Initialize Flask app
+app = Flask(__name__, template_folder="templates")
+CORS(app)
+# Load environment variables
+# Every value falls back to a placeholder string when the variable is unset.
+# API_KEY and CSE_ID are not referenced again in the visible part of this file.
+API_KEY = os.getenv("API_KEY", "default_key")
+CSE_ID = os.getenv("CSE_ID", "default_cse")
+CLOUDINARY_CLOUD_NAME = os.getenv("CLOUDINARY_CLOUD_NAME", "default_cloud")
+CLOUDINARY_API_KEY = os.getenv("CLOUDINARY_API_KEY", "default_key")
+CLOUDINARY_API_SECRET = os.getenv("CLOUDINARY_API_SECRET", "default_secret")
+# Define paths for models and data
+# All paths are relative, so they resolve against the process working directory.
+MODEL_PATH = os.path.join("models", "model_state_dict.pth")
+FAISS_INDEX_PATH = os.path.join("models", "property_faiss.index")
+DATASET_PATH = os.path.join("data", "property_data.csv")
+MODEL_DIR = os.path.join("models", "llm_model")
+# Check device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Initialize conversation context
+# In-memory dict keyed by the client-supplied session_id. The routes below store
+# different value types under the same key: /search stores a list of results,
+# /generate and /recommend store a response string, /set-location stores a dict.
+conversation_context = {}
+# Load SentenceTransformer model
+def load_sentence_transformer():
+    print("Loading SentenceTransformer model...")
+    try:
+        model_embedding = SentenceTransformer("jinaai/jina-embeddings-v3", trust_remote_code=True).to(device)
+        # Load and optimize model state dict
+        state_dict = torch.load(MODEL_PATH, map_location=device)
+        # Dequantize if needed
+        for key, tensor in state_dict.items():
+            if hasattr(tensor, 'dequantize'):  # Check if tensor is quantized
+                state_dict[key] = tensor.dequantize().to(dtype=torch.float32)  # Convert to FP32
+            elif tensor.dtype == torch.bfloat16:  # Handle bfloat16 tensors
+                state_dict[key] = tensor.to(dtype=torch.float32)  # Convert to FP32
+        model_embedding.load_state_dict(state_dict)
+        print("SentenceTransformer model loaded successfully.")
+        return model_embedding
+    except Exception as e:
+        print(f"Error loading model: {str(e)}")
+        raise
+# Load FAISS index
+def load_faiss_index():
+    print("Loading FAISS index...")
+    index = faiss.read_index(FAISS_INDEX_PATH)
+    print("FAISS index loaded successfully.")
+    return index
+# Load dataset
+def load_dataset():
+    print("Loading dataset...")
+    df = pd.read_csv(DATASET_PATH)
+    print("Dataset loaded successfully.")
+    return df
+# Custom Retriever Class
+class CustomRagRetriever:
+    def __init__(self, faiss_index, model):
+        self.index = faiss_index
+        self.model = model
+        self.pca = None
+        # Load PCA if it exists
+        pca_path = os.path.join(os.path.dirname(MODEL_PATH), "pca_model.pkl")
+        if os.path.exists(pca_path):
+            with open(pca_path, 'rb') as f:
+                self.pca = pickle.load(f)
+    def retrieve(self, query, top_k=10):
+        print(f"Retrieving properties for query: {query}")
+        try:
+            # Get query embedding with optimizations
+            with torch.no_grad():
+                query_embedding = self.model.encode(
+                    [query],
+                    convert_to_numpy=True,
+                    device=device,
+                    normalize_embeddings=True
+                )
+                # Convert to FP16 after encoding
+                query_embedding = query_embedding.astype(np.float32)
+            if self.pca is not None:
+                query_embedding = self.pca.transform(query_embedding)
+            distances, indices = self.index.search(query_embedding, top_k)
+            retrieved_properties = []
+            for idx, dist in zip(indices[0], distances[0]):
+                property_data = df.iloc[idx]
+                retrieved_properties.append({
+                    "property": property_data,
+                    "image_url": property_data["property_image"],
+                    "distance": float(dist)
+                })
+            print(f"Retrieved {len(retrieved_properties)} properties")
+            return retrieved_properties
+        except Exception as e:
+            print(f"Error in retrieve: {str(e)}")
+            raise
+# Initialize components
+# These calls run at import time, so the CSV, the embedding checkpoint and the
+# FAISS index must all be readable before the Flask app can start.
+df = load_dataset()
+model_embedding = load_sentence_transformer()
+# NOTE: the module-level name `index` is rebound further down by the `index`
+# view function registered for '/'. The retriever built on the next line keeps
+# its own reference to the FAISS index.
+index = load_faiss_index()
+retriever = CustomRagRetriever(index, model_embedding)
+# Load tokenizer and LLM model
+def load_tokenizer_and_model():
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+    print("Tokenizer loaded successfully.")
+    print("Loading LLM model...")
+    model_llm = AutoModelForCausalLM.from_pretrained(MODEL_DIR).to(device)
+    print("LLM model loaded successfully.")
+    return tokenizer, model_llm
+tokenizer, model_llm = load_tokenizer_and_model()
+# Configure Cloudinary
+def configure_cloudinary():
+    print("Configuring Cloudinary...")
+    cloudinary.config(
+        cloud_name=CLOUDINARY_CLOUD_NAME,
+        api_key=CLOUDINARY_API_KEY,
+        api_secret=CLOUDINARY_API_SECRET
+    )
+    print("Cloudinary configured successfully.")
+configure_cloudinary()
+# Search real estate properties
+def search_real_estate(query, retriever, top_k=10, raw_results=False):
+    print(f"Searching real estate properties for query: {query}")
+    search_results = retriever.retrieve(query, top_k)
+    if raw_results:
+        return search_results
+    formatted_results = []
+    for result in search_results:
+        property_info = result['property']
+        formatted_result = {
+            "Property Name": property_info.get('PropertyName', 'N/A'),
+            "Address": property_info.get('Address', 'N/A'),
+            "ZipCode": int(float(property_info.get('ZipCode', 0))),
+            "LeasableSquareFeet": int(float(property_info.get('LeasableSquareFeet', 0))),
+            "YearBuilt": int(float(property_info.get('YearBuilt', 0))),
+            "NumberOfRooms": int(float(property_info.get('NumberOfRooms', 0))),
+            "ParkingSpaces": int(float(property_info.get('ParkingSpaces', 0))),
+            "PropertyManager": property_info.get('PropertyManager', 'N/A'),
+            "MarketValue": float(property_info.get('MarketValue', 0)),
+            "TaxAssessmentNumber": property_info.get('TaxAssessmentNumber', 'N/A'),
+            "Latitude": float(property_info.get('Latitude', 0)),
+            "Longitude": float(property_info.get('Longitude', 0)),
+            "CreateDate": property_info.get('CreateDate', 'N/A'),
+            "LastModifiedDate": property_info.get('LastModifiedDate', 'N/A'),
+            "City": property_info.get('City', 'N/A'),
+            "State": property_info.get('State', 'N/A'),
+            "Country": property_info.get('Country', 'N/A'),
+            "PropertyType": property_info.get('PropertyType', 'N/A'),
+            "PropertyStatus": property_info.get('PropertyStatus', 'N/A'),
+            "Description": property_info.get('Description', 'N/A'),
+            "ViewNumber": int(float(property_info.get('ViewNumber', 0))),
+            "Contact": int(float(property_info.get('Contact', 0))),
+            "TotalSquareFeet": int(float(property_info.get('TotalSquareFeet', 0))),
+            "IsDeleted": bool(property_info.get('IsDeleted', False)),
+            "Beds": int(float(property_info.get('Beds', 0))),
+            "Baths": int(float(property_info.get('Baths', 0))),
+            "AgentName": property_info.get('AgentName', 'N/A'),
+            "AgentPhoneNumber": property_info.get('AgentPhoneNumber', 'N/A'),
+            "AgentEmail": property_info.get('AgentEmail', 'N/A'),
+            "KeyFeatures": property_info.get('KeyFeatures', 'N/A'),
+            "NearbyAmenities": property_info.get('NearbyAmenities', 'N/A'),
+            "Property Image": result['image_url'],
+            "Distance": result['distance']
+        }
+        formatted_results.append(formatted_result)
+    print(f"Found {len(formatted_results)} matching properties")
+    return formatted_results
+# Generate response with optimized parameters
+def generate_response(query, max_new_tokens=100, temperature=0.7, top_k=30, top_p=0.8, repetition_penalty=1.05):
+    print(f"\nGenerating response for query: {query}\n")
+    # Print parameter settings
+    print("Generation Parameters:")
+    print(f"- Max New Tokens: {max_new_tokens}")
+    print(f"- Temperature: {temperature}")
+    print(f"- Top-K Sampling: {top_k}")
+    print(f"- Top-P Sampling: {top_p}")
+    print(f"- Repetition Penalty: {repetition_penalty}")
+    print(f"- Sampling Enabled: True (do_sample=True)\n")
+    input_text = f"User: {query}\nAssistant:"
+    inputs = tokenizer(input_text, return_tensors="pt").to(device)
+    start_time = time.time()  # Record start time
+    try:
+        outputs = model_llm.generate(
+            inputs.input_ids,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            do_sample=True,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.pad_token_id
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = response.replace(input_text, "").strip()
+        end_time = time.time()  # Record end time
+        duration = end_time - start_time  # Calculate duration
+        print(f"\nGenerated Response:\n{response}\n")
+        print(f"Time taken to generate response: {duration:.2f} seconds\n")
+        return response, duration
+    except Exception as e:
+        logging.error(f"Error generating response: {e}")
+        return "An error occurred while generating the response.", None
+# Combined model response with optimized parameters
+def combined_model_response(query, retriever, top_k=5, max_new_tokens=512, temperature=0.5, top_k_sampling=30, repetition_penalty=1.0):
+    print(f"Generating combined model response for query: {query}")
+    retrieved_results = search_real_estate(query, retriever, top_k, raw_results=True)
+    if not retrieved_results:
+        return "No relevant properties found."
+    combined_property_details = []
+    for i, result in enumerate(retrieved_results, 1):
+        property_info = result['property']
+        property_details = (
+            f"Property {i}:\n"
+            f"Property Name: {property_info['PropertyName']}\n"
+            f"Address: {property_info['Address']}, {property_info['City']}, {property_info['State']}, {property_info['ZipCode']}, {property_info['Country']}\n"
+            f"Leasable Area: {property_info['LeasableSquareFeet']} sqft\n"
+            f"Year Built: {property_info['YearBuilt']}\n"
+            f"Beds: {property_info['Beds']}  Baths: {property_info['Baths']}\n"
+            f"Parking Spaces: {property_info['ParkingSpaces']}\n"
+            f"Market Value: {property_info['MarketValue']}\n"
+            # f"Tax Assessment Number: {property_info['TaxAssessmentNumber']}\n"
+            # f"Coordinates: {property_info['Latitude']}, {property_info['Longitude']}\n"
+            f"Property Type: {property_info['PropertyType']}\n"
+            f"Property Status: {property_info['PropertyStatus']}\n"
+            f"Description: {property_info['Description']}\n"
+            # f"View Count: {property_info['ViewNumber']}\n"
+            f"Contact: {property_info['Contact']}\n"
+            f"Total Square Feet: {property_info['TotalSquareFeet']} sqft\n"
+            # f"Deleted: {'Yes' if property_info['IsDeleted'] else 'No'}\n"
+            f"Agent Name: {property_info['AgentName']}\n"
+            f"Agent Phone Number: {property_info['AgentPhoneNumber']}\n"
+            f"Agent Email: {property_info['AgentEmail']}\n"
+            f"Key Features: {property_info['KeyFeatures']}\n"
+            f"Nearby Amenities: {property_info['NearbyAmenities']}\n"
+            f"Created Date: {property_info['CreateDate']}\n"
+            f"Last Modified Date: {property_info['LastModifiedDate']}\n"
+        )
+        combined_property_details.append(property_details)
+    prompt = f"User Query: {query}\nProperty Details:\n" + "\n".join(combined_property_details) + "\nGenerate a concise response based on the user's query and retrieved property details."
+    print(f"User Query: {query}")
+    response, duration = generate_response(prompt, max_new_tokens=max_new_tokens)
+    print(f"Combined model response: {response}")
+    print(f"Time taken to generate combined model response: {duration:.2f} seconds\n")
+    return response, duration
+# VAD Audio Class
+class VADAudio:
+    def __init__(self, aggressiveness=3):
+        self.vad = webrtcvad.Vad(aggressiveness)
+        self.sample_rate = 16000
+        self.frame_duration_ms = 30
+    def frame_generator(self, audio, frame_duration_ms, sample_rate):
+        n = int(sample_rate * (frame_duration_ms / 1000.0))
+        offset = 0
+        while offset + n < len(audio):
+            yield audio[offset:offset + n]
+            offset += n
+    def vad_collector(self, audio, sample_rate, frame_duration_ms, padding_duration_ms=300, aggressiveness=3):
+        vad = webrtcvad.Vad(aggressiveness)
+        num_padding_frames = int(padding_duration_ms / frame_duration_ms)
+        ring_buffer = collections.deque(maxlen=num_padding_frames)
+        triggered = False
+        for frame in self.frame_generator(audio, frame_duration_ms, sample_rate):
+            is_speech = vad.is_speech(frame, sample_rate)
+            if not triggered:
+                ring_buffer.append((frame, is_speech))
+                num_voiced = len([f for f, speech in ring_buffer if speech])
+                if num_voiced > 0.9 * ring_buffer.maxlen:
+                    triggered = True
+                    for f, s in ring_buffer:
+                        yield f
+                    ring_buffer.clear()
+            else:
+                yield frame
+                ring_buffer.append((frame, is_speech))
+                num_unvoiced = len([f for f, speech in ring_buffer if not speech])
+                if num_unvoiced > 0.9 * ring_buffer.maxlen:
+                    triggered = False
+                    yield b''.join([f for f in ring_buffer])
+                    ring_buffer.clear()
+# Transcribe with VAD
+def transcribe_with_vad(audio_file):
+    vad_audio = VADAudio()
+    audio = AudioSegment.from_file(audio_file)
+    audio = audio.set_frame_rate(vad_audio.sample_rate).set_channels(1)
+    raw_audio = audio.raw_data
+    frames = vad_audio.vad_collector(raw_audio, vad_audio.sample_rate, vad_audio.frame_duration_ms)
+    for frame in frames:
+        if len(frame) > 0:
+            recognizer = sr.Recognizer()
+            audio_data = sr.AudioData(frame, vad_audio.sample_rate, audio.sample_width)
+            try:
+                text = recognizer.recognize_google(audio_data)
+                print(f"Transcription: {text}")
+                return text
+            except sr.UnknownValueError:
+                print("Google Speech Recognition could not understand the audio")
+            except sr.RequestError as e:
+                print(f"Could not request results from Google Speech Recognition service; {e}")
+    return ""
+@app.route('/')
+def index():
+    # Serves the landing page template. NOTE: defining this function rebinds the
+    # module-level name `index`, which was assigned the FAISS index earlier.
+    return render_template('index.html')
+@app.route('/search', methods=['POST'])
+def search():
+    try:
+        data = request.json
+        query = data.get('query')
+        session_id = data.get('session_id')
+        continue_conversation = data.get('continue', False)
+        if not query:
+            return jsonify({"error": "Query parameter is missing"}), 400
+        if session_id not in conversation_context or not continue_conversation:
+            search_results = retriever.retrieve(query)
+            formatted_results = []
+            for result in search_results:
+                property_info = result['property']
+                formatted_result = {
+                    "Property Name": property_info.get('PropertyName', 'N/A'),
+                    "Address": property_info.get('Address', 'N/A'),
+                    "ZipCode": int(float(property_info.get('ZipCode', 0))),
+                    "LeasableSquareFeet": int(float(property_info.get('LeasableSquareFeet', 0))),
+                    "YearBuilt": int(float(property_info.get('YearBuilt', 0))),
+                    "NumberOfRooms": int(float(property_info.get('NumberOfRooms', 0))),
+                    "ParkingSpaces": int(float(property_info.get('ParkingSpaces', 0))),
+                    "PropertyManager": property_info.get('PropertyManager', 'N/A'),
+                    "MarketValue": float(property_info.get('MarketValue', 0)),
+                    "TaxAssessmentNumber": property_info.get('TaxAssessmentNumber', 'N/A'),
+                    "City": property_info.get('City', 'N/A'),
+                    "State": property_info.get('State', 'N/A'),
+                    "Country": property_info.get('Country', 'N/A'),
+                    "PropertyType": property_info.get('PropertyType', 'N/A'),
+                    "PropertyStatus": property_info.get('PropertyStatus', 'N/A'),
+                    "Description": property_info.get('Description', 'N/A'),
+                    "ViewNumber": int(float(property_info.get('ViewNumber', 0))),
+                    "Contact": int(float(property_info.get('Contact', 0))),
+                    "TotalSquareFeet": int(float(property_info.get('TotalSquareFeet', 0))),
+                    "IsDeleted": bool(property_info.get('IsDeleted', False)),
+                    "Beds": int(float(property_info.get('Beds', 0))),
+                    "Baths": int(float(property_info.get('Baths', 0))),
+                    "AgentName": property_info.get('AgentName', 'N/A'),
+                    "AgentPhoneNumber": property_info.get('AgentPhoneNumber', 'N/A'),
+                    "AgentEmail": property_info.get('AgentEmail', 'N/A'),
+                    "KeyFeatures": property_info.get('KeyFeatures', 'N/A'),
+                    "NearbyAmenities": property_info.get('NearbyAmenities', 'N/A'),
+                    "Property Image": result['image_url'],
+                    "Distance": float(result['distance'])
+                }
+                formatted_results.append(formatted_result)
+            conversation_context[session_id] = formatted_results
+        else:
+            formatted_results = conversation_context[session_id]
+        print(f"Returning {len(formatted_results)} search results")
+        return jsonify(formatted_results)
+    except Exception as e:
+        logging.error(f"Error in search endpoint: {str(e)}")
+        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
+@app.route('/transcribe', methods=['POST'])
+def transcribe():
+    if 'audio' not in request.files:
+        return jsonify({"error": "No audio file provided"}), 400
+    audio_file = request.files['audio']
+    # Ensure the file has an allowed extension
+    allowed_extensions = {'wav', 'mp3', 'ogg', 'webm'}
+    if '.' not in audio_file.filename or \
+       audio_file.filename.rsplit('.', 1)[1].lower() not in allowed_extensions:
+        return jsonify({"error": "Invalid audio file format"}), 400
+    try:
+        # Save the uploaded file temporarily
+        temp_dir = os.path.join(os.getcwd(), 'temp')
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_path = os.path.join(temp_dir, 'temp_audio.' + audio_file.filename.rsplit('.', 1)[1].lower())
+        audio_file.save(temp_path)
+        # Convert audio to proper format if needed
+        audio = AudioSegment.from_file(temp_path)
+        audio = audio.set_channels(1)  # Convert to mono
+        audio = audio.set_frame_rate(16000)  # Set sample rate to 16kHz
+        # Save as WAV for speech recognition
+        wav_path = os.path.join(temp_dir, 'temp_audio.wav')
+        audio.export(wav_path, format="wav")
+        # Perform speech recognition
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(wav_path) as source:
+            audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data)
+        # Clean up temporary files
+        os.remove(temp_path)
+        os.remove(wav_path)
+        # Grammar correction
+        happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
+        settings = TTSettings(do_sample=True, top_k=50, temperature=0.7)
+        corrected_text = happy_tt.generate_text(f"grammar: {text}", args=settings)
+        print(f"Original Transcription: {text}")
+        print(f"Corrected Transcription: {corrected_text.text}")
+        return jsonify({
+            "transcription": corrected_text.text,
+            "original": text
+        })
+    except sr.UnknownValueError:
+        return jsonify({"error": "Could not understand audio"}), 400
+    except sr.RequestError as e:
+        return jsonify({"error": f"Google Speech Recognition error: {str(e)}"}), 500
+    except Exception as e:
+        logging.error(f"Error processing audio: {str(e)}")
+        return jsonify({"error": f"Audio processing error: {str(e)}"}), 500
+    finally:
+        # Ensure temp files are cleaned up even if an error occurs
+        if 'temp_path' in locals() and os.path.exists(temp_path):
+            os.remove(temp_path)
+        if 'wav_path' in locals() and os.path.exists(wav_path):
+            os.remove(wav_path)
+@app.route('/generate', methods=['POST'])
+def generate():
+    data = request.json
+    query = data.get('query')
+    session_id = data.get('session_id')
+    continue_conversation = data.get('continue', False)
+    if not query:
+        return jsonify({"error": "Query parameter is missing"}), 400
+    if session_id in conversation_context and continue_conversation:
+        previous_results = conversation_context[session_id]
+        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
+        response, duration = generate_response(combined_query)
+    else:
+        response, duration = generate_response(query)
+        conversation_context[session_id] = response
+    print(f"Generated response: {response}")
+    print(f"Time taken to generate response: {duration:.2f} seconds\n")
+    return jsonify({"response": response, "duration": duration})
+@app.route('/recommend', methods=['POST'])
+def recommend():
+    data = request.json
+    query = data.get('query')
+    session_id = data.get('session_id')
+    continue_conversation = data.get('continue', False)
+    if not query:
+        return jsonify({"error": "Query parameter is missing"}), 400
+    if query.lower() == 'hi':
+        return jsonify({"response": "Do you want to know the properties located near you? (yes/no):"})
+    if query.lower() == 'yes':
+        if session_id in conversation_context and 'location' in conversation_context[session_id]:
+            latitude, longitude = conversation_context[session_id]['location']
+        else:
+            return jsonify({"error": "Location not available. Please try again."}), 400
+        my_location = (latitude, longitude)
+        # Filter out rows with invalid coordinates before calculating distances
+        valid_properties = df[
+            df['Latitude'].apply(lambda x: isinstance(x, (int, float)) or (isinstance(x, str) and x.replace('.', '').isdigit())) &
+            df['Longitude'].apply(lambda x: isinstance(x, (int, float)) or (isinstance(x, str) and x.replace('.', '').isdigit()))
+        ].copy()
+        # Convert coordinates to float
+        valid_properties['Latitude'] = valid_properties['Latitude'].astype(float)
+        valid_properties['Longitude'] = valid_properties['Longitude'].astype(float)
+        # Calculate distances for valid properties
+        valid_properties['Distance'] = valid_properties.apply(
+            lambda row: geodesic(my_location, (row['Latitude'], row['Longitude'])).miles,
+            axis=1
+        )
+        # Get 5 nearest properties
+        nearest_properties = valid_properties.nsmallest(5, 'Distance')
+        nearest_properties_list = nearest_properties[[
+            'PropertyName', 'Address', 'City', 'Distance',
+            'PropertyType', 'AgentPhoneNumber'
+        ]].to_dict(orient='records')
+        if not nearest_properties_list:
+            return jsonify({"response": "No valid properties found near your location."})
+        return jsonify({
+            "response": "Here are the 5 nearest properties to your location:",
+            "properties": nearest_properties_list
+        })
+    if session_id in conversation_context and continue_conversation:
+        previous_results = conversation_context[session_id]
+        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
+        response, duration = combined_model_response(combined_query, retriever)
+    else:
+        response, duration = combined_model_response(query, retriever)
+        conversation_context[session_id] = response
+    print(f"Recommended response: {response}")
+    print(f"Time taken to generate recommended response: {duration:.2f} seconds\n")
+    return jsonify({"response": response, "duration": duration})
+@app.route('/set-location', methods=['POST'])
+def set_location():
+    data = request.json
+    latitude = data.get('latitude')
+    longitude = data.get('longitude')
+    session_id = data.get('session_id')
+    if latitude is None or longitude is None:
+        return jsonify({"error": "Location parameters are missing"}), 400
+    try:
+        # Initialize the geolocator
+        geolocator = Nominatim(user_agent="hive_prop")
+        # Get location details from coordinates
+        location = geolocator.reverse(f"{latitude}, {longitude}", language='en')
+        if location and location.raw.get('address'):
+            address = location.raw['address']
+            city = address.get('city') or address.get('town') or address.get('suburb') or address.get('county')
+            state = address.get('state')
+            country = address.get('country')
+            # Store location data in conversation context
+            conversation_context[session_id] = {
+                'location': (latitude, longitude),
+                'city': city,
+                'state': state,
+                'country': country
+            }
+            return jsonify({
+                "message": "Location set successfully.",
+                "city": city,
+                "state": state,
+                "country": country
+            })
+        else:
+            return jsonify({"error": "Could not determine city from coordinates"}), 400
+    except Exception as e:
+        logging.error(f"Error getting location details: {str(e)}")
+        return jsonify({"error": f"Error processing location: {str(e)}"}), 500
+# Script entry point: starts Flask's built-in server when run as `python app.py`.
+if __name__ == '__main__':
+    # For Hugging Face Spaces, we need to listen on 0.0.0.0:7860
+    # (the same port the Dockerfile exposes).
+    app.run(host='0.0.0.0', port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+flask==2.0.1
+flask-cors==3.0.10
+torch==2.0.1
+transformers==4.30.2
+sentence-transformers==2.2.2
+faiss-cpu==1.7.4
+pandas==1.5.3
+numpy==1.24.3
+geopy==2.3.0
+geocoder==1.38.1
+cloudinary==1.33.0
+pydub==0.25.1
+SpeechRecognition==3.10.0
+webrtcvad==2.0.10
+happytransformer==2.4.1
+Werkzeug==2.0.3
+# Imported at the top of app.py but previously missing from this list, which
+# makes the app fail at import time. Left unpinned; pin once a version is chosen.
+pyngrok
+requests