import os
import re
import shutil

import chromadb
import spacy
import streamlit as st
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings
from sentence_transformers import SentenceTransformer
from transformers import pipeline

from travel import get_travel_spots
from weather import get_weather_summary
# Start every run with an empty Chroma directory so stale vectors from a
# previous session never leak into this one.
CHROMA_PATH = "/tmp/chroma"
# ignore_errors=True covers the "directory doesn't exist yet" case in one
# call, avoiding the check-then-delete race of os.path.exists() + rmtree().
shutil.rmtree(CHROMA_PATH, ignore_errors=True)
os.makedirs(CHROMA_PATH, exist_ok=True)
# Initialize Chroma using the new client API
# (PersistentClient replaces the deprecated chromadb.Client(Settings(...)) path).
client = chromadb.PersistentClient(
    path=CHROMA_PATH,
    settings=Settings(),
    tenant=DEFAULT_TENANT,
    database=DEFAULT_DATABASE,
)
# Collection holding the disaster-news documents; created on first run,
# reused afterwards (though CHROMA_PATH is wiped at startup above).
db = client.get_or_create_collection("disaster_news")
# Load models
# - embed_model: sentence embeddings for semantic retrieval
# - qa_pipeline: seq2seq generator used to answer from retrieved context
# - nlp: spaCy pipeline used for GPE/LOC named-entity extraction
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
nlp = spacy.load("en_core_web_sm")
# Keyword lists
# Lower-case trigger words used to classify query intent (weather vs. travel).
weather_keywords = ["weather", "forecast", "rain", "snow", "temperature", "wind", "climate", "humid", "cold", "hot"]
travel_keywords = ["visit", "travel", "tourist", "see", "go", "spots", "places", "explore", "attractions"]
# Extract location and intent
def extract_location_and_intent(query):
    """Extract a location and classify the query's intent.

    Args:
        query: The user's natural-language question.

    Returns:
        Tuple ``(location, is_weather, is_travel)`` where ``location`` is the
        first GPE/LOC named entity spaCy finds (or ``None``), and the two
        booleans flag whether any weather / travel trigger word appears.
    """
    doc = nlp(query)
    locations = [ent.text for ent in doc.ents if ent.label_ in ("GPE", "LOC")]
    location = locations[0] if locations else None
    # Match whole words only: a plain substring test would fire on false
    # positives such as "hot" in "hotel" or "rain" in "train".
    query_words = set(re.findall(r"[a-z]+", query.lower()))
    is_weather = any(word in query_words for word in weather_keywords)
    is_travel = any(word in query_words for word in travel_keywords)
    return location, is_weather, is_travel
# Fallback: semantic search + QA
def query_rag_system(query):
query_emb = embed_model.encode(query).tolist()
results = db.query(query_embeddings=[query_emb], n_results=5)
retrieved_docs = results.get("documents", [[]])[0]
if not retrieved_docs:
return "No relevant disaster info found in the database."
context = "\n".join(retrieved_docs)
prompt = (
f"You are a helpful assistant. Based on the context below, answer the question.\n\n"
f"Context:\n{context}\n\n"
f"Question: {query}\n\n"
f"Answer clearly and specifically:"
)
output = qa_pipeline(prompt, max_new_tokens=200)[0]["generated_text"]
if output.strip().lower().startswith("here's what i found"):
output = output.replace("Here's what I found:", "").strip()
return output if output else "No relevant answer could be generated."