# Hugging Face Space app — "Orangutan Conservation Q&A".
# (Upload metadata from the Space page — user Minerva666, commit fbdac33 —
# was captured into this file; preserved here as a comment so the file parses.)
import os
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline
import threading
# -------------------- PATHS (FLATTENED) --------------------
DENSITY_FILE = "density_context.txt"
DESA_FILE = "desa_context.txt"
CR_GEOLOCS = "desa_release_summary.txt"
HYDRO_FILE = "hydro_context.txt"
SUMMARY1 = "trimmed_summary.txt"
SUMMARY2 = "trimmed_summary2.txt"
DENSITY_VIZ = "orangutan_occ1.png"
DESA_VIZ = "orangutan_desa.png"
CAPTURE_RELEASE = "orangutan_capture_release.png"
RIVERS = "rivers_rds_op1.png"
PAS = "pas.png"
# -------------------- LOAD CONTEXT FILES --------------------
# Each context document is paired with the map image shown alongside it
# in the gallery; missing files are skipped silently.
files = [
    (DENSITY_FILE, DENSITY_VIZ),
    (DESA_FILE, DESA_VIZ),
    (CR_GEOLOCS, CAPTURE_RELEASE),
    (HYDRO_FILE, RIVERS),
    (SUMMARY1, PAS),
    (SUMMARY2, PAS),
]
context_texts = []
doc_names = []
context_map = {}
for text_path, image_path in files:
    if not os.path.exists(text_path):
        continue
    with open(text_path, "r", encoding="utf-8") as handle:
        body = handle.read()
    base_name = os.path.basename(text_path)
    context_texts.append(body)
    doc_names.append(base_name)
    context_map[base_name] = (body, image_path)
if not context_texts:
    raise ValueError("No context files found. Please upload your context text files.")
# -------------------- BUILD FAISS INDEX --------------------
# One embedding per whole document; the flat L2 index gives exact
# nearest-neighbour search over this handful of vectors.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(context_texts, convert_to_numpy=True)
dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(doc_embeddings)
# -------------------- Q&A FUNCTION --------------------
# Cached EN->ID translation pipeline; created lazily on first use.
_TRANSLATOR = None


def _get_translator():
    """Return the shared EN->ID translation pipeline, building it once.

    The Helsinki-NLP MarianMT model is expensive to load; the previous
    implementation rebuilt it on every request.
    """
    global _TRANSLATOR
    if _TRANSLATOR is None:
        _TRANSLATOR = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")
    return _TRANSLATOR


def translate_with_timeout(text, timeout_sec=15):
    """Translate *text* from English to Bahasa Indonesia with a time budget.

    Runs the translation in a daemon worker thread so a hung model load or
    inference cannot block interpreter shutdown.

    Args:
        text: English text to translate.
        timeout_sec: Seconds to wait before giving up.

    Returns:
        The translated string, or ``None`` if the worker did not finish
        within ``timeout_sec`` (or raised before storing a result).
    """
    result = {'text': None}

    def translate():
        translator = _get_translator()
        # MarianMT has a limited input window; translate in 500-character
        # chunks and re-join the pieces.
        chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
        translated = [translator(chunk)[0]['translation_text'] for chunk in chunks]
        result['text'] = " ".join(translated)

    # daemon=True: an abandoned (timed-out) worker must not keep the
    # process alive.
    thread = threading.Thread(target=translate, daemon=True)
    thread.start()
    thread.join(timeout_sec)
    if thread.is_alive():
        return None  # timed out; result (if any) is discarded
    return result['text']
def answer_question(question, language):
    """Retrieve the closest context documents and format an answer.

    Args:
        question: Free-text user question.
        language: "English" or "Bahasa Indonesia"; the latter triggers a
            machine translation of the answer (with English fallback on
            timeout).

    Returns:
        Tuple of (answer_text, image_paths) for the Gradio Textbox and
        Gallery outputs.
    """
    q_embedding = embedder.encode([question], convert_to_numpy=True)
    # FAISS pads results with index -1 when k exceeds the number of stored
    # vectors; the previous code then did doc_names[-1] and silently
    # returned the wrong document. Clamp k and drop any padding entries.
    k = min(6, index.ntotal)
    D, I = index.search(q_embedding, k=k)
    # FAISS already returns hits in ascending L2-distance order, so the
    # first three valid indices are the best matches.
    top_indices = [idx for idx in I[0] if idx >= 0][:3]
    response_texts = []
    response_images = []
    for idx in top_indices:
        fname = doc_names[idx]
        ctx, img = context_map[fname]
        response_texts.append(ctx)
        # Both summary files share pas.png; avoid showing the same map twice.
        if img not in response_images:
            response_images.append(img)
    final_answer = "\n\n".join(response_texts)
    if language == "Bahasa Indonesia":
        translated = translate_with_timeout(final_answer, timeout_sec=20)
        if translated:
            final_answer = translated
        else:
            final_answer = "(⚠️ Auto-translator timed out. Showing English version.)\n\n" + final_answer
    return final_answer, response_images
# -------------------- GRADIO UI --------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🏢 Orangutan Conservation Q&A (Multi-Source Data)")
    with gr.Tab("Ask a Question"):
        question_box = gr.Textbox(
            label="Your Question",
            placeholder="e.g., How far are sightings from rivers?",
        )
        language_choice = gr.Dropdown(
            ["English", "Bahasa Indonesia"], value="English", label="Language"
        )
        answer_box = gr.Textbox(label="Answer", lines=12)
        map_gallery = gr.Gallery(label="Maps", columns=2, height=360)
        ask_button = gr.Button("Get Answer")
        # Wire the button to the retrieval/translation pipeline.
        ask_button.click(
            answer_question,
            inputs=[question_box, language_choice],
            outputs=[answer_box, map_gallery],
        )
# ✅ Launch app
demo.launch()