# fiber-assistant / app.py
# Uploaded by: Roman79
# Commit message: Upload 2 files
# Commit: 5924739 (verified)
import os
import json
import numpy as np
import pandas as pd
import faiss
import anthropic
import streamlit as st
from sentence_transformers import SentenceTransformer
from pathlib import Path
# Browser-tab title, icon and layout for the Streamlit page.
page_settings = {
    "page_title": "DAS / Fiber Optics Research Assistant",
    "page_icon": "",
    "layout": "centered",
}
st.set_page_config(**page_settings)

# Visible heading followed by a one-paragraph description of the assistant.
st.title("DAS & Fiber Optics Research Assistant")
intro_text = (
    "Ask questions about distributed acoustic sensing (DAS) and fiber optic "
    "technology in oilfield applications. Answers are grounded in scientific papers."
)
st.caption(intro_text)
@st.cache_resource
def load_rag(index_dir: str = "rag_index"):
    """Load the embedding model, FAISS index and chunk table from disk.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned objects instead of reloading them on every script run.

    Args:
        index_dir: Directory containing ``config.json``, ``chunks.index`` and
            ``chunks.parquet``. Accepts a string or path-like value and
            defaults to ``"rag_index"``, the location that was previously
            hard-coded.

    Returns:
        A ``(model, index, chunks)`` tuple: the ``SentenceTransformer`` named
        by the ``"embed_model"`` key of ``config.json``, the FAISS index read
        from ``chunks.index``, and the DataFrame read from ``chunks.parquet``.

    Raises:
        FileNotFoundError: If ``config.json`` is missing from ``index_dir``.
        KeyError: If ``config.json`` has no ``"embed_model"`` entry.
    """
    index_path = Path(index_dir)

    # Read the config explicitly as UTF-8 rather than relying on the
    # platform's default text encoding, which differs between hosts.
    with open(index_path / "config.json", encoding="utf-8") as f:
        cfg = json.load(f)

    model = SentenceTransformer(cfg["embed_model"])
    # faiss.read_index is given a plain string path, as in the original code.
    index = faiss.read_index(str(index_path / "chunks.index"))
    chunks = pd.read_parquet(index_path / "chunks.parquet")
    return model, index, chunks
# Fetch the retrieval artifacts (load_rag is decorated with st.cache_resource)
# and expose them as the module-level names used by retrieve().
rag_resources = load_rag()
embed_model, faiss_index, df_chunks = rag_resources
def retrieve(query: str, top_k: int = 5):
    """Return the chunks most similar to ``query`` together with their scores.

    The query is embedded with the module-level ``embed_model`` (normalized
    embeddings, cast to float32) and searched against ``faiss_index``. The
    matching rows of ``df_chunks`` are looked up by position.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to return.

    Returns:
        A copy of the matching ``df_chunks`` rows, in the order FAISS returned
        them, with an added ``score`` column holding the value FAISS reported
        for each hit. Fewer than ``top_k`` rows are returned when the index
        cannot supply that many neighbours.
    """
    q_emb = embed_model.encode([query], normalize_embeddings=True).astype(np.float32)
    scores, indices = faiss_index.search(q_emb, top_k)

    # Only one query was submitted, so the first result row is the only one.
    hit_ids = indices[0]
    hit_scores = scores[0]

    # FAISS pads the label array with -1 when it finds fewer than top_k
    # neighbours. Passing -1 to iloc would silently select the LAST chunk and
    # present it as a genuine hit, so padded entries are dropped first.
    found = hit_ids >= 0

    results = df_chunks.iloc[hit_ids[found]].copy()
    results["score"] = hit_scores[found]
    return results
def build_prompt(query: str, chunks_df) -> str:
    """Assemble the LLM prompt from the question and the retrieved chunks.

    Each row of ``chunks_df`` becomes a numbered excerpt (numbering starts at
    1) showing the row's ``doc_title``, ``doc_year`` and ``text``. The excerpts
    are placed between ``--- EXCERPTS ---`` and ``--- END ---`` markers, after
    the instructions and before the question.

    Args:
        query: The user's question, inserted verbatim.
        chunks_df: DataFrame with ``doc_title``, ``doc_year`` and ``text``
            columns, one row per retrieved chunk.

    Returns:
        The complete prompt string, ending in ``"Answer:"``.
    """
    excerpts = [
        f"[Source {n}] {hit.doc_title} ({hit.doc_year})\n{hit.text}"
        for n, hit in enumerate(chunks_df.itertuples(index=False), start=1)
    ]
    context = "\n\n".join(excerpts)

    instructions = (
        "You are a technical assistant specializing in fiber optic sensing "
        "and DAS for oilfield applications. Answer using ONLY the excerpts below. "
        "Cite sources like [Source N].\n\n"
    )
    excerpt_section = f"--- EXCERPTS ---\n{context}\n--- END ---\n\n"
    question_section = f"Question: {query}\n\nAnswer:"
    return instructions + excerpt_section + question_section
# --- Question form ---------------------------------------------------------
# Widgets inside the form are submitted together when "Ask" is pressed.
with st.form("query_form"):
    query = st.text_area(
        "Your question",
        placeholder="e.g. How does DAS detect hydraulic fracture propagation?",
        height=100,
    )
    top_k = st.slider("Number of sources to retrieve", 3, 10, 5)
    submitted = st.form_submit_button("Ask", type="primary")

# --- Submit handler --------------------------------------------------------
if submitted:
    if not query.strip():
        st.warning("Please enter a question.")
        st.stop()

    # Check the credential before doing any retrieval work, and outside the
    # try block so that block only wraps calls that can actually fail.
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        st.error("ANTHROPIC_API_KEY not set in Space secrets.")
        st.stop()

    with st.spinner("Searching papers and generating answer..."):
        try:
            retrieved = retrieve(query, top_k=top_k)
            prompt = build_prompt(query, retrieved)
            client = anthropic.Anthropic(api_key=api_key)
            message = client.messages.create(
                model="claude-haiku-4-5-20251001",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            # The response is a list of content blocks. Join the text ones
            # instead of assuming the list is non-empty and that its first
            # element is a text block.
            answer = "".join(
                block.text
                for block in message.content
                if getattr(block, "type", None) == "text"
            )
        except Exception as e:
            # Top-level UI boundary: report the failure to the user and end
            # this script run rather than showing a traceback.
            st.error(f"Something went wrong: {e}")
            st.stop()

    st.subheader("Answer")
    if answer:
        st.write(answer)
    else:
        st.warning("The model returned no text for this question.")

    st.subheader("Retrieved sources")
    for i, (_, row) in enumerate(retrieved.iterrows(), 1):
        with st.expander(f"[Source {i}] {row.doc_title} (score: {row.score:.3f})"):
            st.write(f"**Year:** {row.doc_year}")
            # The URL may be absent or a non-string value (e.g. None/NaN for a
            # missing entry); only call startswith on a real string.
            doc_url = getattr(row, "doc_url", None)
            if isinstance(doc_url, str) and doc_url.startswith("http"):
                st.write(f"**URL:** {doc_url}")
            st.write(row.text)