Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import numpy as np | |
| import pandas as pd | |
| import faiss | |
| import anthropic | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer | |
| from pathlib import Path | |
# Page chrome: browser-tab settings, then the visible title and a one-line
# description of what the assistant answers from.
_PAGE_SETTINGS = {
    "page_title": "DAS / Fiber Optics Research Assistant",
    "page_icon": "",
    "layout": "centered",
}
_INTRO_TEXT = (
    "Ask questions about distributed acoustic sensing (DAS) and fiber optic "
    "technology in oilfield applications. Answers are grounded in scientific papers."
)

st.set_page_config(**_PAGE_SETTINGS)
st.title("DAS & Fiber Optics Research Assistant")
st.caption(_INTRO_TEXT)
@st.cache_resource
def load_rag():
    """Load the retrieval artifacts stored under ``rag_index/``.

    Reads ``config.json`` to find the embedding model name (key
    ``"embed_model"``), then loads that SentenceTransformer model, the FAISS
    index in ``chunks.index`` and the chunk table in ``chunks.parquet``.

    The function is wrapped in ``st.cache_resource`` because Streamlit
    re-executes the whole script on every interaction; without caching the
    model and index would be reloaded from disk on each form submit. The
    cached objects are shared rather than copied, so callers must not mutate
    them in place.

    Returns:
        A ``(model, index, chunks)`` tuple: the embedding model, the FAISS
        index, and the chunks DataFrame.
    """
    index_dir = Path("rag_index")
    # Explicit encoding so the config parses the same on every platform.
    with open(index_dir / "config.json", encoding="utf-8") as f:
        cfg = json.load(f)
    model = SentenceTransformer(cfg["embed_model"])
    # faiss.read_index is given a plain string path, hence the str() conversion.
    index = faiss.read_index(str(index_dir / "chunks.index"))
    chunks = pd.read_parquet(index_dir / "chunks.parquet")
    return model, index, chunks


# Module-level handles used by retrieve(); loaded once thanks to the cache.
embed_model, faiss_index, df_chunks = load_rag()
def retrieve(query: str, top_k: int = 5):
    """Return the chunks most similar to ``query``, with their scores.

    The query is embedded with the module-level ``embed_model`` (normalized,
    cast to float32) and searched against ``faiss_index``.

    Args:
        query: Free-text question to embed and search for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A copy of the matching rows of ``df_chunks`` in the order FAISS
        returned them, with an added ``score`` column. May contain fewer than
        ``top_k`` rows when the index holds fewer vectors than requested.
    """
    q_emb = embed_model.encode([query], normalize_embeddings=True).astype(np.float32)
    scores, indices = faiss_index.search(q_emb, top_k)

    # FAISS pads the label array with -1 when it cannot find top_k neighbours.
    # Passing -1 to .iloc would silently select the LAST chunk as a bogus hit,
    # so drop the padding before indexing.
    found = indices[0] >= 0
    results = df_chunks.iloc[indices[0][found]].copy()
    results["score"] = scores[0][found]
    return results
def build_prompt(query: str, chunks_df) -> str:
    """Assemble the LLM prompt from the question and the retrieved excerpts.

    Each row of ``chunks_df`` becomes a numbered ``[Source N]`` entry
    (1-based, in row order) built from the row's ``doc_title``, ``doc_year``
    and ``text`` fields. Entries are separated by a blank line and wrapped
    between the ``--- EXCERPTS ---`` and ``--- END ---`` markers.

    Args:
        query: The user's question, inserted verbatim after the excerpts.
        chunks_df: DataFrame of retrieved chunks providing ``doc_title``,
            ``doc_year`` and ``text`` for every row.

    Returns:
        The complete prompt string, ending with ``"Answer:"``.
    """
    excerpts = "\n\n".join(
        f"[Source {rank}] {hit.doc_title} ({hit.doc_year})\n{hit.text}"
        for rank, (_, hit) in enumerate(chunks_df.iterrows(), start=1)
    )
    instructions = (
        "You are a technical assistant specializing in fiber optic sensing "
        "and DAS for oilfield applications. Answer using ONLY the excerpts below. "
        "Cite sources like [Source N].\n\n"
    )
    excerpt_section = f"--- EXCERPTS ---\n{excerpts}\n--- END ---\n\n"
    question_section = f"Question: {query}\n\nAnswer:"
    return instructions + excerpt_section + question_section
# --- Query form --------------------------------------------------------------
# Inputs live in a form so the script only acts when "Ask" is pressed rather
# than on every widget change.
with st.form("query_form"):
    query = st.text_area(
        "Your question",
        placeholder="e.g. How does DAS detect hydraulic fracture propagation?",
        height=100,
    )
    top_k = st.slider("Number of sources to retrieve", 3, 10, 5)
    submitted = st.form_submit_button("Ask", type="primary")

if submitted:
    if not query.strip():
        st.warning("Please enter a question.")
        st.stop()

    # Validate the credential before doing any retrieval work so a
    # misconfigured deployment fails fast with a clear message.
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        st.error("ANTHROPIC_API_KEY not set in Space secrets.")
        st.stop()

    with st.spinner("Searching papers and generating answer..."):
        # This is the UI boundary: any failure in retrieval or the API call is
        # shown to the user instead of surfacing as a raw traceback.
        try:
            retrieved = retrieve(query, top_k=top_k)
            prompt = build_prompt(query, retrieved)
            client = anthropic.Anthropic(api_key=api_key)
            message = client.messages.create(
                model="claude-haiku-4-5-20251001",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            answer = message.content[0].text
        except Exception as e:
            st.error(f"Something went wrong: {e}")
            st.stop()

    st.subheader("Answer")
    st.write(answer)

    st.subheader("Retrieved sources")
    # Numbering matches the [Source N] labels used in the prompt, so citations
    # in the answer line up with these expanders.
    for i, (_, row) in enumerate(retrieved.iterrows(), 1):
        with st.expander(f"[Source {i}] {row.doc_title} (score: {row.score:.3f})"):
            st.write(f"**Year:** {row.doc_year}")
            # The URL may be missing (NaN/None) or the column absent; only
            # real strings can be tested with startswith.
            url = row.get("doc_url")
            if isinstance(url, str) and url.startswith("http"):
                st.write(f"**URL:** {url}")
            st.write(row.text)