Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from langchain.schema import Document | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from transformers import pipeline | |
# Verified healthcare sources, grouped by publisher. Dicts preserve insertion
# order, so flattening the groups below yields the pages in the order listed.
_SOURCES_BY_PUBLISHER = {
    "CDC": (
        "https://www.cdc.gov/asthma/faqs.htm",
        "https://www.cdc.gov/bloodpressure/index.htm",
        "https://www.cdc.gov/diabetes/basics/index.html",
        "https://www.cdc.gov/obesity/data/adult.html",
        "https://www.cdc.gov/nutrition/data-statistics/index.html",
        "https://www.cdc.gov/flu/about/index.html",
        "https://www.cdc.gov/hepatitis/hbv/index.htm",
        "https://www.cdc.gov/stroke/about.htm",
        "https://www.cdc.gov/arthritis/basics/index.html",
        "https://www.cdc.gov/kidneydisease/publications-resources/index.html",
        "https://www.cdc.gov/vaccines/vpd/vaccines-list.html",
        "https://www.cdc.gov/coronavirus/2019-ncov/your-health/about-covid-19/basics-covid-19.html",
        "https://www.cdc.gov/rsv/about/symptoms.html",
        "https://www.cdc.gov/tb/topic/basics/default.htm",
    ),
    "WHO": (
        "https://www.who.int/news-room/fact-sheets/detail/mental-disorders",
        "https://www.who.int/news-room/fact-sheets/detail/influenza-(seasonal)",
        "https://www.who.int/news-room/questions-and-answers/item/vaccines-and-immunization-what-is-vaccination",
    ),
    "Mayo Clinic": (
        "https://www.mayoclinic.org/diseases-conditions/asthma/symptoms-causes/syc-20369653",
        "https://www.mayoclinic.org/diseases-conditions/depression/symptoms-causes/syc-20356007",
        "https://www.mayoclinic.org/diseases-conditions/high-blood-pressure/symptoms-causes/syc-20373410",
        "https://www.mayoclinic.org/diseases-conditions/type-2-diabetes/symptoms-causes/syc-20351193",
        "https://www.mayoclinic.org/healthy-lifestyle/nutrition-and-healthy-eating/in-depth/nutrition/art-20049340",
    ),
    "MedlinePlus": (
        "https://medlineplus.gov/asthma.html",
        "https://medlineplus.gov/depression.html",
        "https://medlineplus.gov/diabetes.html",
        "https://medlineplus.gov/influenza.html",
        "https://medlineplus.gov/vaccines.html",
        "https://medlineplus.gov/coronaryarterydisease.html",
    ),
    "NIH": (
        "https://www.nih.gov/about-nih/what-we-do/nih-almanac/national-heart-lung-blood-institute-nhlbi",
        "https://www.nih.gov/news-events/nih-research-matters/brain-aging-tied-cell-cleanup-problems",
        "https://www.nih.gov/news-events/nih-research-matters/covid-19-causes-changes-brain",
    ),
    "NIMH": (
        "https://www.nimh.nih.gov/health/topics/depression",
        "https://www.nimh.nih.gov/health/topics/anxiety-disorders",
        "https://www.nimh.nih.gov/health/publications/depression",
        "https://www.nimh.nih.gov/health/publications/anxiety-disorders",
    ),
    "U.S. Dept of Health": (
        "https://health.gov/myhealthfinder/topics/health-conditions/diabetes",
        "https://health.gov/myhealthfinder/topics/everyday-healthy-living/nutrition",
        "https://health.gov/myhealthfinder/topics/everyday-healthy-living/physical-activity",
    ),
    "Johns Hopkins Medicine": (
        "https://www.hopkinsmedicine.org/health/conditions-and-diseases/high-blood-pressure-hypertension",
        "https://www.hopkinsmedicine.org/health/conditions-and-diseases/diabetes",
    ),
    "Cleveland Clinic": (
        "https://my.clevelandclinic.org/health/diseases/22276-long-covid",
        "https://my.clevelandclinic.org/health/diseases/9634-diabetes",
    ),
    "Harvard Health": (
        "https://www.health.harvard.edu/mental-health/what-causes-depression",
        "https://www.health.harvard.edu/staying-healthy/foods-linked-to-better-brainpower",
        "https://www.health.harvard.edu/newsletter_article/how-sleep-cleans-your-brain",
    ),
    "Health.gov": (
        "https://www.health.gov/healthypeople",
        "https://www.health.gov/news",
    ),
    "NIH Office of Dietary Supplements": (
        "https://ods.od.nih.gov/factsheets/Iron-HealthProfessional/",
        "https://ods.od.nih.gov/factsheets/VitaminD-HealthProfessional/",
    ),
    "Red Cross": (
        "https://www.redcross.org/about-us/news-and-events/news/2023/blood-donation-facts.html",
    ),
}

# Flat list of every page to scrape, in publisher order.
urls = [page for group in _SOURCES_BY_PUBLISHER.values() for page in group]
# Scraper: fetch one page and keep only its substantial paragraph text
def scrape_url(url):
    """Download *url* and return its paragraph text as a ``Document``.

    ``nav``, ``footer``, ``script``, ``style``, ``header``, ``noscript`` and
    ``iframe`` elements are removed first. Of the remaining ``<p>`` elements,
    only those whose text is longer than 50 characters are kept; they are
    joined with blank lines.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``Document`` whose ``page_content`` is the extracted text and whose
        ``metadata["source"]`` is *url*, or ``None`` when the request fails,
        the server answers with an HTTP error status, or parsing raises.
    """
    try:
        response = requests.get(url, timeout=10)
        # Reject 4xx/5xx answers: otherwise the text of an error or
        # "access denied" page would be indexed as if it were health content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        for tag in soup(["nav", "footer", "script", "style", "header", "noscript", "iframe"]):
            tag.decompose()
        # Join text nodes with a space: with the default empty separator,
        # strip=True glues words together across inline tags such as <a>/<b>.
        paragraphs = (p.get_text(" ", strip=True) for p in soup.find_all("p"))
        text = "\n\n".join(para for para in paragraphs if len(para) > 50)
        return Document(page_content=text, metadata={"source": url})
    except Exception as e:
        # Deliberately best-effort: one unreachable or malformed page must
        # not stop the remaining sources from being loaded.
        print(f"Error scraping {url}: {e}")
        return None
# Vector store: scrape every source, chunk the text, and index it
def prepare_knowledge_base():
    """Scrape all configured URLs and build a FAISS index over their text.

    Each page in ``urls`` is fetched with ``scrape_url``; the resulting
    documents are split with ``CharacterTextSplitter(chunk_size=500,
    chunk_overlap=50)`` and embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        RuntimeError: If no page produced any text, so there is nothing to
            index.
    """
    scraped = (scrape_url(url) for url in urls)
    # Drop failed fetches (None) as well as pages that yielded no text.
    docs = [doc for doc in scraped if doc is not None and doc.page_content.strip()]
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Fail with an actionable message instead of handing an empty list
        # to the index builder.
        raise RuntimeError(
            "No content could be scraped from any source; "
            "cannot build the knowledge base."
        )
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embeddings)
# Load LLM
def load_llm():
    """Create the text2text-generation pipeline used to write answers.

    Returns:
        A ``transformers`` pipeline for ``google/flan-t5-base`` created with
        ``device=-1``, sampling disabled, a single beam, and up to 400 newly
        generated tokens per call.
    """
    generation_settings = {
        "max_new_tokens": 400,
        "do_sample": False,
        "num_beams": 1,
    }
    return pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        device=-1,
        **generation_settings,
    )
# UI
st.set_page_config(page_title="Healthcare RAG", layout="centered")
st.title("🏥 Healthcare RAG Assistant")
st.markdown("Ask your question. The assistant will summarize info from verified sources like CDC, WHO, Mayo Clinic, and NIMH.")


@st.cache_resource(show_spinner=False)
def _load_resources():
    """Build the vector store and load the LLM, cached across script reruns.

    Streamlit re-executes this script on every widget interaction. Without
    caching, each question would re-scrape every source, re-embed all chunks
    and reload the model.

    Returns:
        A ``(vectorstore, llm)`` tuple.
    """
    return prepare_knowledge_base(), load_llm()


# Load model and vectorstore (only the first run does the actual work)
with st.spinner("Loading verified knowledge base..."):
    vectorstore, llm = _load_resources()

question = st.text_input("💬 Ask a healthcare question:")
# Ignore empty or whitespace-only input so no generation is triggered for it.
if question.strip():
    with st.spinner("Generating expert response..."):
        retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
        context_docs = retriever.get_relevant_documents(question)
        # Trim context for faster generation
        context = "\n\n".join(doc.page_content for doc in context_docs)[:2000]
        prompt = f"""
You are a helpful and professional healthcare assistant. You have access to trusted health documents from CDC, Mayo Clinic, WHO, and NIMH.
Using only the context below, summarize the information in a clear, helpful, and medically accurate way. Do not copy or fabricate facts. Structure the response using short paragraphs or bullet points. Act like you're writing a high-quality health blog or patient education guide.
Context:
{context}
Question: {question}
Answer:
"""
        response = llm(prompt)
    st.success(response[0]['generated_text'])
    with st.expander("📚 Sources used"):
        # Both retrieved chunks may come from the same page; list each source
        # once, keeping retrieval order.
        sources = dict.fromkeys(doc.metadata['source'] for doc in context_docs)
        for source in sources:
            st.markdown(f"- [{source}]({source})")