# RockyBot — Streamlit news-research app (deployed as a Hugging Face Space).
import os
import pickle
import time

import requests
import streamlit as st
from bs4 import BeautifulSoup

# ---- LangChain chain constructors ----
from langchain.chains import RetrievalQAWithSourcesChain, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# ---- LangChain Community Packages ----
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# ---- LangChain Core Packages ----
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# ---- Text Splitters ----
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ---- LLM ----
from langchain_groq import ChatGroq
st.title("RockyBot: News Research Tool π")
st.sidebar.title("News Article URLs")

# Collect up to three article URLs from the sidebar.
urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(3)]
process_url_clicked = st.sidebar.button("Process URLs")

# Path where the pickled FAISS index is persisted between reruns.
file_path = "faiss_store_openai.pkl"
main_placeholder = st.empty()

# Fail with a readable in-app message instead of an unhandled KeyError
# when GROQ_API_KEY is missing from the environment.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()

llm = ChatGroq(
    api_key=groq_api_key,
    model_name="llama-3.1-8b-instant",
)
def fetch_web_content(url):
    """Download *url* and return its visible text (best effort).

    Any network, HTTP, or parsing error is swallowed and an
    "Error fetching ..." string is returned instead, so callers can keep
    the message in the document stream rather than crash the app.
    """
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser").get_text()
    except Exception as exc:  # deliberate best-effort fetch
        return f"Error fetching {url}: {str(exc)}"
if process_url_clicked:
    # Fetch every non-blank URL; fetch errors come back as text and are
    # indexed like any other content (see fetch_web_content).
    main_placeholder.text("Data Loading...Started...β ")
    data = [(url, fetch_web_content(url)) for url in urls if url.strip()]
    main_placeholder.text("Data Loading...Completed...β ")

    # Chunk each page, preferring paragraph/sentence boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    main_placeholder.text("Text Splitting...Started...")
    docs = [
        Document(page_content=chunk, metadata={"source": url})
        for url, text in data
        for chunk in text_splitter.split_text(text)
    ]
    main_placeholder.text("Text Splitting...Completed...")

    # Embed the chunks and persist the FAISS store for later queries.
    # NOTE(review): pickling the vectorstore ties the file to this exact
    # library version — FAISS.save_local/load_local would be more robust.
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(docs, embedding_model)
    with open(file_path, "wb") as f:
        pickle.dump(vectorstore, f)
    main_placeholder.text("Vectorstore Ready! β ")
# --------------------------
# Retrieval chain (replaces the legacy RetrievalQAWithSourcesChain)
# --------------------------
# create_retrieval_chain passes the user question under the key "input"
# and the retrieved documents under "context", so the prompt must use
# {input}/{context} placeholders — {question} would raise a KeyError.
prompt = ChatPromptTemplate.from_template("""
You are a financial news analyst.
Use ONLY the provided context to answer.
Context:
{context}
Question: {input}
Provide:
- a concise answer
- list of sources at the end
""")

def build_qa_with_sources_chain(retriever):
    """Build a retrieval chain and wrap it to return a stable dict.

    Parameters
    ----------
    retriever : a LangChain retriever (e.g. from ``FAISS.as_retriever``).

    Returns
    -------
    An object with ``invoke(query: str) -> {"answer": str, "sources": list[str]}``.
    """
    # LLM + prompt -> "stuff" document chain (concatenates retrieved docs
    # into {context} and parses the LLM output to a plain string).
    document_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=prompt,
        output_parser=StrOutputParser(),
    )
    # Retriever + document chain -> full retrieval pipeline.
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=document_chain,
    )

    class QAWrapper:
        """Normalizes the chain output to {"answer", "sources"}."""

        def __init__(self, chain):
            self.chain = chain

        def invoke(self, query):
            # create_retrieval_chain requires the question under "input".
            result = self.chain.invoke({"input": query})
            answer = result.get("answer") or result.get("result") or ""
            docs = result.get("context") or []
            sources = [d.metadata.get("source", "N/A") for d in docs]
            return {"answer": answer, "sources": sources}

    return QAWrapper(retrieval_chain)
# --------------------------
# QUERY EXECUTION
# --------------------------
query = st.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        # Restore the FAISS store built in the processing step above.
        # NOTE(review): pickle.load is acceptable only because this file
        # is written by this same app; never load a pickle from elsewhere.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)

        retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
        qa_chain = build_qa_with_sources_chain(retriever)
        result = qa_chain.invoke(query)

        st.header("Answer")
        st.write(result["answer"])

        st.subheader("Sources")
        sources = result["sources"]
        if not sources:
            st.write("No sources found.")
        else:
            for src in sources:
                st.write("π", src)