Spaces:
Sleeping
Sleeping
Commit ·
cb953ef
0
Parent(s):
first commit
Browse files- .env +2 -0
- .github/workflows/main.yml +31 -0
- Dockerfile +19 -0
- __pycache__/backend.cpython-313.pyc +0 -0
- app.py +79 -0
- backend.py +117 -0
- requirements.txt +10 -0
.env
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SECURITY: a live key was committed in this file — revoke it immediately
# and provide GROQ_API_KEY via the Space's secrets settings instead.
GROQ_API_KEY="REPLACE_WITH_YOUR_GROQ_API_KEY"
|
| 2 |
+
# SECURITY: a live key was committed in this file — revoke it immediately
# and provide TAVILY_API_KEY via the Space's secrets settings instead.
TAVILY_API_KEY="REPLACE_WITH_YOUR_TAVILY_API_KEY"
|
.github/workflows/main.yml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face Hub
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
|
| 7 |
+
# This allows you to run it manually from the Actions tab for testing
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
sync-to-hub:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
steps:
|
| 14 |
+
- uses: actions/checkout@v3
|
| 15 |
+
with:
|
| 16 |
+
fetch-depth: 0
|
| 17 |
+
lfs: true
|
| 18 |
+
|
| 19 |
+
- name: Push to hub
|
| 20 |
+
env:
|
| 21 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 22 |
+
run: |
|
| 23 |
+
git config --global user.email "your-email@example.com"
|
| 24 |
+
git config --global user.name "Your Name"
|
| 25 |
+
|
| 26 |
+
# Add Hugging Face as a remote repository
|
| 27 |
+
# REPLACE 'your-username' AND 'space-name' BELOW:
|
| 28 |
+
git remote add space https://your-username:$HF_TOKEN@huggingface.co/spaces/your-username/space-name
|
| 29 |
+
|
| 30 |
+
# Force push to the Space
|
| 31 |
+
git push --force space main
|
Dockerfile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a lightweight Python version
|
| 2 |
+
FROM python:3.9-slim
|
| 3 |
+
|
| 4 |
+
# Set the working directory inside the container
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Copy your files to the container
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
# Install dependencies
|
| 11 |
+
# We use --no-cache-dir to keep the image small
|
| 12 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
+
|
| 14 |
+
# Expose port 7860 (Required for Hugging Face Spaces)
|
| 15 |
+
EXPOSE 7860
|
| 16 |
+
|
| 17 |
+
# Command to run the app
|
| 18 |
+
# Notice we force the port to 7860 here
|
| 19 |
+
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
__pycache__/backend.cpython-313.pyc
ADDED
|
Binary file (4.33 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import backend # Importing the logic file we created earlier
|
| 3 |
+
|
| 4 |
+
# --- PAGE CONFIGURATION ---
|
| 5 |
+
st.set_page_config(
|
| 6 |
+
page_title="The Devil's Advocate",
|
| 7 |
+
page_icon="😈",
|
| 8 |
+
layout="centered"
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
# --- SESSION STATE INITIALIZATION ---
|
| 12 |
+
# This keeps the chat history and database alive when the user clicks buttons
|
| 13 |
+
if "messages" not in st.session_state:
|
| 14 |
+
st.session_state.messages = [
|
| 15 |
+
{"role": "assistant", "content": "I am the Devil's Advocate. State an opinion, and I will find evidence to challenge it."}
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
if "retriever" not in st.session_state:
|
| 19 |
+
st.session_state.retriever = None
|
| 20 |
+
|
| 21 |
+
# --- SIDEBAR (Settings & Upload) ---
|
| 22 |
+
with st.sidebar:
|
| 23 |
+
st.title("😈 Settings")
|
| 24 |
+
st.write("Upload a document to debate its contents, or leave empty to debate using the Web.")
|
| 25 |
+
|
| 26 |
+
# File Uploader
|
| 27 |
+
uploaded_file = st.file_uploader("Upload PDF (Optional)", type=("pdf"))
|
| 28 |
+
|
| 29 |
+
# Logic: Process file ONLY if it's new
|
| 30 |
+
if uploaded_file and st.session_state.retriever is None:
|
| 31 |
+
with st.spinner("Reading and analyzing your document..."):
|
| 32 |
+
# Call the backend function
|
| 33 |
+
retriever = backend.process_uploaded_file(uploaded_file)
|
| 34 |
+
if retriever:
|
| 35 |
+
st.session_state.retriever = retriever
|
| 36 |
+
st.success("Document processed! I will now use this file to challenge you.")
|
| 37 |
+
else:
|
| 38 |
+
st.error("Error processing file.")
|
| 39 |
+
|
| 40 |
+
# Reset Button
|
| 41 |
+
if st.button("Clear Chat"):
|
| 42 |
+
st.session_state.messages = []
|
| 43 |
+
st.rerun()
|
| 44 |
+
|
| 45 |
+
# --- MAIN CHAT INTERFACE ---
|
| 46 |
+
st.title("The Devil's Advocate ⚖️")
|
| 47 |
+
st.markdown("*> A RAG System designed to find the counter-argument.*")
|
| 48 |
+
|
| 49 |
+
# 1. Display Chat History
|
| 50 |
+
for message in st.session_state.messages:
|
| 51 |
+
with st.chat_message(message["role"]):
|
| 52 |
+
st.markdown(message["content"])
|
| 53 |
+
|
| 54 |
+
# 2. Handle User Input
|
| 55 |
+
if prompt := st.chat_input("State your opinion (e.g., 'Remote work destroys culture')"):
|
| 56 |
+
|
| 57 |
+
# A. Display User Message
|
| 58 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 59 |
+
with st.chat_message("user"):
|
| 60 |
+
st.markdown(prompt)
|
| 61 |
+
|
| 62 |
+
# B. Generate AI Response
|
| 63 |
+
with st.chat_message("assistant"):
|
| 64 |
+
with st.spinner("Searching for counter-evidence..."):
|
| 65 |
+
try:
|
| 66 |
+
# Call the backend logic
|
| 67 |
+
# Pass the retriever (if it exists) or None (to trigger web search)
|
| 68 |
+
response = backend.get_advocate_response(
|
| 69 |
+
prompt,
|
| 70 |
+
pdf_retriever=st.session_state.retriever
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
st.markdown(response)
|
| 74 |
+
|
| 75 |
+
# C. Save Response to History
|
| 76 |
+
st.session_state.messages.append({"role": "assistant", "content": response})
|
| 77 |
+
|
| 78 |
+
except Exception as e:
|
| 79 |
+
st.error(f"An error occurred: {e}")
|
backend.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
# --- CHANGED IMPORTS FOR GROQ & HUGGINGFACE ---
|
| 5 |
+
from langchain_groq import ChatGroq
|
| 6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
+
|
| 8 |
+
from langchain_community.vectorstores import FAISS
|
| 9 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 11 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 12 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 13 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 14 |
+
|
| 15 |
+
# Load environment variables
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
# --- CONFIGURATION (UPDATED) ---
|
| 19 |
+
|
| 20 |
+
# 1. Setup Groq LLM (Uses your gsk_ key automatically if stored as GROQ_API_KEY in .env)
|
| 21 |
+
# We use Llama-3-8b because it is fast and smart.
|
| 22 |
+
llm = ChatGroq(
|
| 23 |
+
model="openai/gpt-oss-20b",
|
| 24 |
+
temperature=0.7
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
# 2. Setup Embeddings (Switched to Free Local Embeddings)
|
| 28 |
+
# This downloads a small model to your computer so you don't need an OpenAI key for this.
|
| 29 |
+
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 30 |
+
|
| 31 |
+
# Initialize Web Search Tool
|
| 32 |
+
web_search_tool = TavilySearchResults(k=3)
|
| 33 |
+
|
| 34 |
+
def process_uploaded_file(uploaded_file):
|
| 35 |
+
"""
|
| 36 |
+
Process the uploaded PDF using local embeddings.
|
| 37 |
+
"""
|
| 38 |
+
try:
|
| 39 |
+
file_path = f"temp_{uploaded_file.name}"
|
| 40 |
+
with open(file_path, "wb") as f:
|
| 41 |
+
f.write(uploaded_file.getbuffer())
|
| 42 |
+
|
| 43 |
+
loader = PyPDFLoader(file_path)
|
| 44 |
+
pages = loader.load_and_split()
|
| 45 |
+
|
| 46 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
| 47 |
+
chunk_size=1000,
|
| 48 |
+
chunk_overlap=200
|
| 49 |
+
)
|
| 50 |
+
splits = text_splitter.split_documents(pages)
|
| 51 |
+
|
| 52 |
+
# Create Vector Store using the new embeddings
|
| 53 |
+
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
|
| 54 |
+
retriever = vectorstore.as_retriever()
|
| 55 |
+
|
| 56 |
+
os.remove(file_path)
|
| 57 |
+
return retriever
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"Error processing file: {e}")
|
| 60 |
+
return None
|
| 61 |
+
|
| 62 |
+
def generate_counter_query(user_input):
|
| 63 |
+
"""
|
| 64 |
+
Uses Groq to flip the query.
|
| 65 |
+
"""
|
| 66 |
+
flip_prompt = ChatPromptTemplate.from_template(
|
| 67 |
+
"""
|
| 68 |
+
You are a research assistant.
|
| 69 |
+
User Statement: "{user_input}"
|
| 70 |
+
Task: Write a single, concise Google search query to find evidence that DISPROVES or CHALLENGES the user's statement.
|
| 71 |
+
Search Query:
|
| 72 |
+
"""
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
chain = flip_prompt | llm | StrOutputParser()
|
| 76 |
+
return chain.invoke({"user_input": user_input})
|
| 77 |
+
|
| 78 |
+
def get_advocate_response(user_input, pdf_retriever=None):
|
| 79 |
+
context_text = ""
|
| 80 |
+
source_type = ""
|
| 81 |
+
|
| 82 |
+
# --- RETRIEVAL ---
|
| 83 |
+
if pdf_retriever:
|
| 84 |
+
docs = pdf_retriever.invoke(user_input)
|
| 85 |
+
context_text = "\n\n".join([d.page_content for d in docs])
|
| 86 |
+
source_type = "Uploaded Document"
|
| 87 |
+
else:
|
| 88 |
+
print("Switching to Web Search...")
|
| 89 |
+
counter_query = generate_counter_query(user_input)
|
| 90 |
+
print(f"flipped Query: {counter_query}")
|
| 91 |
+
|
| 92 |
+
results = web_search_tool.invoke({"query": counter_query})
|
| 93 |
+
context_text = "\n\n".join([res['content'] for res in results])
|
| 94 |
+
source_type = "Web Search (Verified Sources)"
|
| 95 |
+
|
| 96 |
+
# --- GENERATION ---
|
| 97 |
+
system_template = """
|
| 98 |
+
You are the "Devil's Advocate". Critically analyze the user's statement
|
| 99 |
+
and provide a counter-argument based ONLY on the context.
|
| 100 |
+
|
| 101 |
+
CONTEXT ({source_type}):
|
| 102 |
+
{context}
|
| 103 |
+
|
| 104 |
+
USER STATEMENT:
|
| 105 |
+
{user_input}
|
| 106 |
+
|
| 107 |
+
YOUR COUNTER-ARGUMENT:
|
| 108 |
+
"""
|
| 109 |
+
|
| 110 |
+
prompt = ChatPromptTemplate.from_template(system_template)
|
| 111 |
+
chain = prompt | llm | StrOutputParser()
|
| 112 |
+
|
| 113 |
+
return chain.invoke({
|
| 114 |
+
"source_type": source_type,
|
| 115 |
+
"context": context_text,
|
| 116 |
+
"user_input": user_input
|
| 117 |
+
})
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
langchain
|
| 3 |
+
langchain-groq # CHANGED: For your gsk_ key
|
| 4 |
+
langchain-community
|
| 5 |
+
langchain-huggingface # CHANGED: For free local embeddings
|
| 6 |
+
faiss-cpu
|
| 7 |
+
python-dotenv
|
| 8 |
+
tavily-python
|
| 9 |
+
pypdf
|
| 10 |
+
sentence-transformers # Required for HuggingFace embeddings
|