import gradio as gr
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
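
# NOTE (assumption): these are the classic pre-0.1 LangChain import paths; on newer
# releases the FAISS and HuggingFaceEmbeddings imports live under langchain_community.
# Rough dependency set assumed here: gradio, PyPDF2, langchain, sentence-transformers,
# faiss-cpu, transformers (plus torch as the model backend).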

# -----------------------
# 1️⃣ Load PDF & Split
# -----------------------
pdf_path = "chimera.pdf"

reader = PdfReader(pdf_path)
evidences = [page.extract_text() for page in reader.pages if page.extract_text()]

# Split each evidence into chunks
docs = [Document(page_content=text) for text in evidences]
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
texts = text_splitter.split_documents(docs)

# Embeddings & FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever(search_kwargs={"k": 3})
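
# Optional sanity check (illustrative query, not taken from the case file):
# hits = retriever.get_relevant_documents("What is Project Chimera?")
# print(len(hits), hits[0].page_content[:200] if hits else "no hits")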

# -----------------------
# 2️⃣ Local LLM
# -----------------------
llm_model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)

def generate_answer(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
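
# Example usage (hypothetical prompt, not tied to the PDF):
#   print(generate_answer("Context: <retrieved evidence text>\n\nQuestion: <your question>\nAnswer:"))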

# -----------------------
# 3️⃣ Evidence Navigation & Chat
# -----------------------
# Outputs (in order): evidence_box, state_idx, q_count, user_input, submit_btn, next_btn, enter_btn
def enter_case():
    msg = f"Evidence 1 of {len(evidences)}:\n\n{evidences[0]}"
    # Reveal the evidence display and navigation, unlock the chat controls, hide the entry button.
    return (gr.update(value=msg, visible=True), 0, 0, gr.update(interactive=True),
            gr.update(interactive=True), gr.update(visible=True, interactive=False), gr.update(visible=False))

def next_evidence(idx):
    if idx + 1 < len(evidences):
        idx += 1
        # Reset the question budget and re-enable the chat controls for the new evidence.
        return (f"Evidence {idx+1} of {len(evidences)}:\n\n{evidences[idx]}", idx, 0, gr.update(interactive=True),
                gr.update(interactive=True), gr.update(interactive=False), gr.update(visible=False))
    # No evidence left: lock all controls.
    return ("All evidences reviewed. Investigation completed.", idx, 0, gr.update(interactive=False),
            gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=False))

def ask_question(message, history, idx, qcount):
    history = history or []
    if qcount >= 3:
        # Question budget for this evidence is spent: keep the chat locked, leave "Next Evidence" open.
        return history, qcount, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True)

    # Retrieve the most relevant chunks and build a grounded prompt for the local model.
    relevant_docs = retriever.get_relevant_documents(message)
    context = "\n".join(doc.page_content for doc in relevant_docs)
    prompt = f"Context: {context}\n\nQuestion: {message}\nAnswer:"
    answer = generate_answer(prompt)

    history.append((message, answer))
    qcount += 1

    # After the third question, lock the input and unlock "Next Evidence".
    allow_more = qcount < 3
    clear_input = gr.update(value="", interactive=allow_more)
    toggle_send = gr.update(interactive=allow_more)
    enable_next = gr.update(interactive=not allow_more)

    return history, qcount, clear_input, toggle_send, enable_next

# -----------------------
# 4️⃣ Gradio UI
# -----------------------
with gr.Blocks() as demo:
    # Warning Message
    with gr.Column():
        warning_msg = gr.Markdown(
            """
            ⚠ **WARNING — INVESTIGATIVE SIMULATION** ⚠  
            You are about to enter The Chimera Case, a high-stakes investigation into Innovate Future Labs (IFL)
            and Project Chimera.  
            There are 11 pieces of evidence. For each evidence, you can ask **only 3 questions**.  
            Total questions allowed: 33.  
            Treat every claim as unverified until it is supported by the evidence.  
            Are you ready to proceed?  
            """,
        )
        enter_btn = gr.Button("Enter the Case")
    
    # Evidence display
    evidence_box = gr.Textbox(label="Evidence", interactive=False, lines=10, visible=False)
    next_btn = gr.Button("Next Evidence", interactive=False, visible=False)
    
    # Chatbot
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Ask a question about this evidence...", interactive=False)
    submit_btn = gr.Button("Send", interactive=False)
    
    state_idx = gr.State(value=0)
    q_count = gr.State(value=0)
    
    # -----------------------
    # Button Actions
    # -----------------------
    enter_btn.click(
        enter_case,
        outputs=[evidence_box, state_idx, q_count, user_input, submit_btn, next_btn, enter_btn]
    )

    next_btn.click(
        next_evidence,
        inputs=[state_idx],
        outputs=[evidence_box, state_idx, q_count, user_input, submit_btn, next_btn, enter_btn]
    )

    submit_btn.click(
        ask_question,
        inputs=[user_input, chatbot, state_idx, q_count],
        outputs=[chatbot, q_count, user_input, submit_btn, next_btn]
    )

# -----------------------
# 5️⃣ Launch
# -----------------------
if __name__ == "__main__":
    demo.launch()