"""The Chimera Case — an evidence-review investigation game.

Loads a PDF of "evidence" pages, indexes them in a FAISS vector store,
and serves a Gradio UI in which the player steps through the evidences
one at a time, asking up to 3 RAG-grounded questions per evidence
(answered by a local flan-t5-small model).
"""

import gradio as gr
from PyPDF2 import PdfReader
# NOTE(review): these `langchain.*` paths are deprecated in newer LangChain
# releases (moved to `langchain_community` / `langchain_text_splitters`);
# kept as-is to avoid adding new dependencies — confirm installed version.
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# -----------------------
# 1️⃣ Load PDF & Split
# -----------------------
pdf_path = "chimera.pdf"
reader = PdfReader(pdf_path)

# Extract each page's text exactly once (extraction is expensive) and keep
# only non-empty pages as individual "evidences".
evidences = [
    text for page in reader.pages if (text := page.extract_text())
]

# Split each evidence into overlapping chunks for retrieval.
docs = [Document(page_content=text) for text in evidences]
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
texts = text_splitter.split_documents(docs)

# Embeddings & FAISS index; retriever returns the top-3 chunks per query.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever(search_kwargs={"k": 3})

# -----------------------
# 2️⃣ Local LLM
# -----------------------
llm_model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)


def generate_answer(prompt):
    """Run the local seq2seq model on *prompt* and return the decoded text."""
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# -----------------------
# 3️⃣ Evidence Navigation & Chat
# -----------------------
def enter_case():
    """Start the investigation at evidence 0.

    Returns updates for, in order:
    evidence_box, state_idx, q_count, user_input, next_btn, enter_btn.
    """
    msg = f"Evidence 1 of {len(evidences)}:\n\n{evidences[0]}"
    return (
        # BUGFIX: evidence_box and next_btn are created with visible=False;
        # they must be revealed here or the evidence never shows.
        gr.update(value=msg, visible=True),
        0,  # reset evidence index
        0,  # reset per-evidence question count
        gr.update(interactive=True),                    # allow questions
        gr.update(interactive=False, visible=True),     # Next locked until 3 Qs
        gr.update(visible=False),                       # hide the Enter button
    )


def next_evidence(idx):
    """Advance to the next evidence, or end the investigation.

    Resets the per-evidence question count to 0 on every transition.
    """
    if idx + 1 < len(evidences):
        idx += 1
        return (
            f"Evidence {idx+1} of {len(evidences)}:\n\n{evidences[idx]}",
            idx,
            0,
            gr.update(interactive=True),
            gr.update(interactive=False),
            gr.update(visible=False),
        )
    # BUGFIX: this message was a string literal broken across a physical
    # line (a syntax error); restored as a single literal.
    return (
        "All evidences reviewed. Investigation completed.",
        idx,
        0,
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(visible=False),
    )


def ask_question(message, history, idx, qcount):
    """Answer one question about the current evidence via RAG.

    Enforces the 3-questions-per-evidence limit: once *qcount* reaches 3
    the input box is disabled and the Next Evidence button is enabled.
    """
    if qcount >= 3:
        # Limit already reached — keep input locked, Next unlocked.
        return history, qcount, gr.update(interactive=False), gr.update(interactive=True)

    relevant_docs = retriever.get_relevant_documents(message)
    context = "\n".join(doc.page_content for doc in relevant_docs)
    prompt = f"Context: {context}\n\nQuestion: {message}\nAnswer:"
    answer = generate_answer(prompt)

    history = history or []
    history.append((message, answer))
    qcount += 1

    # Clear the textbox after each send; lock it after the third question.
    disable_input = gr.update(value="", interactive=(qcount < 3))
    enable_next = gr.update(interactive=(qcount >= 3))
    return history, qcount, disable_input, enable_next


# -----------------------
# 4️⃣ Gradio UI
# -----------------------
with gr.Blocks() as demo:
    # Warning Message
    with gr.Column():
        warning_msg = gr.Markdown(
            """
⚠ **WARNING — INVESTIGATIVE SIMULATION** ⚠

You are about to enter The Chimera Case, a high-stakes investigation
into Innovate Future Labs (IFL) and Project Chimera.

There are 11 pieces of evidence. For each evidence, you can ask
**only 3 questions**. Total questions allowed: 33.

Treat every claim as unverified until verified by evidence.

Are you ready to proceed?
"""
        )
        enter_btn = gr.Button("Enter the Case")

    # Evidence display (hidden until the case is entered)
    evidence_box = gr.Textbox(
        label="Evidence", interactive=False, lines=10, visible=False
    )
    next_btn = gr.Button("Next Evidence", interactive=False, visible=False)

    # Chatbot
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(
        placeholder="Ask a question about this evidence...", interactive=False
    )
    submit_btn = gr.Button("Send", interactive=False)

    state_idx = gr.State(value=0)   # current evidence index
    q_count = gr.State(value=0)     # questions asked on current evidence

    # -----------------------
    # Button Actions
    # -----------------------
    enter_btn.click(
        enter_case,
        outputs=[evidence_box, state_idx, q_count, user_input, next_btn, enter_btn],
    )

    next_btn.click(
        next_evidence,
        inputs=[state_idx],
        outputs=[evidence_box, state_idx, q_count, user_input, next_btn, enter_btn],
    )

    submit_btn.click(
        ask_question,
        inputs=[user_input, chatbot, state_idx, q_count],
        outputs=[chatbot, q_count, user_input, next_btn],
    )

# -----------------------
# 5️⃣ Launch
# -----------------------
if __name__ == "__main__":
    demo.launch()