hema05core committed on
Commit
da0368f
·
verified ·
1 Parent(s): f9406f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -66
app.py CHANGED
@@ -1,87 +1,125 @@
1
- import os
2
  import gradio as gr
3
-
4
- # ✅ New LangChain Hugging Face imports
5
- from langchain_huggingface import HuggingFaceEndpoint
6
  from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_community.embeddings import HuggingFaceEmbeddings
8
- from langchain_community.vectorstores import FAISS
9
- from langchain.chains import ConversationalRetrievalChain
10
- from langchain_community.document_loaders import PyPDFLoader
 
 
 
 
 
11
 
12
- # --- 1️⃣ Load your PDF ---
13
- loader = PyPDFLoader("chimera.pdf")
14
- documents = loader.load()
15
 
16
- # --- 2️⃣ Split into chunks ---
 
17
  text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
18
- texts = text_splitter.split_documents(documents)
19
 
20
- # --- 3️⃣ Create embeddings + FAISS vector store ---
21
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
22
  db = FAISS.from_documents(texts, embeddings)
23
  retriever = db.as_retriever(search_kwargs={"k": 3})
24
 
25
- # --- 4️⃣ Hugging Face Endpoint setup ---
26
- hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
27
- if hf_token is None:
28
- raise ValueError(
29
- "HUGGINGFACEHUB_API_TOKEN not found! Add it in Space Settings → Secrets."
30
- )
31
 
32
- llm = HuggingFaceEndpoint(
33
- endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
34
- headers={"Authorization": f"Bearer {hf_token}"},
35
- model_kwargs={"temperature": 0}
36
- )
37
 
38
- # --- 5️⃣ Build conversational chain ---
39
- qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)
 
 
 
 
40
 
41
- chat_history = []
 
 
 
 
42
 
43
- # --- 6️⃣ Respond function ---
44
- def respond(message, history):
45
- history = history[-6:] # keep last 3 exchanges
46
- result = qa({"question": message, "chat_history": history})
47
- history.append((message, result["answer"]))
48
- return history, history
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- # --- 7️⃣ Gradio UI ---
 
 
51
  with gr.Blocks() as demo:
 
52
  with gr.Column():
53
- warning_text = gr.HTML(
54
- "<div style='background-color:black;color:white;padding:20px;'>⚠ WARNING: Investigative Simulation ⚠<br>Are you ready?</div>"
 
 
 
 
 
 
 
 
55
  )
56
  enter_btn = gr.Button("Enter the Case")
57
- exit_btn = gr.Button("Exit")
58
- chatbot = gr.Chatbot(visible=False)
59
- user_input = gr.Textbox(placeholder="Type here...", visible=False)
60
- submit_btn = gr.Button("Send", visible=False)
61
-
62
- def enter_case():
63
- return (
64
- gr.update(visible=True), # chatbot
65
- gr.update(visible=True), # user_input
66
- gr.update(visible=True), # submit_btn
67
- gr.update(value=""), # hide warning
68
- gr.update(visible=False), # hide enter_btn
69
- gr.update(visible=False) # hide exit_btn
70
- )
71
-
72
- def exit_case():
73
- return (
74
- gr.update(value="Session ended."),
75
- gr.update(visible=False),
76
- gr.update(visible=False),
77
- gr.update(visible=False),
78
- gr.update(visible=False),
79
- gr.update(visible=False)
80
- )
81
-
82
- enter_btn.click(enter_case, inputs=None, outputs=[chatbot, user_input, submit_btn, warning_text, enter_btn, exit_btn])
83
- exit_btn.click(exit_case, inputs=None, outputs=[warning_text, chatbot, user_input, submit_btn, enter_btn, exit_btn])
84
- submit_btn.click(respond, inputs=[user_input, chatbot], outputs=[chatbot, chatbot])
 
 
 
 
85
 
 
 
 
86
  if __name__ == "__main__":
87
- demo.launch(share=True, enable_queue=True)
 
 
1
  import gradio as gr
2
+ from PyPDF2 import PdfReader
 
 
3
  from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.docstore.document import Document
7
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
+
9
# -----------------------
# 1️⃣ Load PDF & Split
# -----------------------
# The case file: each readable PDF page becomes one piece of "evidence".
pdf_path = "chimera.pdf"

reader = PdfReader(pdf_path)
# Extract each page's text exactly once. The previous version called
# extract_text() twice per page (once in the filter, once for the value),
# re-parsing every page's content stream.
evidences = [text for page in reader.pages if (text := page.extract_text())]

# Split each evidence into chunks
docs = [Document(page_content=text) for text in evidences]
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
texts = text_splitter.split_documents(docs)

# Embeddings & FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(texts, embeddings)
# Retrieve the 3 most similar chunks for each question.
retriever = db.as_retriever(search_kwargs={"k": 3})
26
 
27
# -----------------------
# 2️⃣ Local LLM
# -----------------------
# Load a small instruction-tuned seq2seq model locally, so no hosted
# inference endpoint or API token is required.
llm_model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)
33
 
34
def generate_answer(prompt, max_new_tokens=150):
    """Run the local seq2seq model on *prompt* and return the decoded text.

    Args:
        prompt: full prompt string (context + question) for the model.
        max_new_tokens: generation budget; default keeps the original 150.

    Returns:
        The model's answer with special tokens stripped.
    """
    # Local import keeps the module's import block unchanged; transformers
    # already requires torch, so this is always available.
    import torch

    inputs = tokenizer(prompt, return_tensors="pt")
    # Pure inference: disabling gradient tracking avoids building an
    # autograd graph (less memory, faster generation).
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
38
 
39
+ # -----------------------
40
+ # 3️⃣ Evidence Navigation & Chat
41
+ # -----------------------
42
def enter_case():
    """Start the investigation: show the first evidence and unlock questions.

    Returns updates for, in wiring order:
    evidence_box, state_idx, q_count, user_input, next_btn, enter_btn.
    """
    if not evidences:
        # No readable text was extracted from the PDF — fail visibly
        # instead of raising IndexError on evidences[0].
        return (
            gr.update(value="No evidence could be loaded from the PDF.", visible=True),
            0,
            0,
            gr.update(interactive=False),
            gr.update(interactive=False, visible=True),
            gr.update(visible=False),
        )
    msg = f"Evidence 1 of {len(evidences)}:\n\n{evidences[0]}"
    # evidence_box and next_btn are created with visible=False; returning a
    # bare string / interactive-only update left them hidden, so the player
    # never saw the evidence. Reveal both here.
    return (
        gr.update(value=msg, visible=True),
        0,
        0,
        gr.update(interactive=True),
        gr.update(interactive=False, visible=True),
        gr.update(visible=False),
    )
45
 
46
def next_evidence(idx):
    """Advance to the next evidence and reset the per-evidence question count.

    Returns updates for: evidence_box, state_idx, q_count, user_input,
    next_btn, enter_btn (same order as the click wiring).
    """
    last = len(evidences) - 1
    if idx >= last:
        # Nothing left to show: keep the index, lock the question box,
        # and leave navigation disabled.
        return (
            "All evidences reviewed. Investigation completed.",
            idx,
            0,
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(visible=False),
        )
    nxt = idx + 1
    header = f"Evidence {nxt + 1} of {len(evidences)}:\n\n{evidences[nxt]}"
    return (
        header,
        nxt,
        0,
        gr.update(interactive=True),
        gr.update(interactive=False),
        gr.update(visible=False),
    )
51
 
52
def ask_question(message, history, idx, qcount):
    """Answer one question about the current evidence via retrieval + LLM.

    Args:
        message: the user's question text.
        history: chatbot history as a list of (user, bot) pairs, or None.
        idx: current evidence index (carried through the wiring; retrieval
             itself searches the whole index).
        qcount: questions already asked for this evidence (max 3).

    Returns updates for: chatbot, q_count, user_input, next_btn.
    """
    # Normalize up front: on the very first call Gradio may pass None.
    history = history or []

    if qcount >= 3:
        # Per-evidence budget exhausted: lock the input, allow moving on.
        return history, qcount, gr.update(interactive=False), gr.update(interactive=True)

    message = (message or "").strip()
    if not message:
        # Ignore empty submissions without burning one of the 3 questions.
        return history, qcount, gr.update(interactive=True, value=""), gr.update(interactive=False)

    relevant_docs = retriever.get_relevant_documents(message)
    context = "\n".join(doc.page_content for doc in relevant_docs)
    prompt = f"Context: {context}\n\nQuestion: {message}\nAnswer:"
    answer = generate_answer(prompt)

    history.append((message, answer))
    qcount += 1

    # Clear the textbox after each send (previously the question lingered
    # and could be re-submitted), and lock it once 3 questions are used.
    disable_input = gr.update(interactive=(qcount < 3), value="")
    enable_next = gr.update(interactive=(qcount >= 3))

    return history, qcount, disable_input, enable_next
69
 
70
# -----------------------
# 4️⃣ Gradio UI
# -----------------------
with gr.Blocks() as demo:
    with gr.Column():
        # Opening briefing shown before the case starts.
        warning_msg = gr.Markdown(
            """
            ⚠ **WARNING — INVESTIGATIVE SIMULATION** ⚠
            You are about to enter The Chimera Case, a high-stakes investigation into Innovate Future Labs (IFL)
            and Project Chimera.
            There are 11 pieces of evidence. For each evidence, you can ask **only 3 questions**.
            Total questions allowed: 33.
            Treat every claim as unverified until verified by evidence.
            Are you ready to proceed?
            """,
        )
        enter_btn = gr.Button("Enter the Case")

        # Evidence display — hidden until the case is entered.
        evidence_box = gr.Textbox(label="Evidence", interactive=False, lines=10, visible=False)
        next_btn = gr.Button("Next Evidence", interactive=False, visible=False)

        # Question/answer area — input stays locked until the case starts.
        chatbot = gr.Chatbot()
        user_input = gr.Textbox(placeholder="Ask a question about this evidence...", interactive=False)
        submit_btn = gr.Button("Send", interactive=False)

        # Per-session state: current evidence index and questions asked.
        state_idx = gr.State(value=0)
        q_count = gr.State(value=0)

    # -----------------------
    # Button Actions
    # -----------------------
    enter_btn.click(
        enter_case,
        outputs=[evidence_box, state_idx, q_count, user_input, next_btn, enter_btn],
    )

    next_btn.click(
        next_evidence,
        inputs=[state_idx],
        outputs=[evidence_box, state_idx, q_count, user_input, next_btn, enter_btn],
    )

    submit_btn.click(
        ask_question,
        inputs=[user_input, chatbot, state_idx, q_count],
        outputs=[chatbot, q_count, user_input, next_btn],
    )
120
 
121
# -----------------------
# 5️⃣ Launch
# -----------------------
if __name__ == "__main__":
    # Entry point: start the Gradio server when run as a script.
    demo.launch()