Spaces:
Sleeping
Sleeping
changed to old one and enhanced a bit
Browse files
app.py
CHANGED
|
@@ -10,208 +10,183 @@ from langchain_community.vectorstores import FAISS
|
|
| 10 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 11 |
from threading import Thread
|
| 12 |
from dotenv import load_dotenv
|
| 13 |
-
import json
|
| 14 |
|
| 15 |
load_dotenv()
|
| 16 |
|
| 17 |
-
|
|
|
|
| 18 |
STORAGE_DIR = "storage"
|
| 19 |
-
CLEANUP_INTERVAL = 600
|
| 20 |
-
SESSION_TTL = 1000
|
| 21 |
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
| 22 |
OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"
|
| 23 |
|
| 24 |
if not os.path.exists(STORAGE_DIR):
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
# === CLEANUP THREAD ===
|
| 28 |
def cleanup_old_sessions():
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
Thread(target=cleanup_old_sessions, daemon=True).start()
|
| 38 |
|
| 39 |
-
|
|
|
|
| 40 |
def process_pdf(pdf_file):
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
title = metadata.get("/Title", "Unknown Title")
|
| 50 |
-
author = metadata.get("/Author", "Unknown Author")
|
| 51 |
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
chunks = splitter.split_text(text)
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
"session_id": session_id,
|
| 69 |
-
"created_at": time.ctime()
|
| 70 |
-
}
|
| 71 |
-
with open(os.path.join(session_path, "metadata.json"), "w") as f:
|
| 72 |
-
json.dump(meta_data, f)
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
|
| 78 |
|
| 79 |
-
|
| 80 |
-
def query_paper(session_id, user_message, chat_history):
|
| 81 |
-
if not session_id or not os.path.exists(os.path.join(STORAGE_DIR, session_id)):
|
| 82 |
-
chat_history = chat_history or []
|
| 83 |
-
chat_history.append({"role": "system", "content": "Session expired or not found. Upload the paper again."})
|
| 84 |
-
return chat_history, ""
|
| 85 |
-
|
| 86 |
-
if not user_message.strip():
|
| 87 |
-
return chat_history, ""
|
| 88 |
-
|
| 89 |
-
session_path = os.path.join(STORAGE_DIR, session_id)
|
| 90 |
-
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 91 |
-
db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
|
| 92 |
-
retriever = db.as_retriever(search_kwargs={"k": 3})
|
| 93 |
-
|
| 94 |
-
metadata_path = os.path.join(session_path, "metadata.json")
|
| 95 |
-
if os.path.exists(metadata_path):
|
| 96 |
-
with open(metadata_path, "r") as f:
|
| 97 |
-
metadata = json.load(f)
|
| 98 |
-
else:
|
| 99 |
-
metadata = {"title": "Unknown", "author": "Unknown", "pages": "Unknown"}
|
| 100 |
-
|
| 101 |
-
lower_q = user_message.lower()
|
| 102 |
-
if "title" in lower_q or "name of this paper" in lower_q:
|
| 103 |
-
answer = f"The title of this paper is: **{metadata['title']}**."
|
| 104 |
-
elif "author" in lower_q or "who wrote" in lower_q:
|
| 105 |
-
answer = f"The author of this paper is: **{metadata['author']}**."
|
| 106 |
-
elif "pages" in lower_q or "how many pages" in lower_q:
|
| 107 |
-
answer = f"This paper has **{metadata['pages']} pages**."
|
| 108 |
-
else:
|
| 109 |
-
docs = retriever.invoke(user_message)
|
| 110 |
-
context = "\n\n".join([d.page_content for d in docs])
|
| 111 |
-
|
| 112 |
-
prompt = f"""
|
| 113 |
-
You are an AI research assistant. Use the paper content and metadata to answer clearly.
|
| 114 |
-
|
| 115 |
-
Paper Metadata:
|
| 116 |
-
- Title: {metadata['title']}
|
| 117 |
-
- Author: {metadata['author']}
|
| 118 |
-
- Pages: {metadata['pages']}
|
| 119 |
|
|
|
|
| 120 |
Context from paper:
|
| 121 |
{context}
|
| 122 |
|
| 123 |
Question: {user_message}
|
| 124 |
Answer:
|
| 125 |
"""
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
with gr.Blocks() as demo:
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 11 |
from threading import Thread
|
| 12 |
from dotenv import load_dotenv
|
|
|
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
+
# === CONFIG ===
|
| 17 |
+
|
| 18 |
# Root folder for per-session data; each upload gets its own sub-folder here.
STORAGE_DIR = "storage"
# Seconds the cleanup thread sleeps between sweeps (10 min).
CLEANUP_INTERVAL = 600
# Seconds a session folder may stay unmodified before it is deleted.
# NOTE: 1000 s is roughly 17 minutes, not 30; raise to 1800 if 30 min is intended.
SESSION_TTL = 1000
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"

# exist_ok=True replaces the check-then-create pair, which could race with
# another process creating the folder between the two calls.
os.makedirs(STORAGE_DIR, exist_ok=True)
|
| 26 |
+
|
| 27 |
+
# === CLEANUP THREAD ===
|
| 28 |
|
|
|
|
| 29 |
def cleanup_old_sessions():
    """Periodically delete session folders that have outlived SESSION_TTL.

    Runs forever: each sweep lists STORAGE_DIR, removes every sub-folder whose
    modification time is more than SESSION_TTL seconds old, then sleeps for
    CLEANUP_INTERVAL seconds. Intended to run in a daemon thread.

    Filesystem errors are tolerated per folder so that one bad entry (for
    example a folder removed concurrently) cannot kill the thread and stop
    all future cleanup.
    """
    while True:
        now = time.time()
        try:
            folders = os.listdir(STORAGE_DIR)
        except OSError:
            # Storage folder missing or unreadable; retry on the next sweep.
            folders = []

        for folder in folders:
            path = os.path.join(STORAGE_DIR, folder)
            try:
                expired = os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL
            except OSError:
                # The entry disappeared between listdir() and getmtime().
                continue
            if expired:
                # Best-effort removal: a partially deleted folder is retried
                # on the next sweep instead of raising here.
                shutil.rmtree(path, ignore_errors=True)

        time.sleep(CLEANUP_INTERVAL)


Thread(target=cleanup_old_sessions, daemon=True).start()
|
| 39 |
|
| 40 |
+
# === PDF PROCESSING ===
|
| 41 |
+
|
| 42 |
def process_pdf(pdf_file):
    """Index an uploaded PDF into a new session folder.

    Extracts the text of every page, splits it into overlapping chunks,
    embeds the chunks and saves a FAISS index under STORAGE_DIR/<session_id>.

    Args:
        pdf_file: Uploaded file object exposing a ``name`` path attribute,
            or None when nothing was uploaded.

    Returns:
        A ``(status_message, session_id, chat_history)`` tuple. When there is
        no file, or no text could be extracted, ``session_id`` is "" and
        ``chat_history`` is empty.
    """
    if pdf_file is None:
        return "No file uploaded.", "", []

    reader = PdfReader(pdf_file.name)
    # Extract each page once; the original called extract_text() twice per
    # page (once to filter, once to collect), doubling the parsing work.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(page_text for page_text in page_texts if page_text)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    if not chunks:
        # Scanned or image-only PDFs yield no text; building an index from an
        # empty chunk list would fail, so report it before creating a session.
        return "No extractable text found in this PDF.", "", []

    session_id = str(uuid.uuid4())
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    session_path = os.path.join(STORAGE_DIR, session_id)
    os.makedirs(session_path, exist_ok=True)

    db = FAISS.from_texts(chunks, embeddings)
    db.save_local(session_path)

    chat_history = [("System", "Paper uploaded and processed. You can now ask questions.")]
    return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
|
| 61 |
|
| 62 |
+
# === QUERY FUNCTION ===
|
|
|
|
| 63 |
|
| 64 |
+
def _ask_openrouter(prompt, timeout=120):
    """Send *prompt* to the OpenRouter chat API and return the reply text.

    Never raises for network or response problems: any failure is returned
    as a human-readable string starting with "Error:" so it can be shown
    directly in the chat.
    """
    if not OPENROUTER_API_KEY:
        # Avoid sending a literal "Bearer None" header.
        return "Error: OPENROUTER_API_KEY is not set."

    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": OPENROUTER_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful research paper explainer.Explain all concepts clearly with technical aspects but in a easy way that user can understand easily and gains more knowledge don't be greedy and use more tokens if question is more or it's about the research paper"},
            {"role": "user", "content": prompt},
        ],
    }

    try:
        # Without a timeout a stalled connection would block this request forever.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as e:
        return f"Error: {str(e)}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        return response.json()["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, ValueError, AttributeError):
        # A 200 response whose body is not the expected completion shape.
        return f"Error: unexpected response from OpenRouter - {response.text}"


def query_paper(session_id, user_message, chat_history):
    """Answer a question about the paper stored for *session_id*.

    Loads the session's FAISS index, retrieves the 3 most relevant chunks,
    and asks the OpenRouter model to answer using them as context.

    Args:
        session_id: Name of the session folder under STORAGE_DIR.
        user_message: The user's question; blank or None is ignored.
        chat_history: List of ``(user, assistant)`` tuples, or None. A given
            list is appended to in place.

    Returns:
        ``(chat_history, "")`` - the updated history and an empty string.
    """
    # Normalise once so every return path hands back a list, never None.
    chat_history = chat_history or []

    if not session_id or not os.path.exists(os.path.join(STORAGE_DIR, session_id)):
        chat_history.append(("System", "Session expired or not found. Upload the paper again."))
        return chat_history, ""

    if not user_message or not user_message.strip():
        return chat_history, ""

    session_path = os.path.join(STORAGE_DIR, session_id)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # SECURITY NOTE: allow_dangerous_deserialization unpickles the saved index.
    # Only acceptable while session folders are written solely by this app.
    db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
    retriever = db.as_retriever(search_kwargs={"k": 3})

    docs = retriever.invoke(user_message)
    context = "\n\n".join([d.page_content for d in docs])

    prompt = f"""

You are an AI assistant. Explain the following research paper content in simple terms and answer the question. Use your own knowledge also and make it more technical but simpler explanation should be like professor with high knowledge but teaches in awesome way with more technical stuff but easier
Context from paper:
{context}

Question: {user_message}
Answer:
"""

    answer = _ask_openrouter(prompt)

    chat_history.append((user_message, answer))
    return chat_history, ""
|
| 117 |
+
|
| 118 |
+
# === GRADIO UI ===
|
| 119 |
+
|
| 120 |
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")

    # Upload row: file picker next to the read-only status/session box.
    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
        session_box = gr.Textbox(label="Session ID", interactive=False)

    chatbot = gr.Chatbot(label="Chat about your paper", height=400)
    user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")

    with gr.Row():
        upload_btn = gr.Button("Upload Paper", variant="primary")
        ask_btn = gr.Button("Send Question")
        clear_btn = gr.Button("Clear Chat")

    # Per-browser-session state: the conversation so far and the active session id.
    state_chat = gr.State([])
    state_session = gr.State("")

    def handle_upload(pdf_file):
        """Index the uploaded PDF and return (status, session_id, chat_history)."""
        return process_pdf(pdf_file)

    upload_btn.click(
        fn=handle_upload,
        inputs=[pdf_input],
        outputs=[session_box, state_session, state_chat],
    )

    def handle_question(session_id, message, chat_history):
        """Answer one question; return the new history and "" to clear the input box."""
        return query_paper(session_id, message, chat_history)[0], ""

    # The button click and pressing Enter in the textbox behave identically:
    # answer the question, then copy the displayed chat back into the state.
    for trigger in (ask_btn.click, user_message.submit):
        trigger(
            fn=handle_question,
            inputs=[state_session, user_message, state_chat],
            outputs=[chatbot, user_message],
        ).then(
            lambda chat: chat,
            inputs=[chatbot],
            outputs=[state_chat],
        )

    def clear_chat():
        """Reset both the visible chat and the stored history."""
        return [], []

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, state_chat],
    )

    # Mirror any change of the stored history (e.g. after an upload) into the chat widget.
    state_chat.change(
        lambda chat: chat,
        inputs=[state_chat],
        outputs=[chatbot],
    )

demo.launch(debug=True)
|