"""Research-paper chatbot: per-session FAISS RAG over an uploaded PDF, answered via OpenRouter."""
import os
import shutil
import time
import uuid
from threading import Thread

import gradio as gr
import requests
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from PyPDF2 import PdfReader

# Pull OPENROUTER_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

# === CONFIG ===
STORAGE_DIR = "storage"
CLEANUP_INTERVAL = 600  # sweep for expired sessions every 10 min
# NOTE(review): original comment said "30 min" but 1000 s is ~16.7 min;
# keeping the value (behavior) and fixing the label. 30 min would be 1800.
SESSION_TTL = 1000  # seconds of inactivity before a session folder is evicted
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"

# exist_ok avoids the check-then-create race of the original `if not exists` guard
os.makedirs(STORAGE_DIR, exist_ok=True)
# === CLEANUP THREAD ===
def cleanup_old_sessions():
while True:
now = time.time()
for folder in os.listdir(STORAGE_DIR):
path = os.path.join(STORAGE_DIR, folder)
if os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL:
shutil.rmtree(path)
time.sleep(CLEANUP_INTERVAL)
Thread(target=cleanup_old_sessions, daemon=True).start()
# === PDF PROCESSING ===
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF and index it into a per-session FAISS store.

    Args:
        pdf_file: Gradio file object (has a `.name` path) or None.

    Returns:
        (status_message, session_id, chat_history) — session_id is "" on failure.
    """
    if pdf_file is None:
        return "No file uploaded.", "", []
    session_id = str(uuid.uuid4())
    reader = PdfReader(pdf_file.name)
    # Walrus binds each page's text once; the original called page.extract_text()
    # twice per page (filter + join), re-parsing every page's content stream.
    text = "".join(t for page in reader.pages if (t := page.extract_text()))
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    session_path = os.path.join(STORAGE_DIR, session_id)
    os.makedirs(session_path, exist_ok=True)
    db = FAISS.from_texts(chunks, embeddings)
    db.save_local(session_path)
    chat_history = [("System", "Paper uploaded and processed. You can now ask questions.")]
    return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
# === QUERY FUNCTION ===
def query_paper(session_id, user_message, chat_history):
    """Retrieve relevant chunks for *user_message* and answer via OpenRouter.

    Args:
        session_id: uuid of the session folder under STORAGE_DIR.
        user_message: the user's question (blank input is ignored).
        chat_history: list of (user, assistant) tuples, or None.

    Returns:
        (updated_chat_history, "") — the empty string clears the input textbox.
    """
    # Session folder may have been evicted by the cleanup thread.
    if not session_id or not os.path.exists(os.path.join(STORAGE_DIR, session_id)):
        chat_history = chat_history or []
        chat_history.append(("System", "Session expired or not found. Upload the paper again."))
        return chat_history, ""
    if not user_message.strip():
        return chat_history, ""

    session_path = os.path.join(STORAGE_DIR, session_id)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # allow_dangerous_deserialization is safe here: we only load indexes this
    # app itself wrote into STORAGE_DIR.
    db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
    retriever = db.as_retriever(search_kwargs={"k": 3})
    # invoke() replaces the deprecated get_relevant_documents()
    docs = retriever.invoke(user_message)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = f"""
You are an AI assistant. Explain the following research paper content in simple terms and answer the question.
Use your own knowledge also and make it more technical but simpler explanation should be like professor with
high knowledge but teaches in awesome way with more technical stuff but easier.
Context from paper:
{context}
Question: {user_message}
Answer:
"""
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": OPENROUTER_MODEL,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful research paper explainer. Explain all concepts clearly with technical aspects but in an easy way."
            },
            {"role": "user", "content": prompt}
        ]
    }
    try:
        # timeout added: without it a stalled API call blocks this Gradio worker
        # forever. A Timeout is caught below and surfaced as an error message.
        response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                                 headers=headers, json=payload, timeout=120)
        if response.status_code == 200:
            answer = response.json()["choices"][0]["message"]["content"].strip()
        else:
            answer = f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        answer = f"Error: {str(e)}"

    # Update chat history (tuple format expected by gr.Chatbot)
    chat_history = chat_history or []
    chat_history.append((user_message, answer))
    return chat_history, ""
# === GRADIO UI ===
# Indentation below is reconstructed (the scraped source lost all leading
# whitespace); nesting follows standard gr.Blocks / gr.Row structure.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")
    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
        session_box = gr.Textbox(label="Session ID", interactive=False)
    chatbot = gr.Chatbot(label="Chat about your paper", height=400)
    user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")
    with gr.Row():
        upload_btn = gr.Button("Upload Paper", variant="primary")
        ask_btn = gr.Button("Send Question")
        clear_btn = gr.Button("Clear Chat")

    # Per-browser-session state: chat history mirror and the backend session id.
    state_chat = gr.State([])
    state_session = gr.State("")

    # Upload button functionality: thin pass-through around process_pdf so the
    # three return values map onto (status box, session state, chatbot widget).
    def handle_upload(pdf_file):
        status, session_id, chat_history = process_pdf(pdf_file)
        return status, session_id, chat_history
    upload_btn.click(
        fn=handle_upload,
        inputs=[pdf_input],
        outputs=[session_box, state_session, chatbot]
    )

    # Ask button functionality: query_paper returns (history, "") — the ""
    # clears the question textbox.
    def handle_question(session_id, message, chat_history):
        updated_chat, _ = query_paper(session_id, message, chat_history)
        return updated_chat, ""
    ask_btn.click(
        fn=handle_question,
        inputs=[state_session, user_message, chatbot],
        outputs=[chatbot, user_message]
    ).then(
        # After the chatbot widget updates, copy it back into state_chat.
        lambda chat: chat,
        inputs=[chatbot],
        outputs=[state_chat]
    )
    # Submit on enter — same handler/wiring as the Ask button.
    user_message.submit(
        fn=handle_question,
        inputs=[state_session, user_message, chatbot],
        outputs=[chatbot, user_message]
    ).then(
        lambda chat: chat,
        inputs=[chatbot],
        outputs=[state_chat]
    )

    # Clear chat: reset both the widget and the mirrored state.
    def clear_chat():
        return [], []
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, state_chat]
    )

    # Sync chat state with chatbot.
    # NOTE(review): this re-emits state_chat into the chatbot even though the
    # .then() handlers above already copy chatbot -> state_chat, so each send
    # triggers one extra (idempotent) widget update — appears intentional to
    # keep the two aligned; confirm before removing.
    state_chat.change(
        lambda chat: chat,
        inputs=[state_chat],
        outputs=[chatbot]
    )

# 0.0.0.0 so the app is reachable inside the Spaces container; 7860 is the
# port Hugging Face Spaces expects.
demo.launch(debug=True, server_name="0.0.0.0",server_port=7860)