Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from transformers import pipeline
|
|
| 13 |
from PyPDF2 import PdfReader
|
| 14 |
from huggingface_hub import login
|
| 15 |
from groq import AsyncGroq, Groq
|
|
|
|
| 16 |
|
| 17 |
# Load environment variables
|
| 18 |
load_dotenv()
|
|
@@ -43,7 +44,7 @@ def summarize_text(text):
|
|
| 43 |
try:
|
| 44 |
sum_client = Groq(api_key=GROQ_API_KEY)
|
| 45 |
messages = [
|
| 46 |
-
{"role": "system", "content": "You are
|
| 47 |
{"role": "user", "content": f"Summarize the paper: {text}"}
|
| 48 |
]
|
| 49 |
|
|
@@ -63,8 +64,8 @@ def summarize_text(text):
|
|
| 63 |
def summarize_pdf(pdf_file_path, max_length):
|
| 64 |
"""Extract text from a PDF and summarize it."""
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
-
text = "\n".join(page.extract_text() or "" for page in
|
| 68 |
|
| 69 |
text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
|
| 70 |
chunks = text_splitter.split_text(text)
|
|
@@ -119,30 +120,25 @@ async def chat_with_replit(message, history):
|
|
| 119 |
messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
|
| 120 |
|
| 121 |
for chat in history:
|
| 122 |
-
|
| 123 |
-
messages.append({"role": "user", "content":
|
| 124 |
-
messages.append({"role": "assistant", "content":
|
| 125 |
|
| 126 |
messages.append({"role": "user", "content": message})
|
| 127 |
|
| 128 |
-
|
| 129 |
messages=messages,
|
| 130 |
model="llama3-70b-8192",
|
| 131 |
temperature=0,
|
| 132 |
max_tokens=1024,
|
| 133 |
top_p=1,
|
| 134 |
-
stream=
|
| 135 |
)
|
| 136 |
-
|
| 137 |
-
response_content = ""
|
| 138 |
-
async for chunk in stream:
|
| 139 |
-
if chunk.choices[0].delta.content:
|
| 140 |
-
response_content += chunk.choices[0].delta.content
|
| 141 |
-
yield response_content
|
| 142 |
|
| 143 |
except Exception as e:
|
| 144 |
logger.error(f"Chat error: {e}")
|
| 145 |
-
|
| 146 |
|
| 147 |
async def chat_with_replit_pdf(message, history, doi_num):
|
| 148 |
"""Chat with arXiv papers using document retrieval."""
|
|
@@ -180,8 +176,13 @@ async def chat_with_replit_pdf(message, history, doi_num):
|
|
| 180 |
logger.error(f"Error in chat with PDF: {e}")
|
| 181 |
return "Error processing chat with PDF."
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
# Gradio UI
|
| 184 |
with gr.Blocks() as app:
|
|
|
|
| 185 |
with gr.Tab(label="Local PDF Summarization"):
|
| 186 |
with gr.Row():
|
| 187 |
input_pdf = gr.File(label="Upload PDF file")
|
|
@@ -189,16 +190,46 @@ with gr.Blocks() as app:
|
|
| 189 |
summarize_pdf_btn = gr.Button(value="Summarize PDF")
|
| 190 |
with gr.Row():
|
| 191 |
output_pdf_summary = gr.Markdown(label="Summary", height=1000)
|
| 192 |
-
|
| 193 |
-
summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
|
| 194 |
|
|
|
|
| 195 |
with gr.Tab(label="Arxiv Summarization"):
|
| 196 |
with gr.Column():
|
| 197 |
-
arxiv_number = gr.Textbox(label="Enter arXiv number")
|
| 198 |
summarize_btn = gr.Button(value="Summarize arXiv Paper")
|
| 199 |
with gr.Column():
|
| 200 |
output_summary = gr.Markdown(label="Summary", height=1000)
|
|
|
|
| 201 |
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
app.launch()
|
|
|
|
| 13 |
from PyPDF2 import PdfReader
|
| 14 |
from huggingface_hub import login
|
| 15 |
from groq import AsyncGroq, Groq
|
| 16 |
+
import asyncio
|
| 17 |
|
| 18 |
# Load environment variables
|
| 19 |
load_dotenv()
|
|
|
|
| 44 |
try:
|
| 45 |
sum_client = Groq(api_key=GROQ_API_KEY)
|
| 46 |
messages = [
|
| 47 |
+
{"role": "system", "content": "You are an excellent analyst who excels in summarization task. If I give you the whole text, you should summarize it."},
|
| 48 |
{"role": "user", "content": f"Summarize the paper: {text}"}
|
| 49 |
]
|
| 50 |
|
|
|
|
| 64 |
def summarize_pdf(pdf_file_path, max_length):
|
| 65 |
"""Extract text from a PDF and summarize it."""
|
| 66 |
try:
|
| 67 |
+
reader = PdfReader(pdf_file_path)
|
| 68 |
+
text = "\n".join(page.extract_text() or "" for page in reader.pages)
|
| 69 |
|
| 70 |
text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
|
| 71 |
chunks = text_splitter.split_text(text)
|
|
|
|
| 120 |
messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
|
| 121 |
|
| 122 |
for chat in history:
|
| 123 |
+
user_msg, assistant_msg = chat
|
| 124 |
+
messages.append({"role": "user", "content": user_msg})
|
| 125 |
+
messages.append({"role": "assistant", "content": assistant_msg})
|
| 126 |
|
| 127 |
messages.append({"role": "user", "content": message})
|
| 128 |
|
| 129 |
+
response = await client.chat.completions.create(
|
| 130 |
messages=messages,
|
| 131 |
model="llama3-70b-8192",
|
| 132 |
temperature=0,
|
| 133 |
max_tokens=1024,
|
| 134 |
top_p=1,
|
| 135 |
+
stream=False, # Using non-streaming for simplicity in this integration.
|
| 136 |
)
|
| 137 |
+
return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
except Exception as e:
|
| 140 |
logger.error(f"Chat error: {e}")
|
| 141 |
+
return "Error in chat response."
|
| 142 |
|
| 143 |
async def chat_with_replit_pdf(message, history, doi_num):
|
| 144 |
"""Chat with arXiv papers using document retrieval."""
|
|
|
|
| 176 |
logger.error(f"Error in chat with PDF: {e}")
|
| 177 |
return "Error processing chat with PDF."
|
| 178 |
|
| 179 |
+
# Define a synchronous wrapper for the async chat function
|
| 180 |
+
def chat_with_replit_sync(message, history):
    """Blocking entry point for the async chat function.

    Builds the ``chat_with_replit`` coroutine for the given message and
    history, drives it to completion with ``asyncio.run`` and hands back
    whatever the coroutine returned.
    """
    pending_reply = chat_with_replit(message, history)
    return asyncio.run(pending_reply)
|
| 182 |
+
|
| 183 |
# Gradio UI
|
| 184 |
with gr.Blocks() as app:
|
| 185 |
+
# Tab for Local PDF Summarization
|
| 186 |
with gr.Tab(label="Local PDF Summarization"):
|
| 187 |
with gr.Row():
|
| 188 |
input_pdf = gr.File(label="Upload PDF file")
|
|
|
|
| 190 |
summarize_pdf_btn = gr.Button(value="Summarize PDF")
|
| 191 |
with gr.Row():
|
| 192 |
output_pdf_summary = gr.Markdown(label="Summary", height=1000)
|
| 193 |
+
summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
|
|
|
|
| 194 |
|
| 195 |
+
# Tab for Arxiv Summarization
|
| 196 |
with gr.Tab(label="Arxiv Summarization"):
|
| 197 |
with gr.Column():
|
| 198 |
+
arxiv_number = gr.Textbox(label="Enter arXiv number, i.e 2502.02523")
|
| 199 |
summarize_btn = gr.Button(value="Summarize arXiv Paper")
|
| 200 |
with gr.Column():
|
| 201 |
output_summary = gr.Markdown(label="Summary", height=1000)
|
| 202 |
+
summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)
|
| 203 |
|
| 204 |
+
# New Tab for Chat functionality
|
| 205 |
+
with gr.Tab(label="Chat with Assistant"):
|
| 206 |
+
gr.Markdown("### Chat with the Assistant")
|
| 207 |
+
with gr.Row():
|
| 208 |
+
chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
|
| 209 |
+
send_button = gr.Button("Send")
|
| 210 |
+
# A Markdown to display the conversation history (or you could use gr.Chatbot)
|
| 211 |
+
chat_output = gr.Markdown(label="Chat Output", height=300)
|
| 212 |
+
# Maintain chat history as a list of [user, assistant] pairs
|
| 213 |
+
chat_history = gr.State([])
|
| 214 |
+
|
| 215 |
+
# When the send button is clicked, update the chat history and get a response.
|
| 216 |
+
def update_chat(user_message, history):
    """Record a new user turn and render the conversation for display.

    Args:
        user_message: Text the user just submitted.
        history: List of ``[user, assistant]`` pairs; ``None`` or an empty
            list is treated as a conversation with no turns yet.

    Returns:
        A ``(history, markdown)`` tuple. ``history`` is the list with the new
        ``[user_message, ""]`` pair appended; ``markdown`` is the whole
        conversation formatted as text.
    """
    history = history or []
    # The assistant slot stays empty until the reply step fills it in.
    history.append([user_message, ""])
    # The second output is wired to a Markdown component, which needs a
    # string; returning the raw list of pairs there cannot be rendered.
    formatted = "\n\n".join(
        f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history
    )
    return history, formatted
|
| 221 |
+
|
| 222 |
+
def update_assistant_response(history):
    """Fill in the assistant reply for the most recent user turn.

    Args:
        history: List of ``[user, assistant]`` pairs whose last entry holds
            the user message that still needs an answer.

    Returns:
        A ``(history, markdown)`` tuple: the history with the last pair's
        assistant slot set to the reply, and the whole conversation formatted
        as Markdown text. When ``history`` is empty or ``None`` there is no
        turn to answer, so ``([], "")`` is returned without calling the chat
        function.
    """
    # Guard: indexing history[-1] on an empty history would raise.
    if not history:
        return history or [], ""

    # Get the last user message and call the chat function with the turns
    # that precede it as context.
    user_message = history[-1][0]
    response = chat_with_replit_sync(user_message, history[:-1])

    # Update the last entry with the assistant's response.
    history[-1][1] = response

    # Format the conversation for display.
    formatted = "\n\n".join(
        f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history
    )
    return history, formatted
|
| 231 |
+
|
| 232 |
+
# Run the two steps in order: the reply step reads the entry that update_chat
# appends to the history, so it must not start until that step has finished.
# Two independent click listeners give no such ordering guarantee.
send_button.click(
    update_chat,
    inputs=[chat_input, chat_history],
    outputs=[chat_history, chat_output],
).then(
    update_assistant_response,
    inputs=chat_history,
    outputs=[chat_history, chat_output],
)
|
| 234 |
|
| 235 |
app.launch()
|