Spaces:

tdurzynski
/

chat-with-your-data

Sleeping

App Files Files Community

tdurzynski committed on Feb 5, 2025

Commit

6f98b16

verified ·

1 Parent(s): 16613ab

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -19

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from transformers import pipeline
 from PyPDF2 import PdfReader
 from huggingface_hub import login
 from groq import AsyncGroq, Groq
 # Load environment variables
 load_dotenv()
@@ -43,7 +44,7 @@ def summarize_text(text):
     try:
         sum_client = Groq(api_key=GROQ_API_KEY)
         messages = [
-            {"role": "system", "content": "You are a summarizer. If I give you the whole text, you should summarize it."},
             {"role": "user", "content": f"Summarize the paper: {text}"}
         ]
@@ -63,8 +64,8 @@ def summarize_text(text):
 def summarize_pdf(pdf_file_path, max_length):
     """Extract text from a PDF and summarize it."""
     try:
-        loader = PdfReader(pdf_file_path)
-        text = "\n".join(page.extract_text() or "" for page in loader.pages)
         text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
         chunks = text_splitter.split_text(text)
@@ -119,30 +120,25 @@ async def chat_with_replit(message, history):
         messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
         for chat in history:
-            user, assistant = chat
-            messages.append({"role": "user", "content": user})
-            messages.append({"role": "assistant", "content": assistant})
         messages.append({"role": "user", "content": message})
-        stream = await client.chat.completions.create(
             messages=messages,
             model="llama3-70b-8192",
             temperature=0,
             max_tokens=1024,
             top_p=1,
-            stream=True,
         )
-        response_content = ""
-        async for chunk in stream:
-            if chunk.choices[0].delta.content:
-                response_content += chunk.choices[0].delta.content
-            yield response_content
     except Exception as e:
         logger.error(f"Chat error: {e}")
-        yield "Error in chat response."
 async def chat_with_replit_pdf(message, history, doi_num):
     """Chat with arXiv papers using document retrieval."""
@@ -180,8 +176,13 @@ async def chat_with_replit_pdf(message, history, doi_num):
         logger.error(f"Error in chat with PDF: {e}")
         return "Error processing chat with PDF."
 # Gradio UI
 with gr.Blocks() as app:
     with gr.Tab(label="Local PDF Summarization"):
         with gr.Row():
             input_pdf = gr.File(label="Upload PDF file")
@@ -189,16 +190,46 @@ with gr.Blocks() as app:
             summarize_pdf_btn = gr.Button(value="Summarize PDF")
         with gr.Row():
             output_pdf_summary = gr.Markdown(label="Summary", height=1000)
-    summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
     with gr.Tab(label="Arxiv Summarization"):
         with gr.Column():
-            arxiv_number = gr.Textbox(label="Enter arXiv number")
             summarize_btn = gr.Button(value="Summarize arXiv Paper")
         with gr.Column():
             output_summary = gr.Markdown(label="Summary", height=1000)
-    summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)
 app.launch()

 from PyPDF2 import PdfReader
 from huggingface_hub import login
 from groq import AsyncGroq, Groq
+import asyncio
 # Load environment variables
 load_dotenv()
     try:
         sum_client = Groq(api_key=GROQ_API_KEY)
         messages = [
+            {"role": "system", "content": "You are an excellent analyst who excels in summarization task. If I give you the whole text, you should summarize it."},
             {"role": "user", "content": f"Summarize the paper: {text}"}
         ]
 def summarize_pdf(pdf_file_path, max_length):
     """Extract text from a PDF and summarize it."""
     try:
+        reader = PdfReader(pdf_file_path)
+        text = "\n".join(page.extract_text() or "" for page in reader.pages)
         text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
         chunks = text_splitter.split_text(text)
         messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
         for chat in history:
+            user_msg, assistant_msg = chat
+            messages.append({"role": "user", "content": user_msg})
+            messages.append({"role": "assistant", "content": assistant_msg})
         messages.append({"role": "user", "content": message})
+        response = await client.chat.completions.create(
             messages=messages,
             model="llama3-70b-8192",
             temperature=0,
             max_tokens=1024,
             top_p=1,
+            stream=False,  # Using non-streaming for simplicity in this integration.
         )
+        return response.choices[0].message.content
     except Exception as e:
         logger.error(f"Chat error: {e}")
+        return "Error in chat response."
 async def chat_with_replit_pdf(message, history, doi_num):
     """Chat with arXiv papers using document retrieval."""
         logger.error(f"Error in chat with PDF: {e}")
         return "Error processing chat with PDF."
+# Define a synchronous wrapper for the async chat function
+def chat_with_replit_sync(message, history):
+    return asyncio.run(chat_with_replit(message, history))
 # Gradio UI
 with gr.Blocks() as app:
+    # Tab for Local PDF Summarization
     with gr.Tab(label="Local PDF Summarization"):
         with gr.Row():
             input_pdf = gr.File(label="Upload PDF file")
             summarize_pdf_btn = gr.Button(value="Summarize PDF")
         with gr.Row():
             output_pdf_summary = gr.Markdown(label="Summary", height=1000)
+        summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
+    # Tab for Arxiv Summarization
     with gr.Tab(label="Arxiv Summarization"):
         with gr.Column():
+            arxiv_number = gr.Textbox(label="Enter arXiv number, i.e 2502.02523")
             summarize_btn = gr.Button(value="Summarize arXiv Paper")
         with gr.Column():
             output_summary = gr.Markdown(label="Summary", height=1000)
+        summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)
+    # New Tab for Chat functionality
+    with gr.Tab(label="Chat with Assistant"):
+        gr.Markdown("### Chat with the Assistant")
+        with gr.Row():
+            chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
+            send_button = gr.Button("Send")
+        # A Markdown to display the conversation history (or you could use gr.Chatbot)
+        chat_output = gr.Markdown(label="Chat Output", height=300)
+        # Maintain chat history as a list of [user, assistant] pairs
+        chat_history = gr.State([])
+        # When the send button is clicked, update the chat history and get a response.
+        def update_chat(user_message, history):
+            # Append the new user message to history with an empty assistant response for now.
+            history = history or []
+            history.append([user_message, ""])
+            return history, history
+        def update_assistant_response(history):
+            # Get the last user message and call the chat function
+            user_message = history[-1][0]
+            response = chat_with_replit_sync(user_message, history[:-1])
+            # Update the last entry with the assistant's response
+            history[-1][1] = response
+            # Format the conversation for display
+            formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
+            return history, formatted
+        send_button.click(update_chat, inputs=[chat_input, chat_history], outputs=[chat_history, chat_output])
+        send_button.click(update_assistant_response, inputs=chat_history, outputs=[chat_history, chat_output])
 app.launch()