admin08077 committed
Commit 630bdac · verified · 1 Parent(s): 5173b34

Update app.py

Files changed (1): app.py +117 -99
app.py CHANGED
@@ -1,66 +1,75 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+
 import nltk
+import json
+import io
+from fpdf import FPDF
+from textblob import TextBlob
 import PyPDF2
+import tempfile

-# Download the necessary NLTK data (using the correct resource "punkt")
+# Download NLTK punkt tokenizer if needed.
 nltk.download("punkt", quiet=True)

-# Initialize the Hugging Face Inference Client
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+###############################################################################
+#                           Hugging Face Chat Code                            #
+###############################################################################
+"""
+For more information on Hugging Face Inference API support, please check:
+https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""

-# Function to split text into manageable chunks
-def chunk_text(text, max_chunk_size=1500):
-    from nltk.tokenize import sent_tokenize
-    sentences = sent_tokenize(text)
-    chunks = []
-    current_chunk = ""
-    current_tokens = 0
+# Initialize the Hugging Face model client (make sure you have access)
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

-    for sentence in sentences:
-        sentence_tokens = len(sentence.split())
-        if current_tokens + sentence_tokens <= max_chunk_size:
-            current_chunk += " " + sentence
-            current_tokens += sentence_tokens
-        else:
-            if current_chunk:
-                chunks.append(current_chunk.strip())
-            current_chunk = sentence
-            current_tokens = sentence_tokens
-    if current_chunk:
-        chunks.append(current_chunk.strip())
-    return chunks
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, file_content):
+    """
+    Calls the model (in non-streaming mode) to get a complete response.
+    If file_content is non-empty, it is appended to the system message (context).
+    """
+    if file_content and file_content.strip():
+        system_message += "\n\nFile content:\n" + file_content

-# Function to provide responses for each text chunk
-def respond_chunked(message, history, system_message, max_tokens, temperature, top_p, file_content):
-    if not file_content.strip():
-        return "No file content available to provide context."
+    # Build messages list in the expected format.
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})

-    chunks = chunk_text(file_content, max_chunk_size=1500)
-    combined_response = ""
-    for chunk in chunks:
-        chunked_system_message = f"{system_message}\n\nFile Content Chunk:\n{chunk}"
-        messages = [{"role": "system", "content": chunked_system_message}] + history
-        messages.append({"role": "user", "content": message})
-        try:
-            completion = client.chat_completion(
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-            )
-            combined_response += completion.choices[0].message["content"] + "\n"
-        except Exception as e:
-            combined_response += f"Error processing chunk: {e}\n"
-    return combined_response.strip()
+    try:
+        completion = client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=False,  # Non-streaming mode for simplicity
+            temperature=temperature,
+            top_p=top_p,
+        )
+        response = completion.choices[0].message["content"]
+    except Exception as e:
+        response = f"Error during model response: {e}"
+    return response
+
+###############################################################################
+#                       File Upload & Parsing Functions                       #
+###############################################################################

-# Function to parse the uploaded file based on its extension
 def parse_file(file_obj):
+    """
+    Parses an uploaded file.
+    Supports PDF (using PyPDF2) and text files (UTF-8 decoding).
+    """
     file_extension = file_obj.name.split('.')[-1].lower()
     if file_extension == "pdf":
         try:
             reader = PyPDF2.PdfReader(file_obj)
-            return "\n".join(page.extract_text() or "" for page in reader.pages)
+            text = ""
+            for page in reader.pages:
+                text += (page.extract_text() or "") + "\n"
+            return text
         except Exception as e:
             return f"Error reading PDF: {e}"
     else:
@@ -69,67 +78,76 @@ def parse_file(file_obj):
         except Exception as e:
             return f"Error reading file: {e}"

-# Define the Gradio app interface
-with gr.Blocks() as demo:
-    gr.Markdown("# **Chat with File Context (Chunking for Large Files)**")
-    gr.Markdown("Upload large files, and chat with AI using context derived from those files.")
+def load_files(files):
+    """
+    Processes a list of uploaded files (provided as file paths).
+    Opens each file, parses its content, and concatenates the text.
+    """
+    all_text = ""
+    for file_path in files:
+        try:
+            with open(file_path, "rb") as f:
+                content = parse_file(f)
+                all_text += content + "\n"
+        except Exception as e:
+            all_text += f"Error processing file {file_path}: {e}\n"
+    return all_text

-    # States to store file content and chat history
+###############################################################################
+#                              Gradio UI Layout                               #
+###############################################################################
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Combined Chat & File Upload App")
+    gr.Markdown(
+        """
+        This app allows you to upload file(s) (PDF or TXT) to provide context for the AI.
+        Once files are uploaded, their contents are automatically parsed and used in every conversation.
+        Simply upload a file and then start chatting.
+        """
+    )
+
+    # State to hold file content (the concatenated text of uploaded files)
     file_content_state = gr.State("")
+    # State to hold the conversation history (list of (user, assistant) tuples)
     chat_history_state = gr.State([])

-    # File upload component (accepts multiple files)
+    # --- File Upload ---
+    # Using type="filepath" so that we get a file path that can be opened later.
     file_input = gr.File(label="Upload File(s)", file_count="multiple", type="filepath")
+    # Automatically process files when they are uploaded.
+    file_input.change(fn=load_files, inputs=file_input, outputs=file_content_state)

-    def handle_file_upload(files):
-        """
-        Process uploaded files and store their content.
-        """
-        combined_text = ""
-        for file in files:
-            try:
-                with open(file, "rb") as f:
-                    content = parse_file(f)
-                    combined_text += content + "\n"
-            except Exception as e:
-                combined_text += f"Error processing file {file}: {e}\n"
-        return combined_text.strip()
-
-    file_input.change(fn=handle_file_upload, inputs=file_input, outputs=file_content_state)
-
-    # Chat interface components
-    chatbot = gr.Chatbot(label="Conversation", type="messages")
-    user_input = gr.Textbox(label="Your Message", placeholder="Ask something...", lines=2)
-    system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", interactive=True)
-    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
-    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
-
-    def chat_function(user_message, history, file_content, system_prompt, max_tokens, temperature, top_p):
-        if not user_message.strip():
+    gr.Markdown("## Chat")
+    chatbot = gr.Chatbot(label="Chat History")
+    user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2)
+
+    # Additional model parameters:
+    system_prompt = gr.Textbox(label="System Message", value="You are a helpful AI assistant.", interactive=True)
+    max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
+    temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+    top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    def chat_fn(user_msg, history, file_content, system_msg, max_tokens, temperature, top_p):
+        if not user_msg.strip():
             return "", history
-        # Get the assistant's response using the chunking function
-        assistant_response = respond_chunked(
-            user_message, history, system_prompt, max_tokens, temperature, top_p, file_content
-        )
-        # Append user and assistant messages to the conversation history
-        history.append({"role": "user", "content": user_message})
-        history.append({"role": "assistant", "content": assistant_response})
+        # Append the user's message to the conversation history.
+        history.append((user_msg, ""))
+        # Call the respond function (non-streaming) to get a complete answer.
+        response = respond(user_msg, history, system_msg, max_tokens, temperature, top_p, file_content)
+        # Update the last entry in the conversation with the response.
+        history[-1] = (user_msg, response)
         return "", history
-
-    # Button to send the user message
-    send_button = gr.Button("Send")
-    send_button.click(
-        fn=chat_function,
-        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
-        outputs=[user_input, chatbot]
-    )
-
-    # Enable submission via the Enter key in the textbox
+
+    # When user submits a message, call chat_fn.
     user_input.submit(
-        fn=chat_function,
-        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
-        outputs=[user_input, chatbot]
+        fn=chat_fn,
+        inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens_slider, temperature_slider, top_p_slider],
+        outputs=[user_input, chatbot],
+        queue=True
     )
+
+demo.launch(server_name="0.0.0.0", server_port=7860)

-demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_error=True)
+if __name__ == "__main__":
+    demo.launch()
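
For readers trying the new code path outside the app, here is a minimal sketch of the non-streaming chat call that respond() wraps. It assumes huggingface_hub is installed, a valid Hugging Face token is available to the client (for example via the HF_TOKEN environment variable), and HuggingFaceH4/zephyr-7b-beta is still served by the Inference API:

    # Standalone sketch of the chat_completion call made by respond().
    # Assumptions: huggingface_hub installed, HF token configured, model served.
    from huggingface_hub import InferenceClient

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "In one sentence, what does PyPDF2 do?"},
    ]

    completion = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,   # non-streaming, as in respond()
        temperature=0.7,
        top_p=0.95,
    )
    # app.py reads the message with dict-style access; recent huggingface_hub
    # versions also expose it as an attribute (completion.choices[0].message.content).
    print(completion.choices[0].message["content"])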
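
One structural detail worth noting: chat_fn keeps the conversation as (user, assistant) tuples, which is what the default gr.Chatbot component renders, and respond() unpacks those tuples into the role/content dicts that chat_completion expects. A small illustration of that conversion, mirroring the loop in respond() (the sample history values are invented):

    # History as chat_fn stores it: (user, assistant) tuples; the trailing
    # empty string is the placeholder chat_fn appends before calling respond().
    history = [("Hi", "Hello! How can I help?"), ("Summarize the file", "")]

    # The same unpacking respond() performs before calling the model:
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # empty placeholders are skipped
            messages.append({"role": "assistant", "content": assistant_msg})
    print(messages)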
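
Finally, a quick hypothetical smoke test for parse_file(). The NamedBytesIO helper is invented for the test, because parse_file branches on the uploaded object's name attribute and a plain io.BytesIO cannot carry one; the expected output assumes the elided text branch decodes the bytes as UTF-8, as the new docstring states:

    import io

    class NamedBytesIO(io.BytesIO):
        """Hypothetical helper: a BytesIO carrying a .name, like an uploaded file."""
        def __init__(self, data: bytes, name: str):
            super().__init__(data)
            self.name = name

    # A ".txt" name should route the buffer through the UTF-8 text branch.
    fake_upload = NamedBytesIO("Hello from a plain-text upload.".encode("utf-8"), "notes.txt")
    print(parse_file(fake_upload))  # expected: Hello from a plain-text upload.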