Update app.py
app.py CHANGED
@@ -3,29 +3,17 @@ from huggingface_hub import InferenceClient
 import nltk
 import PyPDF2
 
+# Download required NLTK resources
 nltk.download("punkt", quiet=True)
+nltk.download("punkt_tab", quiet=True)
 
-###############################################################################
-#                          Hugging Face Chat Code                             #
-###############################################################################
-
-# Initialize the Hugging Face model client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 def respond_chunked(message, history, system_message, max_tokens, temperature, top_p, file_content):
-    """
-    Calls the Hugging Face model for a response with support for chunked file content.
-    """
-    # Split file content into manageable chunks
     chunks = chunk_text(file_content, max_chunk_size=1500)
     combined_response = ""
-
-    # Process each chunk and append to the response
     for chunk in chunks:
-        # Append chunk to system message for context
         chunked_system_message = f"{system_message}\n\nFile Content Chunk:\n{chunk}"
-
-        # Prepare the message payload
         messages = [{"role": "system", "content": chunked_system_message}]
         for user, assistant in history:
             if user:
@@ -33,7 +21,6 @@ def respond_chunked(message, history, system_message, max_tokens, temperature, t
             if assistant:
                 messages.append({"role": "assistant", "content": assistant})
         messages.append({"role": "user", "content": message})
-
         try:
             completion = client.chat_completion(
                 messages,
@@ -44,18 +31,9 @@ def respond_chunked(message, history, system_message, max_tokens, temperature, t
             combined_response += completion.choices[0].message["content"] + "\n"
         except Exception as e:
             combined_response += f"Error processing chunk: {e}\n"
-
     return combined_response.strip()
 
-###############################################################################
-#                      File Upload & Parsing Functions                        #
-###############################################################################
-
 def parse_file(file_obj):
-    """
-    Parses uploaded files and extracts content.
-    Supports PDFs and plain text.
-    """
     file_extension = file_obj.name.split('.')[-1].lower()
     if file_extension == "pdf":
         try:
@@ -70,9 +48,6 @@ def parse_file(file_obj):
         return f"Error reading file: {e}"
 
 def load_files(files):
-    """
-    Loads multiple files, parses their content, and concatenates the text.
-    """
     combined_text = ""
     for file in files:
         try:
@@ -83,23 +58,14 @@ def load_files(files):
             combined_text += f"Error processing file {file}: {e}\n"
     return combined_text
 
-###############################################################################
-#                             Chunking Function                               #
-###############################################################################
-
 def chunk_text(text, max_chunk_size=1500):
-    """
-    Splits text into chunks of up to `max_chunk_size` tokens (approximate).
-    """
     from nltk.tokenize import sent_tokenize
-
     sentences = sent_tokenize(text)
     chunks = []
     current_chunk = ""
     current_tokens = 0
 
     def approximate_token_count(text):
-        # Naive tokenization approximation
         return len(text.split())
 
     for sentence in sentences:
@@ -117,50 +83,32 @@ def chunk_text(text, max_chunk_size=1500):
 
     return chunks
 
-###############################################################################
-#                              Gradio UI Layout                               #
-###############################################################################
-
 with gr.Blocks() as demo:
     gr.Markdown("# **Chat with File Context (Chunking for Large Files)**")
-    gr.Markdown(
-        """
-        This app lets you upload large file(s) and chat with an AI assistant.
-        Uploaded file content will be processed in chunks to ensure smooth handling.
-        """
-    )
-
-    # States to store file content and chat history
+    gr.Markdown("Upload large files, and chat with AI using context derived from those files.")
+
     file_content_state = gr.State("")
     chat_history_state = gr.State([])
 
-    # File Upload Section
     file_input = gr.File(label="Upload File(s)", file_count="multiple", type="filepath")
     file_input.change(fn=load_files, inputs=file_input, outputs=file_content_state)
 
-
-    gr.Markdown("## Chat")
-    chatbot = gr.Chatbot(label="Conversation")
+    chatbot = gr.Chatbot(label="Conversation", type="messages")
     user_input = gr.Textbox(label="Your Message", placeholder="Ask something...", lines=2)
 
-    # Model Configuration Sliders
     system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", interactive=True)
     max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
 
-    # Chat Function with Chunking
     def chat_function(user_message, history, file_content, system_prompt, max_tokens, temperature, top_p):
         if not user_message.strip():
             return "", history
-        # Append user's message to the chat history
         history.append((user_message, ""))
-        # Get response from the model with chunking
         assistant_response = respond_chunked(user_message, history, system_prompt, max_tokens, temperature, top_p, file_content)
         history[-1] = (user_message, assistant_response)
         return "", history
 
-    # Add a Send Button for manual submission
     send_button = gr.Button("Send")
     send_button.click(
         fn=chat_function,
@@ -168,7 +116,6 @@ Uploaded file content will be processed in chunks to ensure smooth handling.
         outputs=[user_input, chatbot]
     )
 
-    # Submit Chat Input with Enter Key as well
    user_input.submit(
         fn=chat_function,
         inputs=[user_input, chat_history_state, file_content_state, system_prompt, max_tokens, temperature, top_p],
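A note on the punkt_tab addition: recent NLTK releases (3.9 and later) load the punkt_tab resource for sent_tokenize instead of the older pickled punkt models, so downloading both keeps the app working across NLTK versions. A minimal startup-check sketch, assuming only that nltk is installed:

import nltk
from nltk.tokenize import sent_tokenize

# Fetch both tokenizer resources; quiet=True suppresses progress output.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource, quiet=True)

# Smoke test: should print ['First sentence.', 'Second sentence.']
print(sent_tokenize("First sentence. Second sentence."))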
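The PDF branch of parse_file (old lines 61-70) is collapsed in the diff. For readers reconstructing the flow, here is a hypothetical sketch of what such a parser typically looks like with PyPDF2; the name parse_file_sketch and the path-string argument are assumptions, not the committed code. (If gr.File(type="filepath") hands load_files plain path strings, as in recent Gradio, the committed file_obj.name access would also need a path-based variant like this one.)

from PyPDF2 import PdfReader

def parse_file_sketch(path):
    # Hypothetical stand-in for the collapsed parse_file body.
    if path.lower().endswith(".pdf"):
        try:
            reader = PdfReader(path)
            # extract_text() can return None for image-only pages.
            return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as e:
            return f"Error reading file: {e}"
    try:
        # Fall back to plain-text reading for non-PDF files.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except Exception as e:
        return f"Error reading file: {e}"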
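chunk_text's accumulation loop (old lines 105-117) is likewise collapsed. Given the visible scaffolding (sent_tokenize, current_chunk, current_tokens, and the whitespace-based approximate_token_count), a plausible reconstruction is greedy sentence packing; this is an illustration under those assumptions, not the file's actual loop:

from nltk.tokenize import sent_tokenize

def chunk_text_sketch(text, max_chunk_size=1500):
    # Greedy sentence packing: start a new chunk once the approximate
    # token budget (whitespace-split word count) would be exceeded.
    chunks, current_chunk, current_tokens = [], "", 0
    for sentence in sent_tokenize(text):
        sentence_tokens = len(sentence.split())
        if current_chunk and current_tokens + sentence_tokens > max_chunk_size:
            chunks.append(current_chunk.strip())
            current_chunk, current_tokens = "", 0
        current_chunk += sentence + " "
        current_tokens += sentence_tokens
    if current_chunk.strip():
        chunks.append(current_chunk.strip())
    return chunks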
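One caveat on the gr.Chatbot(type="messages") change: in that mode Gradio expects history entries as {"role": ..., "content": ...} dicts, while chat_function still appends (user, assistant) tuples and respond_chunked unpacks for user, assistant in history. A sketch of a dict-shaped variant (an assumption, not part of this commit):

def chat_function_messages(user_message, history, file_content,
                           system_prompt, max_tokens, temperature, top_p):
    # history is a list of {"role", "content"} dicts in messages mode.
    if not user_message.strip():
        return "", history
    assistant_response = respond_chunked(user_message, history, system_prompt,
                                         max_tokens, temperature, top_p, file_content)
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": assistant_response})
    return "", history

respond_chunked's history loop would need the matching change (iterating role/content dicts rather than tuple pairs) for this shape to work end to end.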