Spaces:

admin08077
/

aitr

Sleeping

App Files Files Community

admin08077 committed on Jan 14, 2025

Commit

e143e23

verified ·

1 Parent(s): 39209da

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -152

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ import io
 import base64
 from fpdf import FPDF
 from textblob import TextBlob
 nltk.download("punkt", quiet=True)
@@ -14,37 +16,32 @@ nltk.download("punkt", quiet=True)
 #                           Hugging Face Chat Code                            #
 ###############################################################################
 """
-For more information on `huggingface_hub` Inference API support, please check:
 https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # Initialize your Hugging Face model client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p
-):
     """
-    Streams the chat response from the Hugging Face model.
-    Yields tokens as they arrive, so Gradio can display partial responses.
     """
-    # Build the messages to send to the model
-    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
-    # Streaming response
     response = ""
     for partial in client.chat_completion(
         messages,
@@ -58,158 +55,91 @@ def respond(
         yield response
 ###############################################################################
-#                       Advanced Text Converter Code                          #
 ###############################################################################
-def text_to_sentences(text: str):
-    """Splits the text into sentences using nltk."""
-    return [s.strip() for s in nltk.sent_tokenize(text) if s.strip()]
-def generate_comments(sentences):
     """
-    Generates AI-based comments for each sentence using TextBlob
-    sentiment polarity as a simple demonstration.
     """
-    comments = []
-    for sentence in sentences:
-        polarity = TextBlob(sentence).sentiment.polarity
-        # A simple "AI Insight" comment
-        comment = f"AI Insight: Polarity={polarity:.2f} for sentence: '{sentence}'"
-        comments.append(comment)
-    return comments
-def convert_to_json(sentences, comments):
-    """Creates a JSON structure where each sentence has a comment."""
-    data = [{"sentence": s, "comment": c} for s, c in zip(sentences, comments)]
-    return json.dumps({"sentences": data}, indent=2)
-def convert_to_pdf(sentences, comments):
-    """Creates a PDF where each sentence is listed with a comment."""
-    pdf = FPDF()
-    pdf.add_page()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    pdf.set_font("Arial", size=12)
-    for s, c in zip(sentences, comments):
-        pdf.multi_cell(0, 10, f"Sentence: {s}", 0, 1)
-        pdf.multi_cell(0, 10, c, 0, 1)
-        pdf.ln(5)
-    pdf_buffer = io.BytesIO()
-    pdf.output(pdf_buffer, 'F')
-    pdf_buffer.seek(0)
-    return pdf_buffer
-def process_text(user_text, output_format):
     """
-    Main function triggered by the Gradio interface.
-    Returns either JSON text or a PDF file (as bytes).
     """
-    if not user_text.strip():
-        return "Error: Please provide non-empty text!", None
-    sentences = text_to_sentences(user_text)
-    comments = generate_comments(sentences)
-    if output_format == "JSON":
-        # Return JSON text, no file
-        json_data = convert_to_json(sentences, comments)
-        return json_data, None
-    else:
-        # Return PDF as bytes, no text
-        pdf_buffer = convert_to_pdf(sentences, comments)
-        # Gradio expects a tuple: (file_name, file_bytes)
-        return None, ("output.pdf", pdf_buffer.getvalue())
 ###############################################################################
-#                              Gradio UI Layout                               #
 ###############################################################################
 with gr.Blocks() as demo:
-    gr.Markdown("# **Combined Gradio App**")
     gr.Markdown(
         """
-Welcome! This app has **two main tabs**:
-1. **AI Chat**: A streaming chat interface with a Hugging Face model.
-2. **Advanced Text Converter**: Convert text to JSON or PDF with AI-based sentiment comments.
 """
     )
-    with gr.Tabs():
-        # =========== TAB 1: AI Chat ===========
-        with gr.Tab("AI Chat"):
-            # We can simply use Gradio's ChatInterface for streaming responses
-            gr.Markdown("### Chat with a Hugging Face Model")
-            chat = gr.ChatInterface(
-                fn=respond,
-                additional_inputs=[
-                    gr.Textbox(
-                        value="You are a helpful AI assistant.",
-                        label="System message",
-                    ),
-                    gr.Slider(
-                        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
-                    ),
-                    gr.Slider(
-                        minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
-                    ),
-                    gr.Slider(
-                        minimum=0.1,
-                        maximum=1.0,
-                        value=0.95,
-                        step=0.05,
-                        label="Top-p (nucleus sampling)",
-                    ),
-                ],
-            )
-        # =========== TAB 2: Text Converter ===========
-        with gr.Tab("Advanced Text Converter"):
-            gr.Markdown("### Convert text to JSON or PDF with AI comments")
-            input_text = gr.Textbox(
-                label="Enter your text (or paste from a file)",
-                placeholder="Type or paste your text here...",
-                lines=10,
-            )
-            format_dropdown = gr.Dropdown(
-                choices=["JSON", "PDF"],
-                value="JSON",
-                label="Choose output format",
-            )
-            convert_button = gr.Button("Convert")
-            # Two possible outputs: either JSON text or a PDF file
-            output_json = gr.Code(
-                label="JSON Output",
-                language="json",
-                visible=True,
-            )
-            output_file = gr.File(label="PDF Download")
-            def run_conversion(text, fmt):
-                """
-                Helper function to connect with Gradio.
-                Returns either a JSON string or a PDF file handle.
-                """
-                json_str, pdf_file = process_text(text, fmt)
-                # If we got an error or JSON
-                if isinstance(json_str, str) and json_str.startswith("Error:"):
-                    return json_str, None
-                if fmt == "JSON":
-                    # Show JSON in the code area, no file
-                    return json_str, None
-                else:
-                    # Return no text, but a file
-                    return None, pdf_file
-            convert_button.click(
-                fn=run_conversion,
-                inputs=[input_text, format_dropdown],
-                outputs=[output_json, output_file],
-            )
-# Launch the Gradio app
 if __name__ == "__main__":
     demo.launch()

 import base64
 from fpdf import FPDF
 from textblob import TextBlob
+import PyPDF2
+import tempfile
 nltk.download("punkt", quiet=True)
 #                           Hugging Face Chat Code                            #
 ###############################################################################
 """
+For more information on Hugging Face Inference API support, please check:
 https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # Initialize your Hugging Face model client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, file_content):
     """
+    Streams the chat response from the Hugging Face model.
+    The uploaded file's content is appended to the system message context.
+    Yields tokens as they arrive.
     """
+    # Append file content to the system prompt if available.
+    if file_content and file_content.strip():
+        system_message = system_message + "\n\nFile content:\n" + file_content
+    # Build the messages list.
+    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
     for partial in client.chat_completion(
         messages,
         yield response
 ###############################################################################
+#                    File Upload & Parsing Functionality                      #
 ###############################################################################
def parse_file(file):
    """
    Return the text content of one uploaded file.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``name`` and/or ``read()``.

    Returns:
        The extracted text. Files whose extension is ``.pdf`` are read page
        by page with PyPDF2; everything else is decoded as UTF-8 with
        undecodable bytes dropped. Failures never raise: a string starting
        with ``"Error reading PDF: "`` or ``"Error reading file: "`` is
        returned instead so the caller can surface it to the user.
    """
    import os

    # Work out the path behind the upload, whichever form it arrived in.
    if isinstance(file, (str, bytes, os.PathLike)):
        path = os.fsdecode(file)
    else:
        path = str(getattr(file, "name", "") or "")

    # Prefer re-opening the file by name when it exists: the handle passed in
    # is not guaranteed to be readable or positioned at the start of the data.
    on_disk = bool(path) and os.path.isfile(path)

    # splitext() yields "" for extension-less names, so a file literally
    # called "pdf" is no longer mistaken for a PDF document.
    extension = os.path.splitext(path)[1].lstrip(".").lower()

    if extension == "pdf":
        try:
            reader = PyPDF2.PdfReader(path if on_disk else file)
            # extract_text() may yield nothing for image-only pages.
            return "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            return f"Error reading PDF: {e}"

    try:
        if on_disk or not hasattr(file, "read"):
            with open(path, "rb") as handle:
                data = handle.read()
        else:
            data = file.read()
        if isinstance(data, str):
            # Text-mode handles already return decoded text.
            return data
        return bytes(data).decode("utf-8", errors="ignore")
    except Exception as e:
        return f"Error reading file: {e}"
def load_files(files):
    """
    Parse every uploaded file and concatenate the resulting text.

    Args:
        files: A list/tuple of uploads, a single upload, or ``None`` / an
            empty list when nothing has been uploaded yet.

    Returns:
        The text of each file as produced by ``parse_file``, each followed by
        a newline, joined in upload order. An empty string when there is
        nothing to load.
    """
    # The button can be clicked before anything is uploaded.
    if not files:
        return ""
    # Tolerate a single upload that was not wrapped in a list.
    if not isinstance(files, (list, tuple)):
        files = [files]
    return "".join(parse_file(f) + "\n" for f in files)
 ###############################################################################
+#                           Gradio UI Layout                                  #
 ###############################################################################
 with gr.Blocks() as demo:
+    gr.Markdown("# Combined Chat & File Upload App")
     gr.Markdown(
         """
+This app allows you to upload file(s) and chat with an AI assistant that references the uploaded file(s) throughout the conversation.
+- **Step 1:** Upload your file(s) (e.g., PDF or TXT).
+- **Step 2:** Click **Load File(s)** to parse and store the file content.
+- **Step 3:** Chat with the AI—the uploaded file's content will be appended to the context on every prompt.
 """
     )
+    # Create a state to store the file's parsed content.
+    file_content_state = gr.State("")
+    with gr.Row():
+        file_input = gr.File(label="Upload File(s)", file_count="multiple")
+        load_button = gr.Button("Load File(s)")
+    # When the Load button is clicked, concatenate file contents into file_content_state.
+    load_button.click(fn=load_files, inputs=file_input, outputs=file_content_state)
+    gr.Markdown("## Chat with AI (using the uploaded file's content as context)")
+    # Note: We use Gradio’s ChatInterface which streams responses from the client.
+    demo_chat = gr.ChatInterface(
+        fn=respond,
+        additional_inputs=[
+            gr.Textbox(
+                value="You are a helpful AI assistant that uses the uploaded file's content as context.",
+                label="System message",
+            ),
+            gr.Slider(
+                minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+            ),
+            gr.Slider(
+                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+            ),
+            gr.Slider(
+                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
+            ),
+            file_content_state,  # The uploaded file's content is passed into each chat call.
+        ],
+    )
+    demo.launch()
 if __name__ == "__main__":
     demo.launch()