Doc-Reader-and-Chat

Sleeping

App Files Files Community

KingNish committed on Sep 18, 2024

Commit

c87c622

verified ·

1 Parent(s): 8dc1546

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -0

app.py CHANGED Viewed

@@ -73,6 +73,12 @@ def read_document(file):
         except Exception as e:
             return f"Error reading file: {e}"
 def chat_document(file, question):
     content = str(read_document(file))
     if len(content) > 32000:
@@ -103,6 +109,58 @@ def chat_document(file, question):
         yield output
 with gr.Blocks() as demo:
     with gr.Tabs():
         with gr.TabItem("Document Reader"):
@@ -121,5 +179,13 @@ with gr.Blocks() as demo:
                 title="Document Chat",
                 description="Upload a document and ask questions about its content."
             )
 demo.launch()

         except Exception as e:
             return f"Error reading file: {e}"
+def split_content(content, chunk_size=32000):
+    chunks = []
+    for i in range(0, len(content), chunk_size):
+        chunks.append(content[i:i + chunk_size])
+    return chunks
 def chat_document(file, question):
     content = str(read_document(file))
     if len(content) > 32000:
         yield output
+def chat_document_v2(file, question):
+    content = str(read_document(file))
+    content = content.replace('\n', ' ')
+    content = content.replace('\r', ' ')
+    content = content.replace('\t', ' ')
+    content = content.strip()
+    chunks = split_content(content)
+    # Define system prompt for the chat API
+    system_prompt = """
+    You are a helpful and informative assistant that can answer questions based on the content of documents.
+    You will receive the content of a document and a question about it.
+    Your task is to provide a concise and accurate answer to the question based solely on the provided document content.
+    If the document does not contain enough information to answer the question, simply state that you cannot answer the question based on the provided information.
+    """
+    all_answers = []
+    for chunk in chunks:
+        message = f"""[INST] [SYSTEM] {system_prompt}
+        Document Content: {chunk[:32000]}
+        Question: {question}
+        Answer:"""
+        stream = client.text_generation(message, max_new_tokens=4096, stream=True, details=True, return_full_text=False)
+        output = ""
+        for response in stream:
+            if not response.token.text == "</s>":
+                output += response.token.text
+        all_answers.append(output)
+    # Summarize all answers using Mistral
+    summary_prompt = """
+    You are a helpful and informative assistant that can summarize multiple answers related to the same question.
+    You will receive a list of answers to a question, and your task is to generate a concise and comprehensive summary that incorporates the key information from all the answers.
+    Avoid repeating information unnecessarily and focus on providing the most relevant and accurate summary based on the provided answers.
+    Answers:
+    """
+    all_answers_str = "\n".join(all_answers)
+    summary_message = f"""[INST] {summary_prompt}
+    {all_answers_str[:30000]}
+    Summary:"""
+    stream = client.text_generation(summary_message, max_new_tokens=4096, stream=True, details=True, return_full_text=False)
+    output = ""
+    for response in stream:
+        if not response.token.text == "</s>":
+            output += response.token.text
+        yield output
 with gr.Blocks() as demo:
     with gr.Tabs():
         with gr.TabItem("Document Reader"):
                 title="Document Chat",
                 description="Upload a document and ask questions about its content."
             )
+        # Tab for the chunk-based chat: a file upload and a question textbox
+        # feed chat_document_v2, whose output is shown in an "Answer" textbox.
+        with gr.TabItem("Document Chat V2"):
+            iface3 = gr.Interface(
+                fn=chat_document_v2,
+                inputs=[gr.File(label="Upload a Document"), gr.Textbox(label="Question")],
+                outputs=gr.Textbox(label="Answer"),
+                title="Document Chat V2",
+                description="Upload a document and ask questions about its content (using chunk-based approach)."
+            )
 demo.launch()