GLM-4-DOC

Runtime error

App Files Files Community

vilarin committed on Jun 6, 2024

Commit

8e4e648

verified ·

1 Parent(s): ac56402

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -8

app.py CHANGED Viewed

@@ -6,6 +6,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
 import os
 from threading import Thread
 MODEL_LIST = ["THUDM/glm-4v-9b"]
@@ -15,7 +18,14 @@ MODEL_NAME = MODEL_ID.split("/")[-1]
 TITLE = "<h1>VL-Chatbox</h1>"
-DESCRIPTION = f'<center><p>A SPACE FOR VLM MODELS</p><br><h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></center></h3>'
 CSS = """
 .duplicate-button {
@@ -40,28 +50,86 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model.eval()
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     if message["files"]:
-        image = Image.open(message["files"][-1]).convert('RGB')
-        conversation.append({"role": "user", "image": image, "content": message['text']})
     else:
         if len(history) == 0:
             #raise gr.Error("Please upload an image first.")
-            image = None
             conversation.append({"role": "user", "content": message['text']})
         else:
             #image = Image.open(history[0][0][0])
             for prompt, answer in history:
                 if answer is None:
-                    image = Image.open(prompt[0])
                     conversation.extend([{"role": "user", "content": ""},{"role": "assistant", "content": ""}])
                 else:
                     conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-            conversation.append({"role": "user", "image": image, "content": message['text']})
     print(f"Conversation is -\n{conversation}")
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
@@ -93,7 +161,6 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
 chatbot = gr.Chatbot(height=450)
 chat_input = gr.MultimodalTextbox(
     interactive=True,
-    file_types=["image"],
     placeholder="Enter message or upload a file one time...",
     show_label=False,
@@ -104,7 +171,7 @@ EXAMPLES = [
         [{"text": "Is it real?", "files": ["./spacecat.png"]}]
 ]
-with gr.Blocks(css=CSS) as demo:
     gr.HTML(TITLE)
     gr.HTML(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

 import os
 from threading import Thread
+from langchain_community.document_loaders import PyMuPDFLoader
+import docx
+from pptx import Presentation
 MODEL_LIST = ["THUDM/glm-4v-9b"]
 TITLE = "<h1>VL-Chatbox</h1>"
+DESCRIPTION = f"""
+<center>
+<p>A SPACE FOR MY FAV VLM.
+<br>
+MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a>
+<br>
+TIPS: NOW SUPPORT DM & ONE IMAGE/FILE UPLOAD PER TIME.
+</p></center>"""
 CSS = """
 .duplicate-button {
 model.eval()
def extract_text(path):
    """Return the full contents of a plain-text file as one string.

    Args:
        path: Filesystem path of the file to read.

    Returns:
        The file's text, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager closes the handle as soon as the read finishes
    # instead of leaving it to the garbage collector; the explicit encoding
    # keeps decoding independent of the host's locale.
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()
def extract_pdf(path):
    """Return the text of the PDF at ``path`` as a single string.

    The file is loaded with ``PyMuPDFLoader``; the ``page_content`` of each
    document the loader returns is joined with a blank line in between.
    """
    documents = PyMuPDFLoader(path).load()
    return '\n\n'.join(document.page_content for document in documents)
def extract_docx(path):
    """Return the text of the Word document at ``path`` as a single string.

    Args:
        path: Filesystem path of the ``.docx`` file.

    Returns:
        The text of every paragraph in ``doc.paragraphs``, joined with a
        blank line between paragraphs.
    """
    document = docx.Document(path)
    # The joined text must be returned explicitly: without the return the
    # function yields None and callers that concatenate the result fail.
    return '\n\n'.join(paragraph.text for paragraph in document.paragraphs)
def extract_pptx(path):
    """Return the text of the presentation at ``path`` as a single string.

    Walks every shape on every slide and keeps those exposing a ``text``
    attribute; each shape's text is followed by a newline.
    """
    deck = Presentation(path)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces)
def mode_load(path):
    """Load an uploaded file and classify it as a document or an image.

    Args:
        path: Filesystem path of the uploaded file; its type is decided by
            the text after the last ``.`` in the path.

    Returns:
        A ``(choice, content)`` tuple: ``("doc", text)`` for supported
        document/text extensions, or ``("image", image)`` where ``image`` is
        the file opened with ``Image.open`` and converted to RGB.

    Raises:
        gr.Error: If the extension is not one of the supported types.
    """
    # Lower-case the extension so "REPORT.PDF" or "photo.PNG" are accepted too.
    file_type = path.rsplit(".", 1)[-1].lower()

    if file_type in ("pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"):
        # file_type is the bare extension (no leading dot), so it has to be
        # compared by equality; testing endswith(".pdf") never matches and
        # would send binary documents to the plain-text reader.
        if file_type == "pdf":
            content = extract_pdf(path)
        elif file_type == "docx":
            content = extract_docx(path)
        elif file_type == "pptx":
            content = extract_pptx(path)
        else:
            content = extract_text(path)
        print(content)
        return "doc", content

    if file_type in ("png", "jpg", "jpeg", "bmp", "tiff", "webp"):
        return "image", Image.open(path).convert('RGB')

    raise gr.Error("Oops, unsupported files.")
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
+    prompt_files = []
     if message["files"]:
+        choice, contents = mode_load(message["files"][-1])
+        if choice == "image":
+            conversation.append({"role": "user", "image": contents, "content": message['text']})
+        elif choice == "doc":
+            format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
+            conversation.append({"role": "user", "content": format_msg})
     else:
         if len(history) == 0:
             #raise gr.Error("Please upload an image first.")
+            contents = None
             conversation.append({"role": "user", "content": message['text']})
         else:
             #image = Image.open(history[0][0][0])
             for prompt, answer in history:
                 if answer is None:
+                    prompt_files.append(prompt[0])
                     conversation.extend([{"role": "user", "content": ""},{"role": "assistant", "content": ""}])
                 else:
                     conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+            choice, contents = mode_load(prompt_files[-1])
+            if choice == "image":
+                conversation.append({"role": "user", "image": contents, "content": message['text']})
+            elif choice == "doc":
+                format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
+                conversation.append({"role": "user", "content": format_msg})
     print(f"Conversation is -\n{conversation}")
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
 chatbot = gr.Chatbot(height=450)
 chat_input = gr.MultimodalTextbox(
     interactive=True,
     placeholder="Enter message or upload a file one time...",
     show_label=False,
         [{"text": "Is it real?", "files": ["./spacecat.png"]}]
 ]
+with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
     gr.HTML(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")