Spaces:

mateoluksenberg
/

chat

Sleeping

App Files Files Community

mateoluksenberg committed on Aug 7, 2024

Commit

c8f3971

verified ·

1 Parent(s): d0c5413

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -79

app.py CHANGED Viewed

@@ -5,17 +5,27 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import os
 from threading import Thread
-from fastapi import FastAPI, UploadFile, File, Form
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from typing import Optional, List
-import logging
-import fitz  # PyMuPDF
 import docx
 from pptx import Presentation
 # Model repositories known to this app; MODEL_ID below takes the first entry.
 MODEL_LIST = ["nikravan/glm-4vq"]
 # Value of the HF_TOKEN environment variable, or None when it is unset.
 # NOTE(review): presumably a Hugging Face access token; its use is not visible here.
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # Repository id handed to from_pretrained(): the first entry of MODEL_LIST.
 MODEL_ID = MODEL_LIST[0]
 # Short display name for the model.
 # NOTE(review): presumably interpolated into the page title/description; confirm.
 MODEL_NAME = "GLM-4vq"
@@ -36,18 +46,23 @@ h1 {
 }
 """
 # Load the tokenizer for MODEL_ID once at import time. trust_remote_code=True
 # allows transformers to run custom Python code shipped in the model
 # repository, so MODEL_ID must point at a repository that is trusted.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 def extract_text(path):
     """Return the entire contents of the text file at ``path``.

     The file is opened in text mode with the platform default encoding,
     exactly as before. The ``with`` block guarantees the file handle is
     closed as soon as the read finishes, instead of staying open until the
     garbage collector happens to reclaim it.
     """
     with open(path, 'r') as f:
         return f.read()
 def extract_pdf(path):
-    doc = fitz.open(path)
     text = ""
     for page in doc:
         text += page.get_text()
     return text
 def extract_docx(path):
     doc = docx.Document(path)
     data = []
@@ -56,6 +71,7 @@ def extract_docx(path):
     content = '\n\n'.join(data)
     return content
 def extract_pptx(path):
     prs = Presentation(path)
     text = ""
@@ -65,6 +81,7 @@ def extract_pptx(path):
                 text += shape.text + "\n"
     return text
 def mode_load(path):
     choice = ""
     file_type = path.split(".")[-1]
@@ -82,6 +99,7 @@ def mode_load(path):
         print(content[:100])
         return choice, content[:5000]
     elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
         content = Image.open(path).convert('RGB')
         choice = "image"
@@ -90,6 +108,7 @@ def mode_load(path):
     else:
         raise gr.Error("Oops, unsupported files.")
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
@@ -113,9 +132,11 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
             conversation.append({"role": "user", "content": format_msg})
     else:
         if len(history) == 0:
             contents = None
             conversation.append({"role": "user", "content": message['text']})
         else:
             for prompt, answer in history:
                 if answer is None:
                     prompt_files.append(prompt[0])
@@ -128,6 +149,7 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
                 choice = ""
                 conversation.append({"role": "user", "image": "", "content": message['text']})
             if choice == "image":
                 conversation.append({"role": "user", "image": contents, "content": message['text']})
             elif choice == "doc":
@@ -159,11 +181,18 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
             buffer += new_text
             yield buffer
-chatbot = gr.Chatbot()
 chat_input = gr.MultimodalTextbox(
     interactive=True,
     placeholder="Enter message or upload a file ...",
     show_label=False,
 )
 EXAMPLES = [
@@ -173,80 +202,14 @@ EXAMPLES = [
     [{"text": "Quiero armar un JSON, solo el JSON sin texto, que contenga los datos de la primera mitad de la tabla de la imagen (las primeras 10 jurisdicciones 901-910). Ten en cuenta que los valores numéricos son decimales de cuatro dígitos. La tabla contiene las siguientes columnas: Codigo, Nombre, Fecha Inicio, Fecha Cese, Coeficiente Ingresos, Coeficiente Gastos y Coeficiente Unificado. La tabla puede contener valores vacíos, en ese caso dejarlos como null. Cada fila de la tabla representa una jurisdicción con sus respectivos valores.", }]
 ]
-app = FastAPI()
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-class ChatMessage(BaseModel):
-    text: str
-    history: Optional[List] = []
-    temperature: float = 0.8
-    max_length: int = 4096
-    top_p: float = 1.0
-    top_k: int = 10
-    penalty: float = 1.0
-@app.post("/test/")
-async def test_endpoint(message: dict):
-    logging.info(f"Received message: {message}")
-    if "text" not in message:
-        raise HTTPException(status_code=400, detail="Missing 'text' in request body")
-    response = {"message": f"Received your message: {message['text']}"}
-    return response
-@app.post("/chat/")
-async def chat_endpoint(message: ChatMessage, file: Optional[UploadFile] = None):
-    conversation = []
-    if file:
-        path = f"/tmp/{file.filename}"
-        with open(path, "wb") as f:
-            f.write(await file.read())
-        choice, contents = mode_load(path)
-        if choice == "image":
-            conversation.append({"role": "user", "image": contents, "content": message.text})
-        elif choice == "doc":
-            format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message.text
-            conversation.append({"role": "user", "content": format_msg})
-    else:
-        conversation.append({"role": "user", "content": message.text})
-    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True,
-                                              return_tensors="pt", return_dict=True).to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        max_length=message.max_length,
-        streamer=streamer,
-        do_sample=True,
-        top_p=message.top_p,
-        top_k=message.top_k,
-        temperature=message.temperature,
-        repetition_penalty=message.penalty,
-        eos_token_id=[151329, 151336, 151338],
-    )
-    gen_kwargs = {**input_ids, **generate_kwargs}
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=gen_kwargs)
-        thread.start()
-        buffer = ""
-        for new_text in streamer:
-            buffer += new_text
-            return {"response": buffer}
 with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
     gr.HTML(TITLE)
     gr.HTML(DESCRIPTION)
     gr.ChatInterface(
         fn=stream_chat,
         multimodal=True,
         textbox=chat_input,
         chatbot=chatbot,
         fill_height=True,
@@ -297,6 +260,5 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
     gr.Examples(EXAMPLES, [chat_input])
 if __name__ == "__main__":
-    demo.queue(api_open=False).launch(show_api=False, share=False)
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import os
 from threading import Thread
+import pymupdf
 import docx
 from pptx import Presentation
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import HTMLResponse
+app = FastAPI()
+@app.post("/test/")
+async def test_endpoint(message: dict):
+    if "text" not in message:
+        raise HTTPException(status_code=400, detail="Missing 'text' in request body")
+    response = {"message": f"Received your message: {message['text']}"}
+    return response
 # Model repositories known to this app; MODEL_ID below takes the first entry.
 MODEL_LIST = ["nikravan/glm-4vq"]
 # Value of the HF_TOKEN environment variable, or None when it is unset.
 # NOTE(review): presumably a Hugging Face access token; its use is not visible here.
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # Repository id handed to from_pretrained(): the first entry of MODEL_LIST.
 MODEL_ID = MODEL_LIST[0]
 # Short display name for the model.
 # NOTE(review): presumably interpolated into the page title/description; confirm.
 MODEL_NAME = "GLM-4vq"
 }
 """
 # Load the tokenizer for MODEL_ID once at import time. trust_remote_code=True
 # allows transformers to run custom Python code shipped in the model
 # repository, so MODEL_ID must point at a repository that is trusted.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 def extract_text(path):
     """Return the entire contents of the text file at ``path``.

     The file is opened in text mode with the platform default encoding,
     exactly as before. The ``with`` block guarantees the file handle is
     closed as soon as the read finishes, instead of staying open until the
     garbage collector happens to reclaim it.
     """
     with open(path, 'r') as f:
         return f.read()
 def extract_pdf(path):
+    doc = pymupdf.open(path)
     text = ""
     for page in doc:
         text += page.get_text()
     return text
 def extract_docx(path):
     doc = docx.Document(path)
     data = []
     content = '\n\n'.join(data)
     return content
 def extract_pptx(path):
     prs = Presentation(path)
     text = ""
                 text += shape.text + "\n"
     return text
 def mode_load(path):
     choice = ""
     file_type = path.split(".")[-1]
         print(content[:100])
         return choice, content[:5000]
     elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
         content = Image.open(path).convert('RGB')
         choice = "image"
     else:
         raise gr.Error("Oops, unsupported files.")
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
             conversation.append({"role": "user", "content": format_msg})
     else:
         if len(history) == 0:
+            # raise gr.Error("Please upload an image first.")
             contents = None
             conversation.append({"role": "user", "content": message['text']})
         else:
+            # image = Image.open(history[0][0][0])
             for prompt, answer in history:
                 if answer is None:
                     prompt_files.append(prompt[0])
                 choice = ""
                 conversation.append({"role": "user", "image": "", "content": message['text']})
             if choice == "image":
                 conversation.append({"role": "user", "image": contents, "content": message['text']})
             elif choice == "doc":
             buffer += new_text
             yield buffer
+chatbot = gr.Chatbot(
+    #rtl=True,
+)
 chat_input = gr.MultimodalTextbox(
     interactive=True,
     placeholder="Enter message or upload a file ...",
     show_label=False,
+    #rtl=True,
 )
 EXAMPLES = [
     [{"text": "Quiero armar un JSON, solo el JSON sin texto, que contenga los datos de la primera mitad de la tabla de la imagen (las primeras 10 jurisdicciones 901-910). Ten en cuenta que los valores numéricos son decimales de cuatro dígitos. La tabla contiene las siguientes columnas: Codigo, Nombre, Fecha Inicio, Fecha Cese, Coeficiente Ingresos, Coeficiente Gastos y Coeficiente Unificado. La tabla puede contener valores vacíos, en ese caso dejarlos como null. Cada fila de la tabla representa una jurisdicción con sus respectivos valores.", }]
 ]
 with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
     gr.HTML(TITLE)
     gr.HTML(DESCRIPTION)
     gr.ChatInterface(
         fn=stream_chat,
         multimodal=True,
         textbox=chat_input,
         chatbot=chatbot,
         fill_height=True,
     gr.Examples(EXAMPLES, [chat_input])
 if __name__ == "__main__":
+    demo.queue(api_open=False).launch(show_api=False, share=False, )#server_name="0.0.0.0", )