Update app.py

app.py CHANGED

@@ -1,47 +1,267 @@
 import os
-
-
-
-os.system('pip install gradio')
-os.system('pip install minijinja')
-os.system('pip install PyMuPDF')
-
 import gradio as gr
 from huggingface_hub import InferenceClient
-from transformers import pipeline
 from datasets import load_dataset
 import fitz  # PyMuPDF
 
-
 
-
 
-
-
-
-
-
-
-
 
 def chat_between_bots(system_message1, system_message2, max_tokens, temperature, top_p, history1, history2, shared_history, message):
     response1, history1 = list(respond(message, history1, system_message1, max_tokens, temperature, top_p))[-1]
     response2, history2 = list(respond(message, history2, system_message2, max_tokens, temperature, top_p))[-1]
 
-
-
-
-
-
-
-
-    return
 
-def ask_about_pdf(pdf_text, question):
-    prompt =
     response = ""
     for message in client.chat_completion(
-        [{"role": "system", "content": "You are a legal expert
         {"role": "user", "content": prompt}],
         max_tokens=512,
         stream=True,

@@ -53,38 +273,12 @@ def ask_about_pdf(pdf_text, question):
             response += token
     return response
 
-def update_pdf_gallery_and_extract_text(pdf_files):
-    if len(pdf_files) > 0:
-        pdf_text = extract_text_from_pdf(pdf_files[0].name)
-    else:
-        pdf_text = ""
-    return pdf_files, pdf_text
-
 def add_message(history, message):
-
-
-
-
-
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=150,
-        stream=True,
-        temperature=0.6,
-        top_p=0.95,
-    ):
-        token = message.choices[0].delta.content
-        if token is not None:
-            response += token
-            history[-1][1] = response
-            yield history
 
 def print_like_dislike(x: gr.LikeData):
     print(x.index, x.value, x.liked)

@@ -95,56 +289,103 @@ def reset_conversation():
 def save_conversation(history1, history2, shared_history):
     return history1, history2, shared_history
 
-
-
-
-
-}
-"""
 
 with gr.Blocks(css=custom_css) as demo:
     history1 = gr.State([])
     history2 = gr.State([])
     shared_history = gr.State([])
-
-
-
     with gr.Tab("Argument Evaluation"):
-    [37 deleted lines (old 113-149): the old "Argument Evaluation" tab body; content not preserved in this extract]
 demo.launch()

@@ -1,47 +1,267 @@
 import os
+import time  # used below by download_yt_audio's duration formatting
+import tempfile
+import torch
+import yt_dlp as youtube_dl
 import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoProcessor, AutoModelForSpeechSeq2Seq
 from huggingface_hub import InferenceClient
 from datasets import load_dataset
 import fitz  # PyMuPDF
+from transformers.pipelines.audio_utils import ffmpeg_read
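+# The old runtime os.system('pip install ...') calls are gone; on a Hugging
+# Face Space these packages (gradio, torch, yt-dlp, transformers, PyMuPDF, ...)
+# would normally be pinned in requirements.txt instead (assumed; that file is
+# not part of this diff).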
+
+# Constants for Whisper ASR
+MODEL_NAME = "openai/whisper-large-v3"
+BATCH_SIZE = 8        # defined but currently unused below
+FILE_LIMIT_MB = 1000  # defined but currently unused below
+YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
+
+device = 0 if torch.cuda.is_available() else "cpu"
+
+# Load the Whisper model and processor, placing the model on the same device
+# the processed inputs are moved to below.
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model_s2s = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME).to(device)
 
+# Load the BERT model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
+model = AutoModelForMaskedLM.from_pretrained("google-bert/bert-base-uncased")
 
+# Create the fill-mask pipeline (note: `pipe` is never used elsewhere in this file)
+pipe = pipeline("fill-mask", model=model, tokenizer=tokenizer)
+
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    messages = [{"role": "system", "content": system_message}]
+
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
 
+    messages.append({"role": "user", "content": message})
+
+    try:
+        response = ""
+        # a distinct loop name keeps the user `message` from being shadowed,
+        # since it is reused in the yielded history pair below
+        for chunk in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = chunk.choices[0].delta.content
+            if token is not None:
+                response += token
+                yield response, history + [(message, response)]
+    except Exception as e:
+        print(f"Error during chat completion: {e}")
+        yield "An error occurred during the chat completion.", history
+
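+# respond() is a generator: each yield pairs the partial streamed response with
+# an updated history. Callers that only want the final answer drain it with
+# list(respond(...))[-1], as chat_between_bots() does below.
+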
+def generate_case_outcome(prosecutor_response, defense_response):
+    prompt = f"Prosecutor's arguments: {prosecutor_response}\n\nDefense's arguments: {defense_response}\n\nProvide details on who won the case and why. Provide reasons for your decision and provide a link to the source of the case."
+    evaluation = ""
+    try:
+        for message in client.chat_completion(
+            [{"role": "system", "content": "You are a legal expert evaluating the details of the case presented by the prosecution and the defense."},
+             {"role": "user", "content": prompt}],
+            max_tokens=512,
+            stream=True,
+            temperature=0.6,
+            top_p=0.95,
+        ):
+            token = message.choices[0].delta.content
+            if token is not None:
+                evaluation += token
+    except Exception as e:
+        print(f"Error during case outcome generation: {e}")
+        return "An error occurred during the case outcome generation."
+    return evaluation
+
+def determine_outcome(outcome):
+    # Naive heuristic: whichever side is named more often in the generated
+    # outcome text is declared the winner.
+    prosecutor_count = outcome.split().count("Prosecutor")
+    defense_count = outcome.split().count("Defense")
+    if prosecutor_count > defense_count:
+        return "Prosecutor Wins"
+    elif defense_count > prosecutor_count:
+        return "Defense Wins"
+    else:
+        return "No clear winner"
+
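+# Example: an outcome text mentioning "Prosecutor" three times and "Defense"
+# once yields "Prosecutor Wins"; equal counts fall through to "No clear winner".
+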
+def transcribe(inputs, task):  # note: `task` arrives from the UI but is not forwarded to generation here
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+    # gr.Audio(type="filepath") supplies a path, so decode the file to a 16 kHz
+    # waveform first; the processor expects audio samples, not a path string.
+    with open(inputs, "rb") as f:
+        audio = ffmpeg_read(f.read(), processor.feature_extractor.sampling_rate)
+
+    inputs = processor(audio, return_tensors="pt", sampling_rate=16000).to(device)
+    with torch.no_grad():
+        generated_ids = model_s2s.generate(inputs["input_features"])
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return transcription
+
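+# A simpler route (not taken here) would be the ASR pipeline, which decodes the
+# file and chunks long audio itself, and would give BATCH_SIZE a use, e.g.:
+#     asr = pipeline("automatic-speech-recognition", model=MODEL_NAME, device=device)
+#     text = asr(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task})["text"]
+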
+def _return_yt_html_embed(yt_url):
+    video_id = yt_url.split("?v=")[-1]
+    HTML_str = (
+        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
+        " </center>"
+    )
+    return HTML_str
+
+def download_yt_audio(yt_url, filename):
+    info_loader = youtube_dl.YoutubeDL()
+
+    try:
+        info = info_loader.extract_info(yt_url, download=False)
+    except youtube_dl.utils.DownloadError as err:
+        raise gr.Error(str(err))
+
+    file_length = info["duration_string"]
+    file_h_m_s = file_length.split(":")
+    file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
+
+    # Pad "SS" or "MM:SS" duration strings out to [H, M, S]
+    if len(file_h_m_s) == 1:
+        file_h_m_s.insert(0, 0)
+    if len(file_h_m_s) == 2:
+        file_h_m_s.insert(0, 0)
+    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
+
+    if file_length_s > YT_LENGTH_LIMIT_S:
+        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
+        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
+        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
+
+    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
+
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        try:
+            ydl.download([yt_url])
+        except youtube_dl.utils.ExtractorError as err:
+            raise gr.Error(str(err))
+
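+# Example: a duration_string of "1:02:30" parses to [1, 2, 30] -> 3750 s, over
+# the 3600 s YT_LENGTH_LIMIT_S, so the download is rejected with gr.Error.
+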
+def yt_transcribe(yt_url, task, max_filesize=75.0):
+    html_embed_str = _return_yt_html_embed(yt_url)
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        filepath = os.path.join(tmpdirname, "video.mp4")
+        download_yt_audio(yt_url, filepath)
+        with open(filepath, "rb") as f:
+            inputs = f.read()
+
+    # Decode to a 16 kHz waveform; the processor expects the raw sample array,
+    # not an {"array": ..., "sampling_rate": ...} dict.
+    audio = ffmpeg_read(inputs, processor.feature_extractor.sampling_rate)
+
+    inputs = processor(audio, return_tensors="pt", sampling_rate=16000).to(device)
+    with torch.no_grad():
+        generated_ids = model_s2s.generate(inputs["input_features"])
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return html_embed_str, transcription
+
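+# Note: yt_transcribe's max_filesize parameter is accepted but never enforced;
+# only the duration check in download_yt_audio limits what gets processed.
+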
+# Custom CSS for white background and black text for input and output boxes
+custom_css = """
+body {
+    background-color: #ffffff;
+    color: #000000;
+    font-family: Arial, sans-serif;
+}
+.gradio-container {
+    max-width: 1000px;
+    margin: 0 auto;
+    padding: 20px;
+    background-color: #ffffff;
+    border: 1px solid #e0e0e0;
+    border-radius: 8px;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
+}
+.gr-button {
+    background-color: #ffffff !important;
+    border-color: #ffffff !important;
+    color: #000000 !important;
+    margin: 5px;
+}
+.gr-button:hover {
+    background-color: #ffffff !important;
+    border-color: #004085 !important;
+}
+.gr-input, .gr-textbox, .gr-slider, .gr-markdown, .gr-chatbox {
+    border-radius: 4px;
+    border: 1px solid #ced4da;
+    background-color: #ffffff !important;
+    color: #000000 !important;
+}
+.gr-input:focus, .gr-textbox:focus, .gr-slider:focus {
+    border-color: #ffffff;
+    outline: 0;
+    box-shadow: 0 0 0 0.2rem rgba(255, 255, 255, 1.0);
+}
+#flagging-button {
+    display: none;
+}
+footer {
+    display: none;
+}
+.chatbox .chat-container .chat-message {
+    background-color: #ffffff !important;
+    color: #000000 !important;
+}
+.chatbox .chat-container .chat-message-input {
+    background-color: #ffffff !important;
+    color: #000000 !important;
+}
+.gr-markdown {
+    background-color: #ffffff !important;
+    color: #000000 !important;
+}
+.gr-markdown h1, .gr-markdown h2, .gr-markdown h3, .gr-markdown h4, .gr-markdown h5, .gr-markdown h6, .gr-markdown p, .gr-markdown ul, .gr-markdown ol, .gr-markdown li {
+    color: #000000 !important;
+}
+.score-box {
+    width: 60px;
+    height: 60px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 12px;
+    font-weight: bold;
+    color: black;
+    margin: 5px;
+}
+.scroll-box {
+    max-height: 200px;
+    overflow-y: scroll;
+    border: 1px solid #ced4da;
+    padding: 10px;
+    border-radius: 4px;
+}
+"""
 
 def chat_between_bots(system_message1, system_message2, max_tokens, temperature, top_p, history1, history2, shared_history, message):
     response1, history1 = list(respond(message, history1, system_message1, max_tokens, temperature, top_p))[-1]
     response2, history2 = list(respond(message, history2, system_message2, max_tokens, temperature, top_p))[-1]
+    shared_history.append(f"Prosecutor: {response1}")
+    shared_history.append(f"Defense Attorney: {response2}")
 
+    max_length = max(len(response1), len(response2))
+    response1 = response1[:max_length]  # no-op trim: max_length is the longer of the two lengths
+    response2 = response2[:max_length]
+
+    outcome = generate_case_outcome(response1, response2)
+    winner = determine_outcome(outcome)  # computed but never returned or displayed
+
+    return response1, response2, history1, history2, shared_history, outcome
 
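+# The six values returned above map one-to-one onto the outputs wired to
+# submit_btn in the "Argument Evaluation" tab below.
+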
+def get_top_10_cases():
+    prompt = "List 10 high-profile legal cases that have received significant media attention and are currently ongoing. Just a list of case names and numbers."
     response = ""
     for message in client.chat_completion(
+        [{"role": "system", "content": "You are a legal research expert, able to provide information about high-profile legal cases."},
         {"role": "user", "content": prompt}],
         max_tokens=512,
         stream=True,

@@ -53,38 +273,12 @@ def ask_about_pdf(pdf_text, question):
             response += token
     return response
 
 def add_message(history, message):
+    for x in message["files"]:
+        history.append(((x,), None))
+    if message["text"] is not None:
+        history.append((message["text"], None))
+    return history, gr.MultimodalTextbox(value=None, interactive=True)
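+# add_message() expects a gr.MultimodalTextbox payload ({"text": ..., "files": [...]}),
+# but no MultimodalTextbox is instantiated in the UI below, so this helper is
+# currently unreferenced.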
 
 def print_like_dislike(x: gr.LikeData):
     print(x.index, x.value, x.liked)

@@ -95,56 +289,103 @@ def reset_conversation():
 def save_conversation(history1, history2, shared_history):
     return history1, history2, shared_history
 
+def ask_about_case_outcome(shared_history, question):
+    prompt = f"Case Outcome: {shared_history}\n\nQuestion: {question}\n\nAnswer:"
+    response = ""
+    for message in client.chat_completion(
+        [{"role": "system", "content": "You are a legal expert answering questions based on the case outcome provided."},
+         {"role": "user", "content": prompt}],
+        max_tokens=512,
+        stream=True,
+        temperature=0.6,
+        top_p=0.95,
+    ):
+        token = message.choices[0].delta.content
+        if token is not None:
+            response += token
+    return response
 
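+# shared_history is the list of "Prosecutor: ..." / "Defense Attorney: ..."
+# strings built up by chat_between_bots, so the f-string above splices the
+# whole transcript into the prompt.
+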
 with gr.Blocks(css=custom_css) as demo:
     history1 = gr.State([])
     history2 = gr.State([])
     shared_history = gr.State([])
+    top_10_cases = gr.State("")
+
     with gr.Tab("Argument Evaluation"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                top_10_btn = gr.Button("Give me the top 10 cases")
+                top_10_output = gr.Textbox(label="Top 10 Cases", interactive=False, elem_classes=["scroll-box"])
+                top_10_btn.click(get_top_10_cases, outputs=top_10_output)
+            with gr.Column(scale=2):
+                message = gr.Textbox(label="Case to Argue")
+                system_message1 = gr.State("You are an expert Prosecutor. Give your best arguments for the case on behalf of the prosecution.")
+                system_message2 = gr.State("You are an expert Defense Attorney. Give your best arguments for the case on behalf of the Defense.")
+                max_tokens = gr.State(512)
+                temperature = gr.State(0.6)
+                top_p = gr.State(0.95)
+
+        with gr.Row():
+            with gr.Column(scale=4):
+                prosecutor_response = gr.Textbox(label="Prosecutor's Response", interactive=True, elem_classes=["scroll-box"])
+            with gr.Column(scale=1):
+                prosecutor_score_color = gr.HTML()
+
+            with gr.Column(scale=4):
+                defense_response = gr.Textbox(label="Defense Attorney's Response", interactive=True, elem_classes=["scroll-box"])
+            with gr.Column(scale=1):
+                defense_score_color = gr.HTML()
+
+        outcome = gr.Textbox(label="Outcome", interactive=False, elem_classes=["scroll-box"])
+
+        with gr.Row():
+            submit_btn = gr.Button("Argue")
+            clear_btn = gr.Button("Clear and Reset")
+            save_btn = gr.Button("Save Conversation")
+
+        submit_btn.click(chat_between_bots, inputs=[system_message1, system_message2, max_tokens, temperature, top_p, history1, history2, shared_history, message], outputs=[prosecutor_response, defense_response, history1, history2, shared_history, outcome])
+        clear_btn.click(reset_conversation, outputs=[history1, history2, shared_history, prosecutor_response, defense_response, outcome])
+        save_btn.click(save_conversation, inputs=[history1, history2, shared_history], outputs=[history1, history2, shared_history])
+
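+    # Note: max_tokens, temperature and top_p above are gr.State values, so the
+    # generation settings are fixed rather than user-adjustable sliders.
+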
+    with gr.Tab("Practice Arguments"):
+        mf_transcribe = gr.Interface(
+            fn=transcribe,
+            inputs=[
+                gr.Audio(type="filepath", label="Record or Upload Audio"),
+                gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+            ],
+            outputs="text",
+            layout="horizontal",
+            title="Practice Legal Arguments - Microphone",
+            description=(
+                "Practice your legal arguments by recording them through your microphone or uploading an audio file. The arguments will be transcribed for review."
+            ),
+            allow_flagging="never",
+        )
+
+        # `yt_transcribe` is rebound from the function to this Interface; fn= is
+        # captured before the rebinding, so it works, but reusing the name is fragile.
+        yt_transcribe = gr.Interface(
+            fn=yt_transcribe,
+            inputs=[
+                gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+                gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
+            ],
+            outputs=["html", "text"],
+            layout="horizontal",
+            title="Practice Legal Arguments - YouTube",
+            description=(
+                "Practice your legal arguments by providing a YouTube video link. The arguments will be transcribed for review."
+            ),
+            allow_flagging="never",
+        )
+
+        gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Microphone", "YouTube"])
+
+    with gr.Tab("Case Outcome Chat"):
+        case_question = gr.Textbox(label="Ask a Question about the Case Outcome")
+        case_answer = gr.Textbox(label="Answer", interactive=False, elem_classes=["scroll-box"])
+        ask_case_btn = gr.Button("Ask")
+
+        ask_case_btn.click(ask_about_case_outcome, inputs=[shared_history, case_question], outputs=case_answer)
+
+    demo.queue()
 demo.launch()