aman-augurs committed
Commit 3200db8 · verified · 1 Parent(s): bd01003

Create app.py

Files changed (1)
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import gradio as gr
+
+ # Check if a GPU is available
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+     print(f"Using GPU: {torch.cuda.get_device_name(0)}")
+ else:
+     device = torch.device("cpu")
+     print("GPU not available, using CPU instead.")
+
+ # Load the model and tokenizer
+ model_id = "aman-augurs/mistral-7b-instruct-legal-qa-3e22-merged"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ # device_map="auto" lets Accelerate place the weights, so a separate
+ # model.to(device) is unnecessary and can conflict with the device map.
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+ print(f"Model loaded on {device}")
+
+ def chat_with_model(user_input, chat_history=None):
+     # Avoid a mutable default argument
+     if chat_history is None:
+         chat_history = []
+
+     # Format the chat history for the model
+     messages = [{"role": "system", "content": "You are a helpful assistant."}]
+     for user, assistant in chat_history:
+         messages.append({"role": "user", "content": user})
+         messages.append({"role": "assistant", "content": assistant})
+     messages.append({"role": "user", "content": user_input})
+
+     # Tokenize the input and move it to the model's device
+     inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+     # Generate a response
+     with torch.no_grad():
+         outputs = model.generate(inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+
+     # Decode only the newly generated tokens; slicing off the prompt is
+     # more robust than splitting the full decoded string on "assistant"
+     assistant_reply = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
+
+     # Update the chat history
+     chat_history.append((user_input, assistant_reply))
+
+     return chat_history
+
+ # Define the Gradio interface
+ def gradio_chat_interface(user_input, chat_history):
+     chat_history = chat_with_model(user_input, chat_history or [])
+     return chat_history
+
+ # Create the Gradio app
+ with gr.Blocks() as demo:
+     gr.Markdown("# Chat with Legal AI")
+     chatbot = gr.Chatbot(label="Chat History")
+     user_input = gr.Textbox(label="Your Message")
+     submit_button = gr.Button("Send")
+     clear_button = gr.Button("Clear Chat")
+
+     # Define the interaction
+     submit_button.click(fn=gradio_chat_interface, inputs=[user_input, chatbot], outputs=chatbot)
+     clear_button.click(lambda: [], None, chatbot, queue=False)
+
+ # Launch the app
+ demo.launch()