omm7 committed on
Commit
e810b6d
·
verified ·
1 Parent(s): ac982f0

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import uuid
4
+ from pathlib import Path
5
+ import gradio as gr
6
+ from collections import defaultdict
7
+ from huggingface_hub import CommitScheduler
8
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
9
+ from langchain_community.vectorstores import Chroma
10
+ from openai import OpenAI
11
+
12
+ # Set up OpenAI client (Hugging Face Inference API)
13
+ client = OpenAI(
14
+ base_url="https://router.huggingface.co/featherless-ai/v1",
15
+ api_key="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",
16
+ )
17
+
18
+ # Vectorstore setup (embedding + ChromaDB)
19
+ embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
20
+ vectorstore = Chroma(
21
+ collection_name='clause_index',
22
+ persist_directory="./clause_index",
23
+ embedding_function=embedding_model
24
+ )
25
+
26
+ # Log storage
27
+ log_file = Path("logs/") / f"query_{uuid.uuid4()}.json"
28
+ log_file.parent.mkdir(exist_ok=True)
29
+ scheduler = CommitScheduler(
30
+ repo_id="legal-rag-output",
31
+ repo_type="dataset",
32
+ folder_path=log_file.parent,
33
+ path_in_repo="logs",
34
+ every=2
35
+ )
36
+
37
+ # Prompt Template
38
+ system_message = """You are a legal AI assistant tasked with answering questions from legal contracts using only the provided context.
39
+ Answer strictly from the context. If the answer is not found, respond: "Sorry, no relevant information found in the context."
40
+ """
41
+ user_template = """
42
+ ###Context
43
+ {context}
44
+
45
+ ###Question
46
+ {question}
47
+ """
48
+
49
+ def predict(question):
50
+ docs = vectorstore.similarity_search(question, k=3)
51
+ context = "\n\n".join([doc.page_content for doc in docs])
52
+
53
+ prompt = [
54
+ {"role": "system", "content": system_message},
55
+ {"role": "user", "content": user_template.format(context=context, question=question)}
56
+ ]
57
+
58
+ try:
59
+ stream = client.chat.completions.create(
60
+ model="mistralai/Mistral-7B-Instruct-v0.2",
61
+ messages=prompt,
62
+ temperature=0.5,
63
+ top_p=0.7,
64
+ stream=True,
65
+ )
66
+ output = ""
67
+ for chunk in stream:
68
+ delta = chunk.choices[0].delta.content or ""
69
+ output += delta
70
+ except Exception as e:
71
+ output = f"Error: {str(e)}"
72
+
73
+ with scheduler.lock:
74
+ with log_file.open("a") as f:
75
+ f.write(json.dumps({
76
+ "question": question,
77
+ "context": context,
78
+ "response": output
79
+ }) + "\n")
80
+
81
+ return output
82
+
83
+ # Gradio UI
84
+ demo = gr.Interface(
85
+ fn=predict,
86
+ inputs=gr.Textbox(label="Enter your legal question:", lines=4),
87
+ outputs=gr.Textbox(label="Answer"),
88
+ title="⚖️ GL_LegalMind",
89
+ description="Ask legal contract-related questions. Answers are grounded in clause vector retrieval + Mistral LLM."
90
+ )
91
+
92
+ demo.queue()
93
+ demo.launch()
94
+