ali4568 commited on
Commit
1b6b786
·
verified ·
1 Parent(s): 62e19a8

🚀 Auto-deploy from GitHub Action

Browse files
.github/workflows/sync_env_to_huggingface.yml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: 🔐 Sync .env to Hugging Face Secrets
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main # or any branch you want
7
+
8
+ jobs:
9
+ sync:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout repo
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Install dependencies
17
+ run: |
18
+ python -m pip install --upgrade pip
19
+ python -m pip install huggingface_hub python-dotenv requests
20
+
21
+ - name: Sync .env to Hugging Face Space Secrets
22
+ env:
23
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
24
+ DOTENV_CONTENT: ${{ secrets.DOTENV_CONTENT }}
25
+ run: |
26
+ python - <<'EOF'
27
+ import os, requests
28
+ from dotenv import dotenv_values
29
+
30
+ HF_TOKEN = os.getenv("HF_TOKEN")
31
+ DOTENV_CONTENT = os.getenv("DOTENV_CONTENT")
32
+ SPACE_ID = "ali4568/LawMadad" # change this
33
+
34
+ # Parse .env contents
35
+ lines = DOTENV_CONTENT.strip().splitlines()
36
+ secrets = {}
37
+ for line in lines:
38
+ if line and not line.startswith("#") and "=" in line:
39
+ key, value = line.split("=", 1)
40
+ secrets[key.strip()] = value.strip()
41
+
42
+ # Hugging Face API endpoint
43
+ url = f"https://huggingface.co/api/spaces/{SPACE_ID}/secrets"
44
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
45
+
46
+ for key, value in secrets.items():
47
+ print(f"🔐 Syncing {key} ...")
48
+ payload = {"key": key, "value": value}
49
+ resp = requests.post(url, headers=headers, json=payload)
50
+ if resp.status_code in (200, 201):
51
+ print(f"✅ {key} synced.")
52
+ elif resp.status_code == 409:
53
+ print(f"🌀 {key} already exists, updating...")
54
+ requests.put(f"{url}/{key}", headers=headers, json={"value": value})
55
+ else:
56
+ print(f"โŒ Failed to sync {key}: {resp.status_code} - {resp.text}")
57
+
58
+ print("🎯 All .env variables synced to Hugging Face!")
59
+ EOF
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py CHANGED
@@ -1,409 +1,3 @@
1
- # # Import necessary libraries
2
- # import warnings
3
- # import os
4
- # import nltk
5
-
6
- # # Ensure NLTK uses our custom data directory
7
- # nltk.data.path = ['/app/nltk_data'] + nltk.data.path
8
-
9
- # # Import other libraries
10
- # from fastapi import FastAPI, HTTPException
11
- # from pydantic import BaseModel
12
- # from llama_index.core import (
13
- # VectorStoreIndex,
14
- # StorageContext,
15
- # ServiceContext,
16
- # load_index_from_storage,
17
- # Document
18
- # )
19
- # from llama_index.embeddings.huggingface import HuggingFaceEmbedding
20
- # from llama_index.llms.groq import Groq
21
- # import pdfplumber
22
-
23
- # # Suppress warnings
24
- # warnings.filterwarnings('ignore')
25
-
26
- # # Initialize FastAPI app
27
- # app = FastAPI()
28
-
29
- # # Define the request model
30
- # class QueryRequest(BaseModel):
31
- # query: str
32
-
33
- # # Set up the GROQ API key - use environment variable for security
34
- # GROQ_API_KEY = "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"
35
-
36
- # # Define the context path for PDF files
37
- # input_files = [
38
- # "civil.pdf",
39
- # "constitution.pdf",
40
- # "criminal.pdf",
41
- # "family.pdf",
42
- # ]
43
-
44
- # # Preprocessing function for PDF text extraction
45
- # def extract_text_from_pdf(file_path):
46
- # text_data = []
47
- # try:
48
- # with pdfplumber.open(file_path) as pdf:
49
- # for page in pdf.pages:
50
- # text = page.extract_text()
51
- # if text:
52
- # text_data.append(text)
53
- # return "\n".join(text_data)
54
- # except Exception as e:
55
- # print(f"Error extracting text from {file_path}: {str(e)}")
56
- # return ""
57
-
58
- # # Function to initialize the index
59
- # def initialize_index():
60
- # # Check if storage exists
61
- # persist_dir = "./storage_law_app"
62
- # if os.path.exists(persist_dir) and os.listdir(persist_dir):
63
- # print("Loading existing index...")
64
- # # Initialize embedding model
65
- # embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
66
- # # Initialize LLM
67
- # llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
68
- # service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)
69
-
70
- # # Reload the index
71
- # storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
72
- # return load_index_from_storage(storage_context, service_context=service_context)
73
- # else:
74
- # print("Creating new index...")
75
- # os.makedirs(persist_dir, exist_ok=True)
76
-
77
- # # Load and preprocess documents
78
- # documents = []
79
- # for file in input_files:
80
- # if os.path.exists(file):
81
- # content = extract_text_from_pdf(file)
82
- # if content:
83
- # documents.append(Document(text=content))
84
- # else:
85
- # print(f"Warning: File {file} not found")
86
-
87
- # if not documents:
88
- # print("Warning: No documents were loaded")
89
- # # Create a dummy document if no documents are found
90
- # documents = [Document(text="This is a placeholder document as no actual documents were found.")]
91
-
92
- # # Initialize embedding model
93
- # embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
94
- # # Initialize LLM
95
- # llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
96
- # service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)
97
-
98
- # # Build and persist vector index
99
- # vector_index = VectorStoreIndex.from_documents(
100
- # documents=documents,
101
- # service_context=service_context,
102
- # show_progress=True
103
- # )
104
- # vector_index.storage_context.persist(persist_dir=persist_dir)
105
- # return vector_index
106
-
107
- # # Define the context for legal queries
108
- # LEGAL_CONTEXT = """
109
- # Context: Provide legal guidance based on the Pakistani legal framework.
110
- # Task: Analyze the query and provide a structured response with headings and bullet points.
111
- # The format should be:
112
- # 1. **Introduction/Overview**: A brief overview of the law or section.
113
- # 2. **Section Description**: Explain what this section does, including its purpose and scope.
114
- # 3. **Legal Provisions**: Highlight the key legal provisions or clauses under the specified section.
115
- # 4. **Punishments**: Explicitly mention the punishments with references if applicable.
116
- # 5. **Related Precedents**: Summarize any relevant legal precedents or landmark cases along with their results.
117
- # 6. **Conclusion/Recommendations**: Conclude with advice or recommendations tailored to the query.
118
- # """
119
-
120
- # # Initialize the index at startup
121
- # @app.on_event("startup")
122
- # async def startup_event():
123
- # global index
124
- # index = initialize_index()
125
-
126
- # # API endpoint for querying the model
127
- # @app.post("/query/")
128
- # async def query_model(request: QueryRequest):
129
- # try:
130
- # # Prepare the query engine
131
- # query_engine = index.as_query_engine()
132
-
133
- # # Append context to user query
134
- # full_query = f"{LEGAL_CONTEXT}\n\nQuery: {request.query}"
135
-
136
- # # Query the index
137
- # response = query_engine.query(full_query)
138
- # return {"response": response.response}
139
- # except Exception as e:
140
- # raise HTTPException(status_code=500, detail=str(e))
141
-
142
- # # Add a simple root endpoint for API documentation
143
- # @app.get("/")
144
- # async def root():
145
- # return {
146
- # "message": "Pakistani Legal Assistant API",
147
- # "usage": "Send POST requests to /query/ with a JSON body containing the 'query' field"
148
- # }
149
-
150
-
151
- # if __name__ == "__main__":
152
- # import uvicorn
153
- # uvicorn.run(app, host="0.0.0.0", port=7860)
154
-
155
-
156
- #-----------------------------------------------------------------------------------
157
- #-----------------------------------------------------------------------------------
158
-
159
-
160
- # import warnings
161
- # import os
162
- # import nltk
163
- # import re
164
-
165
- # # Ensure NLTK uses our custom data directory
166
- # nltk.data.path = ['/app/nltk_data'] + nltk.data.path
167
-
168
- # # Import other libraries
169
- # from fastapi import FastAPI, HTTPException
170
- # from pydantic import BaseModel
171
- # from llama_index.core import (
172
- # VectorStoreIndex,
173
- # StorageContext,
174
- # ServiceContext,
175
- # load_index_from_storage,
176
- # Document
177
- # )
178
- # from llama_index.embeddings.huggingface import HuggingFaceEmbedding
179
- # from llama_index.llms.groq import Groq
180
- # import pdfplumber
181
-
182
- # # Suppress warnings
183
- # warnings.filterwarnings('ignore')
184
-
185
- # # Initialize FastAPI app
186
- # app = FastAPI()
187
-
188
- # # Define the request model
189
- # class QueryRequest(BaseModel):
190
- # query: str
191
-
192
- # # Set up the GROQ API key - use environment variable for security
193
- # GROQ_API_KEY = "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"
194
-
195
- # # Define the context path for PDF files
196
- # input_files = [
197
- # "civil.pdf",
198
- # "constitution.pdf",
199
- # "criminal.pdf",
200
- # "family.pdf",
201
- # "civil_1.pdf",
202
- # "civil_2.pdf",
203
- # "property_final.pdf",
204
- # "criminal_final.pdf",
205
- # "family_final.pdf"
206
- # ]
207
-
208
- # # Preprocessing function for PDF text extraction
209
- # def extract_text_from_pdf(file_path):
210
- # text_data = []
211
- # try:
212
- # with pdfplumber.open(file_path) as pdf:
213
- # for page in pdf.pages:
214
- # text = page.extract_text()
215
- # if text:
216
- # text_data.append(text)
217
- # return "\n".join(text_data)
218
- # except Exception as e:
219
- # print(f"Error extracting text from {file_path}: {str(e)}")
220
- # return ""
221
-
222
- # # Function to initialize the index
223
- # def initialize_index():
224
- # # Check if storage exists
225
- # persist_dir = "./storage_law_app"
226
- # if os.path.exists(persist_dir) and os.listdir(persist_dir):
227
- # print("Loading existing index...")
228
- # # Initialize embedding model
229
- # embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
230
- # # Initialize LLM
231
- # llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
232
- # service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)
233
-
234
- # # Reload the index
235
- # storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
236
- # return load_index_from_storage(storage_context, service_context=service_context)
237
- # else:
238
- # print("Creating new index...")
239
- # os.makedirs(persist_dir, exist_ok=True)
240
-
241
- # # Load and preprocess documents
242
- # documents = []
243
- # for file in input_files:
244
- # if os.path.exists(file):
245
- # content = extract_text_from_pdf(file)
246
- # if content:
247
- # documents.append(Document(text=content))
248
- # else:
249
- # print(f"Warning: File {file} not found")
250
-
251
- # if not documents:
252
- # print("Warning: No documents were loaded")
253
- # # Create a dummy document if no documents are found
254
- # documents = [Document(text="This is a placeholder document as no actual documents were found.")]
255
-
256
- # # Initialize embedding model
257
- # embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
258
- # # Initialize LLM
259
- # llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
260
- # service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)
261
-
262
- # # Build and persist vector index
263
- # vector_index = VectorStoreIndex.from_documents(
264
- # documents=documents,
265
- # service_context=service_context,
266
- # show_progress=True
267
- # )
268
- # vector_index.storage_context.persist(persist_dir=persist_dir)
269
- # return vector_index
270
-
271
- # # Define the context for legal queries
272
- # LEGAL_CONTEXT = """
273
- # Context: Provide legal guidance based on the Pakistani legal framework.
274
- # Task: Analyze the query and provide a structured response with headings and bullet points.
275
- # The format should be:
276
- # 1. **Introduction/Overview**: A brief overview of the law or section.
277
- # 2. **Section Description**: Explain what this section does, including its purpose and scope.
278
- # 3. **Legal Provisions**: Highlight the key legal provisions or clauses under the specified section.
279
- # 4. **Punishments**: Explicitly mention the punishments with references if applicable.
280
- # 5. **Related Precedents**: Summarize any relevant legal precedents or landmark cases along with their results.
281
- # 6. **Conclusion/Recommendations**: Conclude with advice or recommendations tailored to the query.
282
- # """
283
-
284
- # # Define general response templates for common non-legal queries
285
- # GENERAL_RESPONSES = {
286
- # "greeting": "Hello! I'm your Pakistani Legal Assistant. I can help you with questions about Pakistani law, including civil law, criminal law, family law, and constitutional matters. How can I assist you today?",
287
-
288
- # "capabilities": "I'm a specialized Pakistani Legal Assistant that can help you with:\n\n"
289
- # "- Information about Pakistani civil, criminal, family, and constitutional law\n"
290
- # "- Legal provisions and sections with detailed explanations\n"
291
- # "- Applicable punishments under various legal provisions\n"
292
- # "- Legal precedents and relevant case law\n"
293
- # "- Recommendations on legal matters\n\n"
294
- # "Just ask me any legal question, and I'll provide a structured response based on Pakistani law.",
295
-
296
- # "default": "I'm your Pakistani Legal Assistant. I can help answer questions about Pakistani law. "
297
- # "For legal queries, I'll provide detailed information with proper structure. "
298
- # "How can I assist you with your legal questions today?"
299
- # }
300
-
301
- # # Function to detect if a query is a legal question or general conversation
302
- # def is_legal_query(query):
303
- # # Convert to lowercase for easier matching
304
- # query_lower = query.lower()
305
-
306
- # # Define patterns for greetings and capability questions
307
- # greeting_patterns = [
308
- # r'\b(hi|hello|hey|greetings|howdy|salam|assalam|namaste)\b',
309
- # r'\bhow are you\b',
310
- # r'\bnice to meet you\b'
311
- # ]
312
-
313
- # capability_patterns = [
314
- # r'\bwhat can you do\b',
315
- # r'\bwhat are your capabilities\b',
316
- # r'\bhow can you help\b',
317
- # r'\bwhat do you know\b',
318
- # r'\bwhat are you\b',
319
- # r'\bwho are you\b',
320
- # r'\bwhat is your purpose\b',
321
- # r'\bhow do you work\b'
322
- # ]
323
-
324
- # # Check if query matches any greeting patterns
325
- # for pattern in greeting_patterns:
326
- # if re.search(pattern, query_lower):
327
- # return False, "greeting"
328
-
329
- # # Check if query matches any capability inquiry patterns
330
- # for pattern in capability_patterns:
331
- # if re.search(pattern, query_lower):
332
- # return False, "capabilities"
333
-
334
- # # Legal keywords that suggest a legal query
335
- # legal_keywords = [
336
- # 'law', 'legal', 'court', 'justice', 'right', 'constitution', 'section',
337
- # 'crime', 'criminal', 'civil', 'family', 'divorce', 'marriage', 'inheritance',
338
- # 'punishment', 'penalty', 'fine', 'jail', 'prison', 'arrest', 'police',
339
- # 'judge', 'lawyer', 'attorney', 'defendant', 'plaintiff', 'accused',
340
- # 'trial', 'case', 'lawsuit', 'petition', 'appeal', 'witness', 'evidence',
341
- # 'contract', 'property', 'damages', 'compensation', 'regulation', 'statute',
342
- # 'act', 'provision', 'legislation', 'parliament', 'supreme court', 'high court',
343
- # 'district court', 'tribunal', 'code', 'penal', 'procedure'
344
- # ]
345
-
346
- # # Check if the query contains legal keywords
347
- # for keyword in legal_keywords:
348
- # if keyword in query_lower:
349
- # return True, None
350
-
351
- # # If the query is longer than 20 characters and not identified as greeting or capabilities,
352
- # # assume it might be a legal query
353
- # if len(query) > 20:
354
- # return True, None
355
-
356
- # # Default to general response if we can't clearly identify
357
- # return False, "default"
358
-
359
- # # Initialize the index at startup
360
- # @app.on_event("startup")
361
- # async def startup_event():
362
- # global index
363
- # index = initialize_index()
364
-
365
- # # API endpoint for querying the model
366
- # @app.post("/query/")
367
- # async def query_model(request: QueryRequest):
368
- # try:
369
- # # Determine if the query is legal or general
370
- # is_legal, response_type = is_legal_query(request.query)
371
-
372
- # if not is_legal:
373
- # # Return predefined general response
374
- # return {"response": GENERAL_RESPONSES.get(response_type, GENERAL_RESPONSES["default"])}
375
- # else:
376
- # # Prepare the query engine for legal questions
377
- # query_engine = index.as_query_engine()
378
-
379
- # # Append context to user query
380
- # full_query = f"{LEGAL_CONTEXT}\n\nQuery: {request.query}"
381
-
382
- # # Query the index
383
- # response = query_engine.query(full_query)
384
- # return {"response": response.response}
385
-
386
- # except Exception as e:
387
- # raise HTTPException(status_code=500, detail=str(e))
388
-
389
- # # Add a simple root endpoint for API documentation
390
- # @app.get("/")
391
- # async def root():
392
- # return {
393
- # "message": "Pakistani Legal Assistant API",
394
- # "usage": "Send POST requests to /query/ with a JSON body containing the 'query' field"
395
- # }
396
-
397
-
398
- # if __name__ == "__main__":
399
- # import uvicorn
400
- # uvicorn.run(app, host="0.0.0.0", port=7860)
401
-
402
-
403
- #-----------------------------------------------------------------------------------
404
- #-----------------------------------------------------------------------------------
405
-
406
-
407
  import warnings
408
  import os
409
  import nltk
@@ -437,7 +31,7 @@ class QueryRequest(BaseModel):
437
  query: str
438
 
439
  # Set up the GROQ API key - use environment variable for security
440
- GROQ_API_KEY = "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"
441
 
442
  # Define the context path for PDF files
443
  input_files = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import warnings
2
  import os
3
  import nltk
 
31
  query: str
32
 
33
  # Set up the GROQ API key - use environment variable for security
34
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
35
 
36
  # Define the context path for PDF files
37
  input_files = [