edit in system

#1 by bshk57 - opened

Files changed (3)
  1. Dockerfile +2 -2
  2. app_new.py +28 -48
  3. requirements.txt +0 -1
Dockerfile CHANGED
@@ -2,7 +2,7 @@ FROM python:3.10-slim
 
 WORKDIR /app
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install -r requirements.txt
 
 COPY . .
-CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "--workers", "1", "--threads", "2", "app_new:app"]
+CMD ["gunicorn", "-b", "0.0.0.0:7860", "app_new:app"]
app_new.py CHANGED
@@ -25,9 +25,8 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
-from langchain_core.language_models.llms import LLM
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import torch
+from langchain_community.llms import HuggingFacePipeline
+from transformers import pipeline
 
 from huggingface_hub import snapshot_download
 import uuid
@@ -49,8 +48,8 @@ LOCAL_DATASET_DIR = "knowledge_base"
 VECTOR_DB_PATH = "sastra_vector_db"
 ANALYTICS_DB_PATH = "sastra_analytics_db"
 
-EMBEDDING_MODEL = "sentence-transformers/paraphrase-MiniLM-L12-v2"
-LLM_MODEL = "google/flan-t5-large"
+EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+LLM_MODEL = "google/flan-t5-xl"
 
 MANDATORY_URLS = [
     "https://www.sastra.edu/admissions/ug-pg.html",
@@ -85,7 +84,7 @@ admin_keywords = {}
 # AUTHENTICATION
 # ============================================================
 
-ADMIN_API_KEY = os.getenv("ADMIN_API_KEY", "admin@sastra")
+ADMIN_API_KEY = os.getenv("ADMIN_API_KEY", "your-secret-key-change-this")
 
 def require_admin_auth(f):
     @wraps(f)
@@ -225,19 +224,15 @@ def initialize_model():
     # 1. LOAD WEBSITE DATA
     print("\n📚 Loading website data...")
 
-    import requests as _requests
     for url in SASTRA_URLS:
         try:
             print(f" Loading: {url}")
-            # Pre-check URL with timeout to avoid hanging
-            _requests.head(url, timeout=5)
             loader = WebBaseLoader(url)
-            loader.requests_kwargs = {"timeout": 10}
             docs = loader.load()
             documents.extend(docs)
             print(f" ✅ Loaded {len(docs)} documents")
         except Exception as e:
-            print(f" ⚠️ Failed (skipping): {str(e)[:100]}")
+            print(f" ⚠️ Failed: {str(e)[:100]}")
 
     print(f" Total from websites: {len(documents)} documents")
 
@@ -312,36 +307,21 @@ def initialize_model():
 
     print("✅ Retriever configured (k=3, score_threshold=0.3)")
 
-    # 7. INITIALIZE LLM (T5 seq2seq — loaded directly, not via pipeline)
+    # 7. INITIALIZE LLM
     print("\n🤖 Loading LLM...")
 
-    _tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
-    _model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
-
-    class _T5LLM(LLM):
-        """Thin LangChain wrapper around T5 for seq2seq generation."""
-
-        class Config:
-            arbitrary_types_allowed = True
-
-        @property
-        def _llm_type(self) -> str:
-            return "t5-seq2seq"
-
-        def _call(self, prompt, stop=None, **kwargs):
-            inputs = _tokenizer(
-                prompt, return_tensors="pt",
-                max_length=512, truncation=True
-            )
-            with torch.no_grad():
-                out = _model.generate(
-                    **inputs,
-                    max_new_tokens=200,
-                    repetition_penalty=1.2,
-                )
-            return _tokenizer.decode(out[0], skip_special_tokens=True)
-
-    llm = _T5LLM()
+    generator = pipeline(
+        "text-generation",
+        model=LLM_MODEL,
+        tokenizer=LLM_MODEL,
+        max_new_tokens=200,       # Reduced from 300
+        min_new_tokens=50,        # Force some output
+        temperature=0.5,          # Balanced (was 0.3 = too conservative)
+        top_p=0.9,                # Nucleus sampling
+        repetition_penalty=1.2,   # Reduced from 1.3
+    )
+
+    llm = HuggingFacePipeline(pipeline=generator)
     print("✅ LLM loaded")
 
     # 8. CREATE PROMPT TEMPLATE - IMPROVED
@@ -455,6 +435,7 @@ def log_chat_to_vectordb(
 
     try:
         analytics_db.add_documents([doc])
+        analytics_db.persist()
     except Exception as e:
         print(f"⚠️ Error logging to analytics DB: {e}")
 
@@ -630,20 +611,13 @@ def chat():
     print(f"\n🐛 DEBUG MODE ENABLED")
     debug_retrieval(query_en)
 
-    # RAG INFERENCE with timeout protection
+    # RAG INFERENCE
    answer_en = ""
    try:
-        import concurrent.futures
        print(f"\n💬 Processing query: {query_en}")
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            future = executor.submit(qa_chain.invoke, {"query": query_en})
-            result = future.result(timeout=60)  # 60s hard limit
-            raw = result.get("result", "")
-            answer_en = clean_llm_output(raw)
+        result = qa_chain.invoke({"query": query_en})
+        answer_en = result.get("result", "")  # clean_llm_output()
        print(f"✅ Got answer: {answer_en[:100]}...")
-    except concurrent.futures.TimeoutError:
-        print("❌ QA Chain timed out after 60s")
-        answer_en = ""
    except Exception as e:
        print(f"❌ QA Chain error: {e}")
        answer_en = ""
@@ -808,10 +782,12 @@ def retrain():
 
         initialize_model()
         vectordb.add_documents(chunks)
+        vectordb.persist()
 
         message = "Knowledge base completely reset and retrained"
     else:
         vectordb.add_documents(chunks)
+        vectordb.persist()
         qa_chain.retriever = vectordb.as_retriever(search_kwargs={"k": 3})
 
         message = "Successfully added new data to existing knowledge base"
@@ -1439,6 +1415,8 @@ def bulk_delete_questions():
     )
     analytics_db.add_documents([doc])
 
+    analytics_db.persist()
+
     return jsonify({
         "status": "success",
         "message": f"Deleted {deleted_count} questions",
@@ -1509,6 +1487,8 @@ def update_feedback(log_id):
     )
     analytics_db.add_documents([doc])
 
+    analytics_db.persist()
+
     return jsonify({
         "status": "success",
         "message": "Feedback updated successfully",
requirements.txt CHANGED
@@ -1,7 +1,6 @@
 flask
 flask-cors
 gunicorn
-requests
 langchain==0.1.20
 langchain-community==0.0.38
 langchain-core==0.1.52