TymaaHammouda committed on
Commit
983b14c
·
verified ·
1 Parent(s): 2a3cef3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -21
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from openai import OpenAI
8
 
9
- print("Version ---- 2")
10
  app = FastAPI()
11
 
12
  # -----------------------------
@@ -26,7 +26,6 @@ def build_prompt(req1, req2, prompt_type="zero-shot"):
26
  if prompt_type == "zero-shot":
27
  return f"Do the following sentences contradict each other, answer with just yes or no: 1.{req1} 2.{req2}"
28
  elif prompt_type == "few-shot":
29
- # Example few-shot style (you can expand with more examples)
30
  examples = (
31
  "Example 1:\n"
32
  "Req1: The system shall allow password reset.\n"
@@ -42,7 +41,37 @@ def build_prompt(req1, req2, prompt_type="zero-shot"):
42
  return f"Do the following sentences contradict each other, answer with just yes or no: 1.{req1} 2.{req2}"
43
 
44
  # -----------------------------
45
- # Model handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # -----------------------------
47
  def run_gpt4(req1, req2, prompt_type, api_key):
48
  client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
@@ -56,34 +85,21 @@ def run_gpt4(req1, req2, prompt_type, api_key):
56
  return completion.choices[0].message.content.strip()
57
 
58
  def run_deepseek(req1, req2, prompt_type):
59
- model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
60
- tokenizer = AutoTokenizer.from_pretrained(model_name)
61
- model = AutoModelForCausalLM.from_pretrained(
62
- model_name,
63
- dtype=torch.bfloat16,
64
- device_map="auto"
65
- )
66
  prompt = build_prompt(req1, req2, prompt_type)
67
  inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
68
  outputs = model.generate(inputs.input_ids, max_new_tokens=256)
69
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
70
 
71
  def run_llama(req1, req2, prompt_type):
72
- model_name = "meta-llama/Llama-3.1-8B-Instruct"
73
- hf_token = os.getenv("LLAMA_HF_TOKEN")
74
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
75
- model = AutoModelForCausalLM.from_pretrained(
76
- model_name,
77
- token=hf_token,
78
- dtype=torch.bfloat16,
79
- device_map="auto"
80
- )
81
  prompt = build_prompt(req1, req2, prompt_type)
82
  inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
83
  outputs = model.generate(inputs.input_ids, max_new_tokens=256)
84
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
85
 
86
-
87
  def run_fanar(req1, req2, prompt_type):
88
  client = OpenAI(base_url="https://api.fanar.qa/v1", api_key=os.getenv("FANAR_API"))
89
  prompt = build_prompt(req1, req2, prompt_type)
@@ -91,7 +107,6 @@ def run_fanar(req1, req2, prompt_type):
91
  model="Fanar",
92
  messages=[{"role": "user", "content": prompt}]
93
  )
94
- print("fanar response: ", response)
95
  return response.choices[0].message.content.strip()
96
 
97
  # -----------------------------
@@ -109,6 +124,8 @@ def predict(request: ConflictDetectionRequest):
109
  answer = run_deepseek(request.Req1, request.Req2, request.prompt_type)
110
 
111
  elif request.model_choice == "LLaMA-3.1-8B-Instruct":
 
 
112
  answer = run_llama(request.Req1, request.Req2, request.prompt_type)
113
 
114
  elif request.model_choice == "Fanar":
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from openai import OpenAI
8
 
9
+ print("Version ---- 3")
10
  app = FastAPI()
11
 
12
  # -----------------------------
 
26
  if prompt_type == "zero-shot":
27
  return f"Do the following sentences contradict each other, answer with just yes or no: 1.{req1} 2.{req2}"
28
  elif prompt_type == "few-shot":
 
29
  examples = (
30
  "Example 1:\n"
31
  "Req1: The system shall allow password reset.\n"
 
41
  return f"Do the following sentences contradict each other, answer with just yes or no: 1.{req1} 2.{req2}"
42
 
43
  # -----------------------------
44
+ # Startup: load models once
45
+ # -----------------------------
46
@app.on_event("startup")
def load_models():
    """Load the local LLMs once at process startup and cache them on ``app.state``.

    DeepSeek is loaded unconditionally. LLaMA-3.1 is a gated repository, so it
    is only loaded when an ``HF_TOKEN`` environment variable is configured;
    otherwise the app starts without it and the LLaMA handler stays unavailable.
    """
    print("Loading models into memory...")

    # DeepSeek-R1 distilled checkpoint — publicly downloadable, no token needed.
    deepseek_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
    app.state.deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_name)
    app.state.deepseek_model = AutoModelForCausalLM.from_pretrained(
        deepseek_name,
        dtype=torch.bfloat16,
        device_map="auto",
    )

    # LLaMA (gated): bail out early when no HF_TOKEN secret is present.
    llama_name = "meta-llama/Llama-3.1-8B-Instruct"
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("No HF_TOKEN found, LLaMA will not be available.")
        return

    app.state.llama_tokenizer = AutoTokenizer.from_pretrained(llama_name, token=hf_token)
    app.state.llama_model = AutoModelForCausalLM.from_pretrained(
        llama_name,
        token=hf_token,
        dtype=torch.bfloat16,
        device_map="auto",
    )
72
+
73
+ # -----------------------------
74
+ # Model handlers (reuse loaded models)
75
  # -----------------------------
76
  def run_gpt4(req1, req2, prompt_type, api_key):
77
  client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
 
85
  return completion.choices[0].message.content.strip()
86
 
87
  def run_deepseek(req1, req2, prompt_type):
88
+ tokenizer = app.state.deepseek_tokenizer
89
+ model = app.state.deepseek_model
 
 
 
 
 
90
  prompt = build_prompt(req1, req2, prompt_type)
91
  inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
92
  outputs = model.generate(inputs.input_ids, max_new_tokens=256)
93
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
94
 
95
def run_llama(req1, req2, prompt_type):
    """Ask the cached LLaMA model whether the two requirements contradict.

    Assumes ``load_models`` put ``llama_tokenizer``/``llama_model`` on
    ``app.state`` (i.e. HF_TOKEN was configured); callers guard for that.
    Returns the raw decoded generation.
    """
    tok = app.state.llama_tokenizer
    mdl = app.state.llama_model

    text = build_prompt(req1, req2, prompt_type)
    encoded = tok([text], return_tensors="pt").to(mdl.device)
    generated = mdl.generate(encoded.input_ids, max_new_tokens=256)
    return tok.decode(generated[0], skip_special_tokens=True)
102
 
 
103
  def run_fanar(req1, req2, prompt_type):
104
  client = OpenAI(base_url="https://api.fanar.qa/v1", api_key=os.getenv("FANAR_API"))
105
  prompt = build_prompt(req1, req2, prompt_type)
 
107
  model="Fanar",
108
  messages=[{"role": "user", "content": prompt}]
109
  )
 
110
  return response.choices[0].message.content.strip()
111
 
112
  # -----------------------------
 
124
  answer = run_deepseek(request.Req1, request.Req2, request.prompt_type)
125
 
126
  elif request.model_choice == "LLaMA-3.1-8B-Instruct":
127
+ if not hasattr(app.state, "llama_model"):
128
+ return JSONResponse({"error": "LLaMA not loaded (missing HF_TOKEN)"}, status_code=400)
129
  answer = run_llama(request.Req1, request.Req2, request.prompt_type)
130
 
131
  elif request.model_choice == "Fanar":