alaselababatunde committed on
Commit
21cb694
·
1 Parent(s): bec61fa
Files changed (1) hide show
  1. main.py +44 -43
main.py CHANGED
@@ -8,23 +8,19 @@ import torch
8
  import logging
9
  import os
10
 
11
- # Hugging Face Hub
12
  from huggingface_hub import login
13
-
14
- # LangChain
15
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
16
  from langchain.chains import LLMChain
17
  from langchain.prompts.prompt import PromptTemplate
18
  from langchain.memory import ConversationBufferMemory
19
-
20
- # Transformers pipeline
21
  from transformers import pipeline
22
 
23
  # =====================================================
24
  # CONFIGURATION
25
  # =====================================================
26
  API_SECRET = "techdisciplesai404"
27
- MODEL_NAME = "meta-llama/Llama-3.1-8B"
 
28
  DEVICE = 0 if torch.cuda.is_available() else -1
29
 
30
  # =====================================================
@@ -39,39 +35,47 @@ logger = logging.getLogger("TechDisciplesAI")
39
  app = FastAPI(title="Tech Disciples AI", version="3.1")
40
 
41
  # =====================================================
42
- # MODEL LOAD
43
  # =====================================================
44
- llm = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- try:
47
- logger.info(f"🚀 Loading model: {MODEL_NAME}")
 
 
48
 
49
- hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
50
- if hf_token:
51
  login(token=hf_token)
52
- logger.info("🔐 Hugging Face authentication successful.")
53
- else:
54
- logger.warning("⚠️ HUGGINGFACEHUB_API_TOKEN not found gated models may fail.")
55
-
56
- # Load text generation pipeline
57
- hf_pipeline = pipeline(
58
- "text-generation",
59
- model=MODEL_NAME,
60
- device=DEVICE,
61
- max_new_tokens=1024,
62
- temperature=0.4,
63
- top_p=0.9,
64
- repetition_penalty=1.15,
65
- do_sample=True,
66
- use_auth_token=True
67
- )
68
-
69
- llm = HuggingFacePipeline(pipeline=hf_pipeline)
70
- logger.info("✅ Model loaded successfully (Llama 3.1 - 8B).")
71
-
72
- except Exception as e:
73
- logger.error(f"❌ Model load failed: {e}")
74
- llm = None
75
 
76
  # =====================================================
77
  # MEMORY + PROMPT
@@ -96,10 +100,7 @@ prompt = PromptTemplate(
96
  input_variables=["conversation_history", "query"]
97
  )
98
 
99
- if llm:
100
- chain = LLMChain(prompt=prompt, llm=llm, memory=memory)
101
- else:
102
- chain = None
103
 
104
  # =====================================================
105
  # REQUEST MODEL
@@ -113,7 +114,7 @@ class QueryInput(BaseModel):
113
  # =====================================================
114
  @app.get("/")
115
  async def root():
116
- return {"message": "✅ Tech Disciples AI is running."}
117
 
118
  @app.post("/ai-chat")
119
  async def ai_chat(data: QueryInput, x_api_key: str = Header(None)):
@@ -121,11 +122,11 @@ async def ai_chat(data: QueryInput, x_api_key: str = Header(None)):
121
  raise HTTPException(status_code=403, detail="Forbidden: Invalid API key")
122
 
123
  if not chain:
124
- raise HTTPException(status_code=500, detail="Model not initialized or failed to load")
125
 
126
  try:
127
  response = chain.run(query=data.query.strip())
128
  return {"reply": response.strip()}
129
  except Exception as e:
130
- logger.error(f"⚠️ Error generating response: {e}")
131
- raise HTTPException(status_code=500, detail="Model failed to respond")
 
8
  import logging
9
  import os
10
 
 
11
  from huggingface_hub import login
 
 
12
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
13
  from langchain.chains import LLMChain
14
  from langchain.prompts.prompt import PromptTemplate
15
  from langchain.memory import ConversationBufferMemory
 
 
16
  from transformers import pipeline
17
 
18
  # =====================================================
19
  # CONFIGURATION
20
  # =====================================================
21
# Shared secret checked against the `x-api-key` header on /ai-chat.
# NOTE(review): a hard-coded secret in source is a security risk — prefer the
# environment; the literal is kept only as a backward-compatible default.
API_SECRET = os.getenv("API_SECRET", "techdisciplesai404")
PRIMARY_MODEL = "meta-llama/Llama-3.1-8B"
FALLBACK_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
# HF pipeline convention: device 0 = first CUDA GPU, -1 = CPU.
DEVICE = 0 if torch.cuda.is_available() else -1
25
 
26
  # =====================================================
 
35
  app = FastAPI(title="Tech Disciples AI", version="3.1")
36
 
37
  # =====================================================
38
+ # MODEL LOADING FUNCTION
39
  # =====================================================
40
def load_model(model_name, token=None):
    """Build a LangChain LLM backed by a HF text-generation pipeline.

    Returns a HuggingFacePipeline on success, or None when the model
    cannot be loaded (e.g. a gated repo without a valid token), so the
    caller can fall back to a different model instead of crashing.
    """
    try:
        logger.info(f"🚀 Attempting to load model: {model_name}")
        text_gen = pipeline(
            "text-generation",
            model=model_name,
            device=DEVICE,
            max_new_tokens=1024,
            temperature=0.4,
            top_p=0.9,
            repetition_penalty=1.15,
            do_sample=True,
            token=token,  # ✅ modern auth argument (replaces deprecated use_auth_token)
        )
        logger.info(f"✅ Loaded model successfully: {model_name}")
        return HuggingFacePipeline(pipeline=text_gen)
    except Exception as e:
        # Broad catch is deliberate: any load failure (auth, OOM, download)
        # should return None and trigger the caller's fallback model path.
        logger.error(f"❌ Failed to load {model_name}: {e}")
        return None
59
 
60
+ # =====================================================
61
+ # LOAD TOKEN + MODEL
62
+ # =====================================================
63
# -----------------------------------------------------------------
# Authenticate with the Hugging Face Hub (required for gated repos
# such as Llama 3.1), then load the primary model; on failure, fall
# back to the Mistral model. `llm` may still be None afterwards —
# downstream code must handle that.
# -----------------------------------------------------------------
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if hf_token:
    try:
        login(token=hf_token)
        logger.info("🔐 Hugging Face token authenticated.")
    except Exception as e:
        # Login failure is non-fatal: public models can still load.
        logger.warning(f"⚠️ Failed to log in: {e}")
else:
    logger.warning("⚠️ No HUGGINGFACEHUB_API_TOKEN found.")

llm = load_model(PRIMARY_MODEL, token=hf_token)

if llm is None:
    logger.warning("⚠️ Falling back to Mistral 7B due to model load issue...")
    llm = load_model(FALLBACK_MODEL, token=hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  # =====================================================
81
  # MEMORY + PROMPT
 
100
  input_variables=["conversation_history", "query"]
101
  )
102
 
103
# Build the conversation chain only when a model actually loaded; the
# endpoints check `chain` for None and answer HTTP 500 otherwise.
chain = LLMChain(prompt=prompt, llm=llm, memory=memory) if llm else None
 
 
 
104
 
105
  # =====================================================
106
  # REQUEST MODEL
 
114
  # =====================================================
115
@app.get("/")
async def root():
    """Health-check endpoint: confirms the service is reachable."""
    return {"message": "✅ Tech Disciples AI is online."}
118
 
119
  @app.post("/ai-chat")
120
  async def ai_chat(data: QueryInput, x_api_key: str = Header(None)):
 
122
  raise HTTPException(status_code=403, detail="Forbidden: Invalid API key")
123
 
124
  if not chain:
125
+ raise HTTPException(status_code=500, detail="Model not initialized")
126
 
127
  try:
128
  response = chain.run(query=data.query.strip())
129
  return {"reply": response.strip()}
130
  except Exception as e:
131
+ logger.error(f"⚠️ Model runtime error: {e}")
132
+ raise HTTPException(status_code=500, detail=f"Model failed to respond — {e}")