LucianStorm committed on
Commit
a542700
·
verified ·
1 Parent(s): 485b23d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -73
app.py CHANGED
@@ -11,13 +11,11 @@ os.environ['TORCH_HOME'] = '/tmp/torch_cache'
11
 
12
  app = FastAPI(title="DIANA - Diet And Nutrition Assistant")
13
 
14
- app.add_middleware(
15
- CORSMiddleware,
16
- allow_origins=["*"],
17
- allow_credentials=True,
18
- allow_methods=["*"],
19
- allow_headers=["*"],
20
- )
21
 
22
  model = None
23
  tokenizer = None
@@ -28,11 +26,11 @@ def load_model():
28
  try:
29
  print("Starting model load...")
30
  model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
31
- torch.set_num_threads(4)
32
 
33
  tokenizer = AutoTokenizer.from_pretrained(
34
  model_name,
35
- cache_dir='/tmp/transformers_cache'
 
36
  )
37
 
38
  model = AutoModelForCausalLM.from_pretrained(
@@ -41,7 +39,7 @@ def load_model():
41
  low_cpu_mem_usage=True,
42
  device_map=None,
43
  cache_dir='/tmp/transformers_cache'
44
- )
45
 
46
  model.eval()
47
  MODEL_LOADED = True
@@ -56,106 +54,104 @@ load_model()
56
 
57
  class Query(BaseModel):
58
  prompt: str
59
- max_length: int = 200
60
  temperature: float = 0.7
61
 
62
- def is_greeting(text):
63
- greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening', 'greetings']
64
- return any(greeting in text.lower() for greeting in greetings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- def is_fitness_question(text):
67
- fitness_keywords = [
68
- 'workout', 'exercise', 'training', 'muscle', 'strength', 'cardio', 'weight',
69
- 'diet', 'nutrition', 'protein', 'carbs', 'fat', 'meal', 'food', 'eating',
70
- 'routine', 'program', 'sets', 'reps', 'gym', 'fitness', 'health'
71
- ]
72
- return any(keyword in text.lower() for keyword in fitness_keywords)
73
 
74
  @app.post("/chat")
75
  async def chat(query: Query):
76
  if not MODEL_LOADED:
77
- if not load_model():
78
- raise HTTPException(
79
- status_code=503,
80
- detail="DIANA is still initializing. Please try again in a minute."
81
- )
82
 
83
  try:
84
- # Personalized system prompts
85
  if is_greeting(query.prompt):
86
- system_prompt = """You are DIANA (Diet And Nutrition Assistant), a friendly and knowledgeable
87
- fitness companion. Always respond warmly and offer to help with fitness and nutrition guidance.
88
- Sign your responses with '- DIANA 💪'"""
89
- else:
90
- system_prompt = """You are DIANA (Diet And Nutrition Assistant), a knowledgeable fitness and
91
- nutrition guide. Provide practical, safe, and evidence-based advice about workouts, nutrition,
92
- and healthy living. Include:
93
- 1. Clear, actionable recommendations
94
- 2. Safety considerations
95
- 3. Beginner-friendly explanations
96
- Remember to sign your responses with '- DIANA 💪'"""
97
 
98
- formatted_prompt = f"""<|system|>{system_prompt}</s>
99
- <|user|>{query.prompt}</s>
100
- <|assistant|>"""
 
 
 
 
 
 
 
101
 
102
  inputs = tokenizer(
103
  formatted_prompt,
104
  return_tensors="pt",
105
  truncation=True,
106
- max_length=300
107
- )
 
108
 
109
- with torch.no_grad():
110
  outputs = model.generate(
111
  inputs["input_ids"],
112
- max_new_tokens=200,
113
- min_new_tokens=50,
114
  temperature=0.7,
115
  top_p=0.9,
116
  do_sample=True,
117
  pad_token_id=tokenizer.eos_token_id,
118
  repetition_penalty=1.2,
119
  no_repeat_ngram_size=3,
120
- eos_token_id=tokenizer.eos_token_id,
121
- early_stopping=True
 
 
122
  )
123
 
124
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
125
- response = response.split("<|assistant|>")[-1].strip()
 
 
 
 
 
 
 
 
126
 
127
- # Add signature if not present
128
  if "- DIANA 💪" not in response:
129
- response = response + "\n\n- DIANA 💪"
130
 
131
- # Response validation and fallbacks
132
- if not response or len(response.split()) < 20:
133
- if is_greeting(query.prompt):
134
- return {
135
- "response": "Hi there! I'm DIANA, your personal Diet And Nutrition Assistant. I'm here to help you achieve your health and fitness goals! Would you like some advice about workouts or nutrition?\n\n- DIANA 💪"
136
- }
137
- elif is_fitness_question(query.prompt):
138
- return {
139
- "response": "Let me help you on your fitness journey! Could you provide more details about your specific goals and current fitness level? This will help me give you the most relevant advice.\n\n- DIANA 💪"
140
- }
141
- else:
142
- return {
143
- "response": "Hi! I'm DIANA, your Diet And Nutrition Assistant. I specialize in workout plans, diet advice, and general health tips. What would you like to know more about?\n\n- DIANA 💪"
144
- }
145
-
146
  return {"response": response}
147
 
148
  except Exception as e:
149
- print(f"Error during generation: {str(e)}")
150
- raise HTTPException(status_code=500, detail=str(e))
151
 
152
  @app.get("/")
153
  def read_root():
154
- return {
155
- "status": "DIANA (Diet And Nutrition Assistant) is running!",
156
- "model_loaded": MODEL_LOADED,
157
- "specialties": ["Personalized workout advice", "Nutrition guidance", "Fitness planning"]
158
- }
159
 
160
  if __name__ == "__main__":
161
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
 
11
 
12
  app = FastAPI(title="DIANA - Diet And Nutrition Assistant")
13
 
14
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
15
+
16
+ DEVICE = torch.device('cpu')
17
+ torch.set_num_threads(4)
18
+ torch.set_grad_enabled(False)
 
 
19
 
20
  model = None
21
  tokenizer = None
 
26
  try:
27
  print("Starting model load...")
28
  model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
29
 
30
  tokenizer = AutoTokenizer.from_pretrained(
31
  model_name,
32
+ cache_dir='/tmp/transformers_cache',
33
+ use_fast=True
34
  )
35
 
36
  model = AutoModelForCausalLM.from_pretrained(
 
39
  low_cpu_mem_usage=True,
40
  device_map=None,
41
  cache_dir='/tmp/transformers_cache'
42
+ ).to(DEVICE)
43
 
44
  model.eval()
45
  MODEL_LOADED = True
 
54
 
55
  class Query(BaseModel):
56
  prompt: str
57
+ max_length: int = 150
58
  temperature: float = 0.7
59
 
60
+ def get_structured_response(topic):
61
+ return f"""Here's what you need to know about {topic}:
62
+
63
+ 1. Start with the basics:
64
+ • Begin gradually
65
+ • Focus on proper form
66
+ • Stay consistent
67
+
68
+ 2. Key points to remember:
69
+ • Set realistic goals
70
+ • Track your progress
71
+ • Listen to your body
72
+
73
+ 3. Tips for success:
74
+ • Start today, not tomorrow
75
+ • Keep it simple
76
+ • Stay motivated
77
+
78
+ Need more specific advice about any of these points?
79
 
80
+ - DIANA 💪"""
81
+
82
+ def is_greeting(text):
83
+ return any(g in text.lower() for g in ['hi', 'hello', 'hey'])
 
 
 
84
 
85
  @app.post("/chat")
86
  async def chat(query: Query):
87
  if not MODEL_LOADED:
88
+ raise HTTPException(status_code=503, detail="DIANA is initializing. Please try again.")
 
 
 
 
89
 
90
  try:
91
+ # Handle greetings
92
  if is_greeting(query.prompt):
93
+ return {"response": "Hi! I'm DIANA, your fitness assistant. How can I help you today?\n\n- DIANA 💪"}
 
 
 
 
 
 
 
 
 
 
94
 
95
+ # Optimized but complete prompt template
96
+ system_prompt = f"""You are DIANA, a fitness assistant. Give clear, complete advice about {query.prompt}.
97
+ Structure your response like this:
98
+ 1. Brief welcome and intro
99
+ 2. 3 main points with bullets
100
+ 3. Encouraging conclusion
101
+ 4. Sign with '- DIANA 💪'
102
+ IMPORTANT: Never end mid-sentence. Always complete your thoughts."""
103
+
104
+ formatted_prompt = f"<|system|>{system_prompt}</s><|user|>Give structured fitness advice about: {query.prompt}</s><|assistant|>Let me help you with that!\n\n"
105
 
106
  inputs = tokenizer(
107
  formatted_prompt,
108
  return_tensors="pt",
109
  truncation=True,
110
+ max_length=200,
111
+ padding=False
112
+ ).to(DEVICE)
113
 
114
+ with torch.inference_mode():
115
  outputs = model.generate(
116
  inputs["input_ids"],
117
+ max_new_tokens=150,
118
+ min_new_tokens=100, # Ensure minimum length
119
  temperature=0.7,
120
  top_p=0.9,
121
  do_sample=True,
122
  pad_token_id=tokenizer.eos_token_id,
123
  repetition_penalty=1.2,
124
  no_repeat_ngram_size=3,
125
+ eos_token_id=tokenizer.eos_token_id, # Proper ending
126
+ num_beams=1,
127
+ early_stopping=True,
128
+ use_cache=True
129
  )
130
 
131
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
132
+ response = response.split("Let me help you with that!")[-1].strip()
133
+
134
+ # Validate response completeness
135
+ sentences = [s.strip() for s in response.split('.') if s.strip()]
136
+ words = response.split()
137
+
138
+ # If response might be incomplete, use structured format
139
+ if len(sentences) < 4 or len(words) < 50 or not response.endswith(('!', '.', '?', '💪')):
140
+ return {"response": get_structured_response(query.prompt)}
141
 
142
+ # Ensure proper signature
143
  if "- DIANA 💪" not in response:
144
+ response += "\n\n- DIANA 💪"
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  return {"response": response}
147
 
148
  except Exception as e:
149
+ print(f"Error: {str(e)}")
150
+ return {"response": get_structured_response(query.prompt)}
151
 
152
  @app.get("/")
153
  def read_root():
154
+ return {"status": "DIANA is ready!", "model_loaded": MODEL_LOADED}
 
 
 
 
155
 
156
  if __name__ == "__main__":
157
  uvicorn.run("app:app", host="0.0.0.0", port=7860)