Spaces:

SMForomir
/

ID-ai-service

Running

App Files Community

SMForomir committed on Jan 11

Commit

e366a24

verified ·

1 Parent(s): c34a524

Update main.py

Browse files

Files changed (1) hide show

main.py +50 -39

main.py CHANGED Viewed

@@ -9,20 +9,23 @@ from huggingface_hub import InferenceClient
 app = FastAPI()
 # --- CONFIGURACIÓN ---
-EXPECTED_TOKEN = os.getenv("SERVICE_SECRET", "pon_aqui_tu_token_secreto")
 HF_TOKEN = os.getenv("HF_TOKEN")
 # 1. CROSS ENCODER (Local - CPU)
 print("Cargando CrossEncoder...")
 similarity_model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
 print("CrossEncoder cargado.")
 # 2. GENERADOR (API Hugging Face)
 MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
 print(f"Conectando a Inference API con modelo: {MODEL_ID}...")
 gen_client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
-# --- UTILIDADES JSON (Tus funciones existentes) ---
 def fix_truncated_json(json_str):
     """Intenta cerrar corchetes y llaves si el modelo se ha cortado."""
     json_str = json_str.strip()
@@ -35,14 +38,15 @@ def fix_truncated_json(json_str):
 def extract_json_array(text):
     """Limpia markdown y extrae el JSON."""
-    text = text.replace("```json", "").replace("```", "").strip()
     start_idx_arr = text.find('[')
-    start_idx_obj = text.find('{')
-    if start_idx_arr != -1 and (start_idx_obj == -1 or start_idx_arr < start_idx_obj):
         candidate = text[start_idx_arr:]
-    elif start_idx_obj != -1:
-        candidate = text[start_idx_obj:]
     else:
         return text
@@ -53,11 +57,15 @@ def extract_json_array(text):
         try:
             return json.loads(fixed_candidate)
         except:
-            last_char = ']' if candidate.strip().startswith('[') else '}'
-            last_idx = candidate.rfind(last_char)
             if last_idx != -1:
-                return json.loads(candidate[:last_idx+1])
-            raise ValueError("Could not extract or repair valid JSON")
 # --- MODELOS Pydantic ---
 class VerifyRequest(BaseModel):
@@ -69,59 +77,52 @@ class DeckRequest(BaseModel):
     count: int
     fields: list
-class ArchitectRequest(BaseModel): # <--- NUEVO MODELO
     topic: str
     subtopic_count: int
-# --- ENDPOINTS EXISTENTES (Verify) ---
 @app.get("/")
 def health():
     return {"status": "ok", "service": "InDeck Brain", "model": MODEL_ID}
 @app.post("/verify")
 def verify_answer(req: VerifyRequest, x_service_token: str = Header(None)):
-    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
     try:
         scores = similarity_model.predict([(req.user_input, req.correct_answer)])
         score = float(scores[0])
         status = "WRONG"
-        final_score = score
-        method = "Semantic-CrossEncoder"
-        if score >= 0.85:
             status = "CORRECT"
-        elif score >= 0.75:
             status = "TYPO"
-        # Fallback a LLM si hay duda
-        if status != "CORRECT" and score > 0.4: # Solo preguntamos si no es un disparate total
-            # (Aquí iría tu lógica verify_with_llm que ya tenías implementada)
-            pass
-        return {"status": status, "score": final_score, "method": method}
     except Exception as e:
         raise HTTPException(500, str(e))
-# --- ENDPOINTS NUEVOS PARA GENERACIÓN ---
 @app.post("/architect")
 def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
     """
-    ROL ARQUITECTO: Genera la lista de subtemas cuando Gemini falla.
     """
-    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
     messages = [
         {
             "role": "system",
-            "content": "You are an expert curriculum designer. Output ONLY a JSON Array of strings."
         },
         {
             "role": "user",
             "content": (
                 f"Create a curriculum list of exactly {req.subtopic_count} distinct subtopics "
-                f"to cover the subject: '{req.topic}'.\n"
-                "Return valid JSON array of strings. Example: [\"Topic A\", \"Topic B\"]"
             )
         }
     ]
@@ -130,29 +131,38 @@ def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
         response = gen_client.chat_completion(
             messages=messages,
             max_tokens=1024,
-            temperature=0.7
         )
         content = response.choices[0].message.content
         subtopics = extract_json_array(content)
         if not isinstance(subtopics, list):
-            # Qwen a veces es verborrágico, intentamos limpiar de nuevo
-            raise ValueError("Output is not a list")
         return {"subtopics": subtopics[:req.subtopic_count], "model": MODEL_ID}
     except Exception as e:
         print(f"Architect Error: {e}")
         raise HTTPException(500, f"Architect failed: {str(e)}")
 @app.post("/generate_deck")
 def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
-    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
-    # Tu código existente de generación (Worker)
     field_names = [f.get('name', 'Field') for f in req.fields]
     messages = [
-        {"role": "system", "content": "You are a specialized JSON generator."},
-        {"role": "user", "content": f"Topic: {req.topic}\nCount: {req.count}\nFields: {', '.join(field_names)}\nOutput ONLY JSON Array."}
     ]
     try:
@@ -161,4 +171,5 @@ def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
         cards = extract_json_array(content)
         return {"cards": cards, "model_used": MODEL_ID}
     except Exception as e:
         raise HTTPException(500, str(e))

 app = FastAPI()
 # --- CONFIGURACIÓN ---
+EXPECTED_TOKEN = os.getenv("SERVICE_SECRET", "tu_token_super_secreto_aqui")
 HF_TOKEN = os.getenv("HF_TOKEN")
 # 1. CROSS ENCODER (Local - CPU)
+# Carga al inicio. Si usas HF Space con CPU básica, esto tardará unos segundos al arrancar.
 print("Cargando CrossEncoder...")
 similarity_model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
 print("CrossEncoder cargado.")
 # 2. GENERADOR (API Hugging Face)
+# Qwen 2.5 72B es excelente. Si notas Timeouts (504), cambia a:
+# "Qwen/Qwen2.5-Coder-32B-Instruct" o "mistralai/Mistral-7B-Instruct-v0.3"
 MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
 print(f"Conectando a Inference API con modelo: {MODEL_ID}...")
 gen_client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+# --- UTILIDADES JSON ---
 def fix_truncated_json(json_str):
     """Intenta cerrar corchetes y llaves si el modelo se ha cortado."""
     json_str = json_str.strip()
 def extract_json_array(text):
     """Limpia markdown y extrae el JSON."""
+    # Limpieza agresiva de bloques de código
+    text = re.sub(r'```json\s*', '', text)
+    text = re.sub(r'```\s*', '', text)
+    text = text.strip()
     start_idx_arr = text.find('[')
+    if start_idx_arr != -1:
         candidate = text[start_idx_arr:]
     else:
         return text
         try:
             return json.loads(fixed_candidate)
         except:
+            # Último intento: cortar hasta el último cierre
+            last_idx = candidate.rfind(']')
             if last_idx != -1:
+                try:
+                    return json.loads(candidate[:last_idx+1])
+                except:
+                    pass
+            print(f"FAILED JSON EXTRACTION: {text[:100]}...") # Log para debug
+            return [] # Retornar lista vacía es mejor que explotar
 # --- MODELOS Pydantic ---
 class VerifyRequest(BaseModel):
     count: int
     fields: list
+class ArchitectRequest(BaseModel):
     topic: str
     subtopic_count: int
+# --- ENDPOINTS ---
 @app.get("/")
 def health():
     return {"status": "ok", "service": "InDeck Brain", "model": MODEL_ID}
 @app.post("/verify")
 def verify_answer(req: VerifyRequest, x_service_token: str = Header(None)):
+    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
     try:
         scores = similarity_model.predict([(req.user_input, req.correct_answer)])
         score = float(scores[0])
         status = "WRONG"
+        # Umbrales ajustados para lógica difusa
+        if score >= 0.82: # Un poco más permisivo que 0.85
             status = "CORRECT"
+        elif score >= 0.70:
             status = "TYPO"
+        return {"status": status, "score": score, "method": "Semantic-CrossEncoder"}
     except Exception as e:
+        print(f"Verify Error: {e}")
         raise HTTPException(500, str(e))
 @app.post("/architect")
 def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
     """
+    Genera subtemas. Usado como fallback cuando Gemini falla.
     """
+    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
     messages = [
         {
             "role": "system",
+            "content": "You are a specialized JSON generator. You output ONLY valid raw JSON arrays. No Markdown. No introduction."
         },
         {
             "role": "user",
             "content": (
                 f"Create a curriculum list of exactly {req.subtopic_count} distinct subtopics "
+                f"for the subject: '{req.topic}'.\n"
+                "Format: [\"Subtopic 1\", \"Subtopic 2\", ...]"
             )
         }
     ]
         response = gen_client.chat_completion(
             messages=messages,
             max_tokens=1024,
+            temperature=0.6 # Temp baja para consistencia
         )
         content = response.choices[0].message.content
         subtopics = extract_json_array(content)
         if not isinstance(subtopics, list):
+            raise ValueError("Model did not return a list")
         return {"subtopics": subtopics[:req.subtopic_count], "model": MODEL_ID}
     except Exception as e:
         print(f"Architect Error: {e}")
+        # Retornamos error 500 para que el backend local use su propio fallback final
         raise HTTPException(500, f"Architect failed: {str(e)}")
 @app.post("/generate_deck")
 def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
+    if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
     field_names = [f.get('name', 'Field') for f in req.fields]
     messages = [
+        {
+            "role": "system",
+            "content": "You are a flashcard generator. Output ONLY a valid JSON Array. No formatting blocks."
+        },
+        {
+            "role": "user",
+            "content": (
+                f"Generate {req.count} flashcards for topic: '{req.topic}'.\n"
+                f"Required Fields: {', '.join(field_names)}\n"
+                "Example: [{\"Front\": \"Question...\", \"Back\": \"Answer...\"}]"
+            )
+        }
     ]
     try:
         cards = extract_json_array(content)
         return {"cards": cards, "model_used": MODEL_ID}
     except Exception as e:
+        print(f"Gen Deck Error: {e}")
         raise HTTPException(500, str(e))