SMForomir committed on
Commit
e366a24
verified
1 Parent(s): c34a524

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +50 -39
main.py CHANGED
@@ -9,20 +9,23 @@ from huggingface_hub import InferenceClient
9
  app = FastAPI()
10
 
11
  # --- CONFIGURACIÓN ---
12
- EXPECTED_TOKEN = os.getenv("SERVICE_SECRET", "pon_aqui_tu_token_secreto")
13
  HF_TOKEN = os.getenv("HF_TOKEN")
14
 
15
  # 1. CROSS ENCODER (Local - CPU)
 
16
  print("Cargando CrossEncoder...")
17
  similarity_model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
18
  print("CrossEncoder cargado.")
19
 
20
  # 2. GENERADOR (API Hugging Face)
 
 
21
  MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
22
  print(f"Conectando a Inference API con modelo: {MODEL_ID}...")
23
  gen_client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
24
 
25
- # --- UTILIDADES JSON (Tus funciones existentes) ---
26
  def fix_truncated_json(json_str):
27
  """Intenta cerrar corchetes y llaves si el modelo se ha cortado."""
28
  json_str = json_str.strip()
@@ -35,14 +38,15 @@ def fix_truncated_json(json_str):
35
 
36
  def extract_json_array(text):
37
  """Limpia markdown y extrae el JSON."""
38
- text = text.replace("```json", "").replace("```", "").strip()
 
 
 
 
39
  start_idx_arr = text.find('[')
40
- start_idx_obj = text.find('{')
41
 
42
- if start_idx_arr != -1 and (start_idx_obj == -1 or start_idx_arr < start_idx_obj):
43
  candidate = text[start_idx_arr:]
44
- elif start_idx_obj != -1:
45
- candidate = text[start_idx_obj:]
46
  else:
47
  return text
48
 
@@ -53,11 +57,15 @@ def extract_json_array(text):
53
  try:
54
  return json.loads(fixed_candidate)
55
  except:
56
- last_char = ']' if candidate.strip().startswith('[') else '}'
57
- last_idx = candidate.rfind(last_char)
58
  if last_idx != -1:
59
- return json.loads(candidate[:last_idx+1])
60
- raise ValueError("Could not extract or repair valid JSON")
 
 
 
 
61
 
62
  # --- MODELOS Pydantic ---
63
  class VerifyRequest(BaseModel):
@@ -69,59 +77,52 @@ class DeckRequest(BaseModel):
69
  count: int
70
  fields: list
71
 
72
- class ArchitectRequest(BaseModel): # <--- NUEVO MODELO
73
  topic: str
74
  subtopic_count: int
75
 
76
- # --- ENDPOINTS EXISTENTES (Verify) ---
77
  @app.get("/")
78
  def health():
79
  return {"status": "ok", "service": "InDeck Brain", "model": MODEL_ID}
80
 
81
  @app.post("/verify")
82
  def verify_answer(req: VerifyRequest, x_service_token: str = Header(None)):
83
- if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
84
  try:
85
  scores = similarity_model.predict([(req.user_input, req.correct_answer)])
86
  score = float(scores[0])
87
  status = "WRONG"
88
- final_score = score
89
- method = "Semantic-CrossEncoder"
90
-
91
- if score >= 0.85:
92
  status = "CORRECT"
93
- elif score >= 0.75:
94
  status = "TYPO"
95
 
96
- # Fallback a LLM si hay duda
97
- if status != "CORRECT" and score > 0.4: # Solo preguntamos si no es un disparate total
98
 - # (Aquí iría tu lógica verify_with_llm que ya tenías implementada)
99
- pass
100
-
101
- return {"status": status, "score": final_score, "method": method}
102
  except Exception as e:
 
103
  raise HTTPException(500, str(e))
104
 
105
 - # --- ENDPOINTS NUEVOS PARA GENERACIÓN ---
106
-
107
  @app.post("/architect")
108
  def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
109
  """
110
- ROL ARQUITECTO: Genera la lista de subtemas cuando Gemini falla.
111
  """
112
- if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
113
 
114
  messages = [
115
  {
116
  "role": "system",
117
- "content": "You are an expert curriculum designer. Output ONLY a JSON Array of strings."
118
  },
119
  {
120
  "role": "user",
121
  "content": (
122
  f"Create a curriculum list of exactly {req.subtopic_count} distinct subtopics "
123
- f"to cover the subject: '{req.topic}'.\n"
124
- "Return valid JSON array of strings. Example: [\"Topic A\", \"Topic B\"]"
125
  )
126
  }
127
  ]
@@ -130,29 +131,38 @@ def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
130
  response = gen_client.chat_completion(
131
  messages=messages,
132
  max_tokens=1024,
133
- temperature=0.7
134
  )
135
  content = response.choices[0].message.content
136
  subtopics = extract_json_array(content)
137
 
138
  if not isinstance(subtopics, list):
139
 - # Qwen a veces es verborrágico, intentamos limpiar de nuevo
140
- raise ValueError("Output is not a list")
141
 
142
  return {"subtopics": subtopics[:req.subtopic_count], "model": MODEL_ID}
143
  except Exception as e:
144
  print(f"Architect Error: {e}")
 
145
  raise HTTPException(500, f"Architect failed: {str(e)}")
146
 
147
  @app.post("/generate_deck")
148
  def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
149
- if x_service_token != EXPECTED_TOKEN: raise HTTPException(401)
150
 
151
 - # Tu código existente de generación (Worker)
152
  field_names = [f.get('name', 'Field') for f in req.fields]
153
  messages = [
154
- {"role": "system", "content": "You are a specialized JSON generator."},
155
- {"role": "user", "content": f"Topic: {req.topic}\nCount: {req.count}\nFields: {', '.join(field_names)}\nOutput ONLY JSON Array."}
 
 
 
 
 
 
 
 
 
 
156
  ]
157
 
158
  try:
@@ -161,4 +171,5 @@ def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
161
  cards = extract_json_array(content)
162
  return {"cards": cards, "model_used": MODEL_ID}
163
  except Exception as e:
 
164
  raise HTTPException(500, str(e))
 
9
  app = FastAPI()
10
 
11
  # --- CONFIGURACI脫N ---
12
+ EXPECTED_TOKEN = os.getenv("SERVICE_SECRET", "tu_token_super_secreto_aqui")
13
  HF_TOKEN = os.getenv("HF_TOKEN")
14
 
15
  # 1. CROSS ENCODER (Local - CPU)
16
+ # Carga al inicio. Si usas HF Space con CPU b谩sica, esto tardar谩 unos segundos al arrancar.
17
  print("Cargando CrossEncoder...")
18
  similarity_model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
19
  print("CrossEncoder cargado.")
20
 
21
  # 2. GENERADOR (API Hugging Face)
22
+ # Qwen 2.5 72B es excelente. Si notas Timeouts (504), cambia a:
23
+ # "Qwen/Qwen2.5-Coder-32B-Instruct" o "mistralai/Mistral-7B-Instruct-v0.3"
24
  MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
25
  print(f"Conectando a Inference API con modelo: {MODEL_ID}...")
26
  gen_client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
27
 
28
+ # --- UTILIDADES JSON ---
29
  def fix_truncated_json(json_str):
30
  """Intenta cerrar corchetes y llaves si el modelo se ha cortado."""
31
  json_str = json_str.strip()
 
38
 
39
  def extract_json_array(text):
40
  """Limpia markdown y extrae el JSON."""
41
 + # Limpieza agresiva de bloques de código
42
+ text = re.sub(r'```json\s*', '', text)
43
+ text = re.sub(r'```\s*', '', text)
44
+ text = text.strip()
45
+
46
  start_idx_arr = text.find('[')
 
47
 
48
+ if start_idx_arr != -1:
49
  candidate = text[start_idx_arr:]
 
 
50
  else:
51
  return text
52
 
 
57
  try:
58
  return json.loads(fixed_candidate)
59
  except:
60
 + # Último intento: cortar hasta el último cierre
61
+ last_idx = candidate.rfind(']')
62
  if last_idx != -1:
63
+ try:
64
+ return json.loads(candidate[:last_idx+1])
65
+ except:
66
+ pass
67
+ print(f"FAILED JSON EXTRACTION: {text[:100]}...") # Log para debug
68
 + return [] # Retornar lista vacía es mejor que explotar
69
 
70
  # --- MODELOS Pydantic ---
71
  class VerifyRequest(BaseModel):
 
77
  count: int
78
  fields: list
79
 
80
+ class ArchitectRequest(BaseModel):
81
  topic: str
82
  subtopic_count: int
83
 
84
+ # --- ENDPOINTS ---
85
  @app.get("/")
86
  def health():
87
  return {"status": "ok", "service": "InDeck Brain", "model": MODEL_ID}
88
 
89
  @app.post("/verify")
90
  def verify_answer(req: VerifyRequest, x_service_token: str = Header(None)):
91
+ if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
92
  try:
93
  scores = similarity_model.predict([(req.user_input, req.correct_answer)])
94
  score = float(scores[0])
95
  status = "WRONG"
96
+
97
+ # Umbrales ajustados para l贸gica difusa
98
+ if score >= 0.82: # Un poco m谩s permisivo que 0.85
 
99
  status = "CORRECT"
100
+ elif score >= 0.70:
101
  status = "TYPO"
102
 
103
+ return {"status": status, "score": score, "method": "Semantic-CrossEncoder"}
 
 
 
 
 
104
  except Exception as e:
105
+ print(f"Verify Error: {e}")
106
  raise HTTPException(500, str(e))
107
 
 
 
108
  @app.post("/architect")
109
  def architect_plan(req: ArchitectRequest, x_service_token: str = Header(None)):
110
  """
111
+ Genera subtemas. Usado como fallback cuando Gemini falla.
112
  """
113
+ if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
114
 
115
  messages = [
116
  {
117
  "role": "system",
118
+ "content": "You are a specialized JSON generator. You output ONLY valid raw JSON arrays. No Markdown. No introduction."
119
  },
120
  {
121
  "role": "user",
122
  "content": (
123
  f"Create a curriculum list of exactly {req.subtopic_count} distinct subtopics "
124
+ f"for the subject: '{req.topic}'.\n"
125
+ "Format: [\"Subtopic 1\", \"Subtopic 2\", ...]"
126
  )
127
  }
128
  ]
 
131
  response = gen_client.chat_completion(
132
  messages=messages,
133
  max_tokens=1024,
134
+ temperature=0.6 # Temp baja para consistencia
135
  )
136
  content = response.choices[0].message.content
137
  subtopics = extract_json_array(content)
138
 
139
  if not isinstance(subtopics, list):
140
+ raise ValueError("Model did not return a list")
 
141
 
142
  return {"subtopics": subtopics[:req.subtopic_count], "model": MODEL_ID}
143
  except Exception as e:
144
  print(f"Architect Error: {e}")
145
+ # Retornamos error 500 para que el backend local use su propio fallback final
146
  raise HTTPException(500, f"Architect failed: {str(e)}")
147
 
148
  @app.post("/generate_deck")
149
  def generate_deck(req: DeckRequest, x_service_token: str = Header(None)):
150
+ if x_service_token != EXPECTED_TOKEN: raise HTTPException(401, "Invalid Service Token")
151
 
 
152
  field_names = [f.get('name', 'Field') for f in req.fields]
153
  messages = [
154
+ {
155
+ "role": "system",
156
+ "content": "You are a flashcard generator. Output ONLY a valid JSON Array. No formatting blocks."
157
+ },
158
+ {
159
+ "role": "user",
160
+ "content": (
161
+ f"Generate {req.count} flashcards for topic: '{req.topic}'.\n"
162
+ f"Required Fields: {', '.join(field_names)}\n"
163
+ "Example: [{\"Front\": \"Question...\", \"Back\": \"Answer...\"}]"
164
+ )
165
+ }
166
  ]
167
 
168
  try:
 
171
  cards = extract_json_array(content)
172
  return {"cards": cards, "model_used": MODEL_ID}
173
  except Exception as e:
174
+ print(f"Gen Deck Error: {e}")
175
  raise HTTPException(500, str(e))