Spaces:

habulaj
/

newapi-clone

Paused

App Files Files Community

habulaj committed on Aug 27, 2025

Commit

76cb254

verified ·

1 Parent(s): 330cc3d

Update routers/inference_createposter.py

Browse files

Files changed (1) hide show

routers/inference_createposter.py +88 -29

routers/inference_createposter.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import logging
 from urllib.parse import urlencode, quote, parse_qs, urlparse, urlunparse
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
@@ -18,6 +20,56 @@ class PosterResponse(BaseModel):
     result: dict
     urls: list
 def fix_citation_quotes(citation_text: str) -> str:
     """
     Corrige as aspas no texto de citação:
@@ -27,15 +79,14 @@ def fix_citation_quotes(citation_text: str) -> str:
     """
     if not citation_text or citation_text.strip() == "":
         return citation_text
     text = citation_text.strip()
     # Remover todas as tags HTML
-    import re
     text = re.sub(r'<[^>]+>', '', text)
     # Verificar se já tem as aspas corretas
-    if text.startswith('“') and text.endswith('”'):
         return text
     # Remover aspas existentes do início e fim
@@ -50,7 +101,7 @@ def fix_citation_quotes(citation_text: str) -> str:
         text = text[:-1]
     # Adicionar as aspas corretas
-    return f"“{text.strip()}”"
 def clean_text_content_for_text_param(text: str) -> str:
     """
@@ -62,12 +113,9 @@ def clean_text_content_for_text_param(text: str) -> str:
     if not text:
         return text
-    import re
     # Primeiro, resolver conflitos de tags aninhadas - priorizar a segunda (mais interna)
     # <strong><em>conteúdo</em></strong> -> <em>conteúdo</em>
     text = re.sub(r'<strong>\s*<em>(.*?)</em>\s*</strong>', r'<em>\1</em>', text)
     # <em><strong>conteúdo</strong></em> -> <strong>conteúdo</strong>
     text = re.sub(r'<em>\s*<strong>(.*?)</strong>\s*</em>', r'<strong>\1</strong>', text)
@@ -84,8 +132,6 @@ def clean_text_content_remove_all_tags(text: str) -> str:
     if not text:
         return text
-    import re
     # Remove TODAS as tags HTML usando regex mais ampla
     text = re.sub(r'<[^>]*>', '', text)
@@ -143,14 +189,14 @@ def fix_url_citation(url: str) -> str:
         # Reconstruir a query string
         new_query = urlencode(
-            {k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in query_params.items()},
             quote_via=quote
         )
         # Reconstruir a URL
         new_parsed_url = parsed_url._replace(query=new_query)
         return urlunparse(new_parsed_url)
     except Exception as e:
         logger.warning(f"Erro ao processar URL para correção de texto: {e}")
         return url
@@ -172,7 +218,6 @@ def format_url(base_url: str, endpoint: str, params: dict) -> str:
     # Construir query string
     query_string = urlencode(url_params, quote_via=quote)
     return f"{full_url}?{query_string}"
 def generate_urls_from_result(result: dict, base_url: str = "https://habulaj-newapi-clone.hf.space") -> list:
@@ -193,8 +238,8 @@ def generate_urls_from_result(result: dict, base_url: str = "https://habulaj-new
         # Adicionar URL da capa
         if "cover" in result:
             cover_url = format_url(
-                base_url,
-                result["cover"]["endpoint"],
                 result["cover"]["params"]
             )
             # Corrigir citation na URL se presente
@@ -579,9 +624,9 @@ Atenção: este artigo contém spoilers importantes sobre o enredo e o final do
             contents=contents,
             config=config
         )
         logger.info("Resposta do modelo recebida com sucesso")
         # Extrair texto da resposta
         response_text = ""
         if hasattr(response, 'text') and response.text:
@@ -593,34 +638,48 @@ Atenção: este artigo contém spoilers importantes sobre o enredo e o final do
                         for part in candidate.content.parts:
                             if hasattr(part, 'text') and part.text:
                                 response_text += part.text
         if not response_text or response_text.strip() == "":
             logger.error("Resposta do modelo está vazia")
             raise HTTPException(
-                status_code=500,
                 detail="Modelo não retornou conteúdo válido"
             )
         # Parse do JSON
-        import json
         try:
-            result_json = json.loads(response_text)
         except json.JSONDecodeError as e:
             logger.error(f"Erro ao fazer parse do JSON: {e}")
-            logger.error(f"Resposta recebida: {response_text}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Resposta do modelo não é um JSON válido: {str(e)}"
-            )
         # Gerar URLs formatadas
         formatted_urls = generate_urls_from_result(result_json)
         logger.info("Processamento concluído com sucesso")
         logger.info(f"URLs geradas: {formatted_urls}")
         return PosterResponse(result=result_json, urls=formatted_urls)
     except HTTPException:
         raise
     except Exception as e:

 import os
 import logging
+import re
+import json
 from urllib.parse import urlencode, quote, parse_qs, urlparse, urlunparse
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
     result: dict
     urls: list
def clean_json_string(json_string: str) -> str:
    """Sanitize raw control characters so the text can be fed to ``json.loads``.

    The input is scanned once while tracking whether the current position is
    inside a double-quoted JSON string:

    * Inside a string, raw newline, carriage return and tab are rewritten as
      the two-character escapes ``\\n``, ``\\r`` and ``\\t``; every other
      character below U+0020 is dropped. A strict JSON parser rejects *any*
      raw control character inside a string, tab included.
    * Outside strings, tab, newline and carriage return are kept (they are
      legal JSON whitespace) and all other control characters are dropped.
    * A backslash inside a string makes the following character pass through
      untouched, so existing escapes such as ``\\"`` are neither altered nor
      mistaken for the end of the string.

    Args:
        json_string: Raw JSON text, possibly containing control characters.

    Returns:
        The sanitized text. Falsy input (``None`` or ``""``) is returned as-is.
    """
    if not json_string:
        return json_string

    # Control characters that have a short JSON escape; any other control
    # character found inside a string is removed.
    string_escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
    # Control characters that are legal whitespace between JSON tokens.
    structural_whitespace = ('\t', '\n', '\r')

    # Collect pieces and join once: repeated ``str +=`` can be quadratic.
    pieces = []
    in_string = False
    escape_next = False

    for char in json_string:
        if escape_next:
            # Character following a backslash: part of an escape sequence.
            pieces.append(char)
            escape_next = False
        elif in_string and char == '\\':
            pieces.append(char)
            escape_next = True
        elif char == '"':
            # Unescaped quote: toggles between string and structural context.
            pieces.append(char)
            in_string = not in_string
        elif in_string:
            if ord(char) < 32:
                pieces.append(string_escapes.get(char, ''))
            else:
                pieces.append(char)
        elif ord(char) >= 32 or char in structural_whitespace:
            pieces.append(char)

    return ''.join(pieces)
 def fix_citation_quotes(citation_text: str) -> str:
     """
     Corrige as aspas no texto de citação:
     """
     if not citation_text or citation_text.strip() == "":
         return citation_text
     text = citation_text.strip()
     # Remover todas as tags HTML
     text = re.sub(r'<[^>]+>', '', text)
     # Verificar se já tem as aspas corretas
+    if text.startswith('"') and text.endswith('"'):
         return text
     # Remover aspas existentes do início e fim
         text = text[:-1]
     # Adicionar as aspas corretas
+    return f""{text.strip()}""
 def clean_text_content_for_text_param(text: str) -> str:
     """
     if not text:
         return text
     # Primeiro, resolver conflitos de tags aninhadas - priorizar a segunda (mais interna)
     # <strong><em>conteúdo</em></strong> -> <em>conteúdo</em>
     text = re.sub(r'<strong>\s*<em>(.*?)</em>\s*</strong>', r'<em>\1</em>', text)
     # <em><strong>conteúdo</strong></em> -> <strong>conteúdo</strong>
     text = re.sub(r'<em>\s*<strong>(.*?)</strong>\s*</em>', r'<strong>\1</strong>', text)
     if not text:
         return text
     # Remove TODAS as tags HTML usando regex mais ampla
     text = re.sub(r'<[^>]*>', '', text)
         # Reconstruir a query string
         new_query = urlencode(
+            {k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in query_params.items()},
             quote_via=quote
         )
         # Reconstruir a URL
         new_parsed_url = parsed_url._replace(query=new_query)
         return urlunparse(new_parsed_url)
     except Exception as e:
         logger.warning(f"Erro ao processar URL para correção de texto: {e}")
         return url
     # Construir query string
     query_string = urlencode(url_params, quote_via=quote)
     return f"{full_url}?{query_string}"
 def generate_urls_from_result(result: dict, base_url: str = "https://habulaj-newapi-clone.hf.space") -> list:
         # Adicionar URL da capa
         if "cover" in result:
             cover_url = format_url(
+                base_url,
+                result["cover"]["endpoint"],
                 result["cover"]["params"]
             )
             # Corrigir citation na URL se presente
             contents=contents,
             config=config
         )
         logger.info("Resposta do modelo recebida com sucesso")
         # Extrair texto da resposta
         response_text = ""
         if hasattr(response, 'text') and response.text:
                         for part in candidate.content.parts:
                             if hasattr(part, 'text') and part.text:
                                 response_text += part.text
         if not response_text or response_text.strip() == "":
             logger.error("Resposta do modelo está vazia")
             raise HTTPException(
+                status_code=500,
                 detail="Modelo não retornou conteúdo válido"
             )
+        # Limpar caracteres de controle antes do parse
+        clean_response = clean_json_string(response_text)
         # Parse do JSON
         try:
+            result_json = json.loads(clean_response)
         except json.JSONDecodeError as e:
             logger.error(f"Erro ao fazer parse do JSON: {e}")
+            logger.error(f"Resposta original: {response_text}")
+            logger.error(f"Resposta limpa: {clean_response}")
+            # Tentar uma limpeza mais agressiva como fallback
+            try:
+                # Remove quebras de linha e espaços extras
+                fallback_clean = re.sub(r'\s+', ' ', response_text.strip())
+                # Remove caracteres de controle
+                fallback_clean = ''.join(char for char in fallback_clean if ord(char) >= 32 or char in [' ', '\t'])
+                result_json = json.loads(fallback_clean)
+                logger.info("Parse bem-sucedido com limpeza de fallback")
+            except json.JSONDecodeError as fallback_error:
+                logger.error(f"Erro no fallback também: {fallback_error}")
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Resposta do modelo não é um JSON válido: {str(e)}"
+                )
         # Gerar URLs formatadas
         formatted_urls = generate_urls_from_result(result_json)
         logger.info("Processamento concluído com sucesso")
         logger.info(f"URLs geradas: {formatted_urls}")
         return PosterResponse(result=result_json, urls=formatted_urls)
     except HTTPException:
         raise
     except Exception as e: