Spaces:
Sleeping
Sleeping
remove debug
Browse files- utils/generator.py +0 -25
utils/generator.py
CHANGED
|
@@ -62,46 +62,21 @@ def _parse_citations(response: str) -> List[int]:
|
|
| 62 |
matches = re.findall(citation_pattern, response)
|
| 63 |
citation_numbers = sorted(list(set(int(match) for match in matches)))
|
| 64 |
|
| 65 |
-
# Debug logging
|
| 66 |
-
logger.info(f"=== CITATION PARSING DEBUG ===")
|
| 67 |
-
logger.info(f"Response text length: {len(response)}")
|
| 68 |
-
logger.info(f"Found citation matches: {matches}")
|
| 69 |
-
logger.info(f"Parsed citation numbers: {citation_numbers}")
|
| 70 |
-
|
| 71 |
return citation_numbers
|
| 72 |
|
| 73 |
def _extract_sources(processed_results: List[Dict[str, Any]], cited_numbers: List[int]) -> List[Dict[str, Any]]:
|
| 74 |
"""Extract sources that were cited in the response"""
|
| 75 |
-
# Debug logging - show raw sources data
|
| 76 |
-
print("=== RAW SOURCES DATA ===")
|
| 77 |
-
print(f"Total sources: {len(processed_results)}")
|
| 78 |
-
print("All available sources:")
|
| 79 |
-
for i, source in enumerate(processed_results):
|
| 80 |
-
print(f" [{i+1}] {json.dumps(source, indent=2)}")
|
| 81 |
-
|
| 82 |
-
print(f"Cited numbers: {cited_numbers}")
|
| 83 |
-
|
| 84 |
if not cited_numbers:
|
| 85 |
-
print("No citations found - returning empty sources list")
|
| 86 |
return []
|
| 87 |
|
| 88 |
cited_sources = []
|
| 89 |
for citation_num in cited_numbers:
|
| 90 |
source_index = citation_num - 1
|
| 91 |
-
print(f"Processing citation [{citation_num}] -> source_index: {source_index}")
|
| 92 |
|
| 93 |
if 0 <= source_index < len(processed_results):
|
| 94 |
source = processed_results[source_index].copy() # Make copy to avoid modifying original
|
| 95 |
source['_citation_number'] = citation_num # Preserve original citation number
|
| 96 |
cited_sources.append(source)
|
| 97 |
-
print(f"✓ Added source {citation_num}")
|
| 98 |
-
else:
|
| 99 |
-
print(f"✗ Citation [{citation_num}] is out of range! source_index {source_index} not in range [0, {len(processed_results)-1}]")
|
| 100 |
-
|
| 101 |
-
print(f"Final filtered sources count: {len(cited_sources)}")
|
| 102 |
-
print("Filtered sources:")
|
| 103 |
-
for i, source in enumerate(cited_sources):
|
| 104 |
-
print(f" [{i+1}] {json.dumps(source, indent=2)}")
|
| 105 |
|
| 106 |
return cited_sources
|
| 107 |
|
|
|
|
| 62 |
matches = re.findall(citation_pattern, response)
|
| 63 |
citation_numbers = sorted(list(set(int(match) for match in matches)))
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
return citation_numbers
|
| 66 |
|
| 67 |
def _extract_sources(processed_results: List[Dict[str, Any]], cited_numbers: List[int]) -> List[Dict[str, Any]]:
|
| 68 |
"""Extract sources that were cited in the response"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
if not cited_numbers:
|
|
|
|
| 70 |
return []
|
| 71 |
|
| 72 |
cited_sources = []
|
| 73 |
for citation_num in cited_numbers:
|
| 74 |
source_index = citation_num - 1
|
|
|
|
| 75 |
|
| 76 |
if 0 <= source_index < len(processed_results):
|
| 77 |
source = processed_results[source_index].copy() # Make copy to avoid modifying original
|
| 78 |
source['_citation_number'] = citation_num # Preserve original citation number
|
| 79 |
cited_sources.append(source)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return cited_sources
|
| 82 |
|