"""
Simplified demo of document text extraction without heavy ML dependencies.

This demonstrates the core workflow and patterns without requiring PyTorch/Transformers.
"""

import json
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Any


class SimpleDocumentProcessor:
    """Regex-based entity extractor used by the simplified demo.

    The processor keeps a table of regular expressions per entity type
    (``entity_patterns``) and turns raw text into a flat list of entity
    dictionaries plus a one-value-per-field summary.
    """

    # NAME patterns rely on capitalisation (``[A-Z][a-z]+``).  Matching them
    # with re.IGNORECASE would accept any two adjacent words as a name, so
    # these entity types are matched case-sensitively.
    _CASE_SENSITIVE_TYPES = frozenset({'NAME'})

    # Fixed confidence reported for every match of a given entity type.
    _CONFIDENCE_BY_TYPE = {
        'NAME': 0.80,
        'DATE': 0.85,
        'AMOUNT': 0.85,
        'INVOICE_NO': 0.90,
        'EMAIL': 0.95,
        'PHONE': 0.90,
        'ADDRESS': 0.75,
    }
    _DEFAULT_CONFIDENCE = 0.70

    # Entity type -> key used in the structured output.
    _FIELD_NAMES = {
        'NAME': 'Name',
        'DATE': 'Date',
        'AMOUNT': 'Amount',
        'INVOICE_NO': 'InvoiceNo',
        'EMAIL': 'Email',
        'PHONE': 'Phone',
        'ADDRESS': 'Address',
    }

    def __init__(self):
        """Initialize with regex patterns for entity extraction.

        Patterns are listed per entity type; every pattern exposes the value
        of interest as capture group 1.
        """
        self.entity_patterns = {
            'NAME': [
                r'\b(?:Mr\.|Mrs\.|Ms\.|Dr\.)\s+([A-Z][a-z]+ [A-Z][a-z]+)\b',
                r'\b([A-Z][a-z]+ [A-Z][a-z]+)\b',
            ],
            'DATE': [
                r'\b(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})\b',
                r'\b(\d{4}[/\-]\d{1,2}[/\-]\d{1,2})\b',
                r'\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+\d{2,4})\b',
            ],
            'INVOICE_NO': [
                r'(?:Invoice\s+(?:No|Number|#):\s*)?([A-Z]{2,4}[-]?\d{3,6})',
                r'(INV[-]?\d{3,6})',
                r'(BL[-]?\d{3,6})',
                r'(REC[-]?\d{3,6})',
            ],
            'AMOUNT': [
                r'(\$\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?)',
                r'(\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP))',
            ],
            'PHONE': [
                r'(\+?\d{1,3}[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})',
                r'(\(\d{3}\)\s*\d{3}-\d{4})',
            ],
            'EMAIL': [
                # The TLD class used to be [A-Z|a-z], which also accepted a
                # literal '|' character.
                r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b',
            ],
        }

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Extract entities from text using the configured regex patterns.

        Args:
            text: The document text to scan.

        Returns:
            A list of dictionaries with the keys ``entity`` (type name),
            ``text`` (matched value), ``start``/``end`` (offsets of that
            value in ``text``) and ``confidence``.  A span that several
            patterns of the same entity type match is reported once.
        """
        entities: List[Dict[str, Any]] = []
        seen_spans = set()

        for entity_type, patterns in self.entity_patterns.items():
            flags = 0 if entity_type in self._CASE_SENSITIVE_TYPES else re.IGNORECASE
            confidence = self.get_confidence_score(entity_type)

            for pattern in patterns:
                for match in re.finditer(pattern, text, flags):
                    # Prefer the captured value; fall back to the whole match
                    # when the pattern has no group or the group did not
                    # take part in the match.
                    has_group = match.re.groups and match.group(1) is not None
                    group = 1 if has_group else 0
                    raw = match.group(group)
                    value = raw.strip()
                    if not value:
                        continue

                    # Offsets describe the reported value, not an optional
                    # prefix such as "Invoice No: " consumed by the pattern.
                    start = match.start(group) + (len(raw) - len(raw.lstrip()))
                    end = start + len(value)

                    span_key = (entity_type, start, end)
                    if span_key in seen_spans:
                        continue
                    seen_spans.add(span_key)

                    entities.append({
                        'entity': entity_type,
                        'text': value,
                        'start': start,
                        'end': end,
                        'confidence': confidence,
                    })

        return entities

    def get_confidence_score(self, entity_type: str) -> float:
        """Return the fixed confidence for ``entity_type`` (0.70 if unknown)."""
        return self._CONFIDENCE_BY_TYPE.get(entity_type, self._DEFAULT_CONFIDENCE)

    def create_structured_data(self, entities: List[Dict[str, Any]]) -> Dict[str, str]:
        """Collapse an entity list into one value per field.

        For each entity type the entity with the highest confidence wins;
        ties are broken by the longer text, then by first occurrence.

        Args:
            entities: Entity dictionaries as produced by ``extract_entities``.

        Returns:
            Mapping of readable field name (for example ``InvoiceNo``) to the
            chosen entity text.  Unknown entity types keep their own name.
        """
        groups = defaultdict(list)
        for entity in entities:
            groups[entity['entity']].append(entity)

        structured: Dict[str, str] = {}
        for entity_type, group in groups.items():
            best = max(group, key=lambda e: (e['confidence'], len(e['text'])))
            field_name = self._FIELD_NAMES.get(entity_type, entity_type)
            structured[field_name] = best['text']

        return structured

    def process_document(self, text: str) -> Dict[str, Any]:
        """Process document text and return the extraction summary.

        Returns:
            A dictionary with the original ``text``, the ``entities`` list,
            the ``structured_data`` mapping, ``entity_count`` and the sorted
            list of distinct ``entity_types``.
        """
        entities = self.extract_entities(text)

        return {
            'text': text,
            'entities': entities,
            'structured_data': self.create_structured_data(entities),
            'entity_count': len(entities),
            # Sorted so the output is stable between runs.
            'entity_types': sorted({e['entity'] for e in entities}),
        }


def run_demo():
    """Run the simplified document extraction demo.

    Processes a fixed set of sample documents with
    ``SimpleDocumentProcessor``, prints the per-document results and a
    summary, writes everything to ``results/demo_extraction_results.json``
    and finally prints a simulated API request/response pair.
    """
    print("SIMPLIFIED DOCUMENT TEXT EXTRACTION DEMO")
    print("=" * 60)
    print("This demo shows the core extraction logic using regex patterns")
    print("(without the full ML pipeline for demonstration purposes)")
    print()

    processor = SimpleDocumentProcessor()

    sample_documents = [
        {
            "name": "Invoice Example 1",
            "text": "Invoice sent to Robert White on 15/09/2025 Invoice No: INV-1024 Amount: $1,250.00 Phone: (555) 123-4567 Email: robert.white@email.com",
        },
        {
            "name": "Invoice Example 2",
            "text": "Bill for Dr. Sarah Johnson dated March 10, 2025. Invoice Number: BL-2045. Total: $2,300.50 Email: sarah.johnson@email.com",
        },
        {
            "name": "Receipt Example",
            "text": "Receipt for Michael Brown Invoice: REC-3089 Date: 2025-04-22 Amount: $890.75 Contact: +1-555-987-6543",
        },
        {
            "name": "Business Document",
            "text": "Ms. Emma Wilson 456 Oak Street Payment due: January 15, 2025 Reference: INV-4567 Total: $1,750.25",
        },
    ]

    all_results = []
    total_entities = 0
    all_entity_types = set()

    for i, doc in enumerate(sample_documents, 1):
        print(f"\nDocument {i}: {doc['name']}")
        print("-" * 50)
        print(f"Text: {doc['text']}")
        print()

        result = processor.process_document(doc['text'])
        all_results.append(result)

        total_entities += result['entity_count']
        all_entity_types.update(result['entity_types'])

        print("Extraction Results:")
        print(f" Found {result['entity_count']} entities")
        print(f" Entity types: {', '.join(result['entity_types'])}")

        if result['structured_data']:
            print("\nStructured Information:")
            for key, value in result['structured_data'].items():
                print(f" {key}: {value}")

        if result['entities']:
            print("\nDetailed Entities:")
            for entity in result['entities']:
                print(f" {entity['entity']}: '{entity['text']}' (confidence: {entity['confidence']*100:.0f}%)")

    # Persist the combined results next to the script's working directory.
    output_dir = Path("results")
    output_dir.mkdir(exist_ok=True)
    output_file = output_dir / "demo_extraction_results.json"

    output_data = {
        'demo_info': {
            'timestamp': datetime.now().isoformat(),
            'documents_processed': len(sample_documents),
            'total_entities_found': total_entities,
            'unique_entity_types': sorted(all_entity_types),
        },
        'results': all_results,
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"\nResults saved to: {output_file}")

    print("\nDemo Summary:")
    print(f" Documents processed: {len(sample_documents)}")
    print(f" Total entities found: {total_entities}")
    print(f" Total structured fields: {sum(len(r['structured_data']) for r in all_results)}")
    print(f" Unique entity types: {', '.join(sorted(all_entity_types))}")

    print("\nDemo completed successfully!")

    print("\nThis demonstrates the core extraction logic.")
    print(" The full system would add:")
    print(" - OCR for scanned documents")
    print(" - ML model (DistilBERT) for better accuracy")
    print(" - Web API for file uploads")
    print(" - Training pipeline for custom domains")

    print("\nAPI FUNCTIONALITY SIMULATION")
    print("=" * 40)

    sample_text = "Invoice sent to John Doe on 01/15/2025 Invoice No: INV-1001 Amount: $1,500.00"

    print('API Request (POST /extract-from-text):')
    # Serialise the request with json.dumps so quoting and escaping are
    # always valid JSON instead of being assembled by hand.
    print(json.dumps({"text": sample_text}, indent=2))

    print("\nAPI Response:")
    api_result = processor.process_document(sample_text)

    api_response = {
        "status": "success",
        "data": {
            "original_text": sample_text,
            "entities": api_result['entities'],
            "structured_data": api_result['structured_data'],
            "processing_timestamp": datetime.now().isoformat(),
            "total_entities_found": api_result['entity_count'],
            "entity_types_found": api_result['entity_types'],
        },
    }

    print(json.dumps(api_response, indent=2))

    print("\nTo run the full system:")
    print(" 1. Install ML dependencies: pip install torch transformers")
    print(" 2. Run training: python src/training_pipeline.py")
    print(" 3. Start API: python api/app.py")
    print(" 4. Open browser: http://localhost:8000")


# Script entry point: run the demo only when executed directly.
if __name__ == "__main__":
    run_demo()


class SimpleDocumentProcessor:
    """Simplified document processor for demo purposes.

    Extracts entities from text with regular expressions, scores and
    de-duplicates them, and formats one value per entity type into a
    structured summary.
    """

    # NAME patterns rely on capitalisation (``[A-Z][a-z]+``).  Matching them
    # with re.IGNORECASE would accept any two adjacent words as a name, so
    # these entity types are matched case-sensitively.
    _CASE_SENSITIVE_TYPES = frozenset({'NAME'})

    # Entity type -> key used in the structured output.
    _FIELD_NAMES = {
        'NAME': 'Name',
        'DATE': 'Date',
        'INVOICE_NO': 'InvoiceNo',
        'AMOUNT': 'Amount',
        'PHONE': 'Phone',
        'EMAIL': 'Email',
    }

    # Currency codes the AMOUNT patterns can capture as a suffix.
    _CURRENCY_CODES = ('USD', 'EUR', 'GBP')

    def __init__(self):
        """Initialize with regex patterns for entity extraction.

        Patterns are listed per entity type; every pattern exposes the value
        of interest as capture group 1.
        """
        self.entity_patterns = {
            'NAME': [
                r'\b(?:Mr\.|Mrs\.|Ms\.|Dr\.)\s+([A-Z][a-z]+ [A-Z][a-z]+)\b',
                r'\b([A-Z][a-z]+ [A-Z][a-z]+)\b',
            ],
            'DATE': [
                r'\b(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})\b',
                r'\b(\d{4}[/\-]\d{1,2}[/\-]\d{1,2})\b',
                r'\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+\d{2,4})\b',
            ],
            'INVOICE_NO': [
                r'(?:Invoice\s+(?:No|Number|#):\s*)?([A-Z]{2,4}[-]?\d{3,6})',
                r'(INV[-]?\d{3,6})',
                r'(BL[-]?\d{3,6})',
                r'(REC[-]?\d{3,6})',
            ],
            'AMOUNT': [
                r'(\$\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?)',
                r'(\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP))',
            ],
            'PHONE': [
                r'(\+?\d{1,3}[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})',
                r'(\(\d{3}\)\s*\d{3}-\d{4})',
            ],
            'EMAIL': [
                # The TLD class used to be [A-Z|a-z], which also accepted a
                # literal '|' character.
                r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b',
            ],
        }

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Extract entities from text using the configured regex patterns.

        Args:
            text: The document text to scan.

        Returns:
            A list of dictionaries with the keys ``entity``, ``text``,
            ``start``/``end`` (offsets of the reported value in ``text``)
            and ``confidence``.  Entities that repeat an already collected
            (type, text) pair are dropped, see ``_is_duplicate``.
        """
        entities: List[Dict[str, Any]] = []

        for entity_type, patterns in self.entity_patterns.items():
            flags = 0 if entity_type in self._CASE_SENSITIVE_TYPES else re.IGNORECASE

            for pattern in patterns:
                for match in re.finditer(pattern, text, flags):
                    # Prefer the captured value; fall back to the whole match
                    # when the pattern has no group or the group did not
                    # take part in the match.
                    has_group = match.re.groups and match.group(1) is not None
                    group = 1 if has_group else 0
                    raw = match.group(group)
                    value = raw.strip()
                    if not value:
                        continue

                    # Offsets describe the reported value, not an optional
                    # prefix such as "Invoice No: " consumed by the pattern.
                    start_pos = match.start(group) + (len(raw) - len(raw.lstrip()))

                    entity = {
                        'entity': entity_type,
                        'text': value,
                        'start': start_pos,
                        'end': start_pos + len(value),
                        'confidence': self._calculate_confidence(entity_type, value, pattern),
                    }

                    if not self._is_duplicate(entity, entities):
                        entities.append(entity)

        return entities

    def _calculate_confidence(self, entity_type: str, text: str, pattern: str) -> float:
        """Calculate the confidence score for an extracted entity.

        Args:
            entity_type: Type name of the entity.
            text: The extracted value.
            pattern: The pattern that produced the match (currently unused;
                kept so existing callers keep working).

        Returns:
            A type-specific score when the value passes the type's sanity
            check, otherwise the base score of 0.8.
        """
        if entity_type == 'EMAIL' and '@' in text:
            return 0.95
        if entity_type == 'PHONE' and len(re.sub(r'\D', '', text)) >= 10:
            return 0.90
        if entity_type == 'AMOUNT' and '$' in text:
            return 0.85
        if entity_type == 'DATE':
            return 0.85
        if entity_type == 'INVOICE_NO' and any(
            prefix in text.upper() for prefix in ('INV', 'BL', 'REC')
        ):
            return 0.90
        return 0.8

    def _is_duplicate(self, new_entity: Dict, existing_entities: List[Dict]) -> bool:
        """Return True if an entity of the same type and text already exists.

        The text comparison is case-insensitive.
        """
        new_type = new_entity['entity']
        new_text = new_entity['text'].lower()
        return any(
            existing['entity'] == new_type and existing['text'].lower() == new_text
            for existing in existing_entities
        )

    def postprocess_entities(self, entities: List[Dict], text: str) -> Dict[str, str]:
        """Convert entities to the structured data format.

        For each entity type the first entity with the highest confidence is
        chosen and its text is formatted with ``_format_entity_value``.

        Args:
            entities: Entity dictionaries as produced by ``extract_entities``.
            text: The source text (currently unused; kept for callers).

        Returns:
            Mapping of readable field name to the formatted value.  Unknown
            entity types keep their own name as the key.
        """
        groups = defaultdict(list)
        for entity in entities:
            groups[entity['entity']].append(entity)

        structured_data: Dict[str, str] = {}
        for entity_type, group in groups.items():
            best_entity = max(group, key=lambda e: e['confidence'])
            readable_key = self._FIELD_NAMES.get(entity_type, entity_type)
            structured_data[readable_key] = self._format_entity_value(
                best_entity['text'], entity_type
            )

        return structured_data

    def _format_entity_value(self, text: str, entity_type: str) -> str:
        """Format an entity value based on its type.

        * NAME: every word is capitalised.
        * PHONE: 10-digit numbers become ``(AAA) BBB-CCCC``; 11-digit numbers
          starting with 1 become ``+1 (AAA) BBB-CCCC``; anything else is
          returned unchanged.
        * AMOUNT: a ``$`` is prepended only when the value carries no
          currency marker of its own, so ``500 EUR`` is left alone.
        * Other types are returned stripped but otherwise unchanged.
        """
        text = text.strip()

        if entity_type == 'NAME':
            return ' '.join(word.capitalize() for word in text.split())

        if entity_type == 'PHONE':
            digits = re.sub(r'\D', '', text)
            if len(digits) == 10:
                return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
            if len(digits) == 11 and digits[0] == '1':
                return f"+1 ({digits[1:4]}) {digits[4:7]}-{digits[7:]}"
            return text

        if entity_type == 'AMOUNT':
            has_currency = text.startswith('$') or text.upper().endswith(self._CURRENCY_CODES)
            return text if has_currency else f"${text}"

        return text

    def process_text(self, text: str) -> Dict[str, Any]:
        """Process text and return the extraction results.

        Returns:
            A dictionary with ``original_text``, ``entities``,
            ``structured_data``, an ISO ``processing_timestamp``,
            ``total_entities_found`` and the sorted list of distinct
            ``entity_types_found``.
        """
        entities = self.extract_entities(text)
        structured_data = self.postprocess_entities(entities, text)

        return {
            'original_text': text,
            'entities': entities,
            'structured_data': structured_data,
            'processing_timestamp': datetime.now().isoformat(),
            'total_entities_found': len(entities),
            # Sorted so the output is stable between runs.
            'entity_types_found': sorted({e['entity'] for e in entities}),
        }


def run_demo():
    """Run the document extraction demo.

    Processes a fixed set of sample documents with
    ``SimpleDocumentProcessor.process_text``, prints the per-document
    results and a summary, and writes the results to
    ``results/demo_extraction_results.json``.

    Returns:
        The list of per-document result dictionaries, each extended with a
        ``document_name`` key.
    """
    print("SIMPLIFIED DOCUMENT TEXT EXTRACTION DEMO")
    print("=" * 60)
    print("This demo shows the core extraction logic using regex patterns")
    print("(without the full ML pipeline for demonstration purposes)")
    print()

    processor = SimpleDocumentProcessor()

    sample_docs = [
        {
            "name": "Invoice Example 1",
            "text": "Invoice sent to Robert White on 15/09/2025 Invoice No: INV-1024 Amount: $1,250.00 Phone: (555) 123-4567",
        },
        {
            "name": "Invoice Example 2",
            "text": "Bill for Dr. Sarah Johnson dated March 10, 2025. Invoice Number: BL-2045. Total: $2,300.50 Email: sarah.johnson@email.com",
        },
        {
            "name": "Receipt Example",
            "text": "Receipt for Michael Brown Invoice: REC-3089 Date: 2025-04-22 Amount: $890.75 Contact: +1-555-987-6543",
        },
        {
            "name": "Business Document",
            "text": "Ms. Emma Wilson 456 Oak Street Payment due: January 15, 2025 Reference: INV-4567 Total: $1,750.25",
        },
    ]

    results = []

    for i, doc in enumerate(sample_docs, 1):
        print(f"\nDocument {i}: {doc['name']}")
        print("-" * 50)
        print(f"Text: {doc['text']}")

        result = processor.process_text(doc['text'])
        results.append({
            'document_name': doc['name'],
            **result,
        })

        print("\nExtraction Results:")
        print(f" Found {result['total_entities_found']} entities")
        print(f" Entity types: {', '.join(result['entity_types_found'])}")

        if result['structured_data']:
            print("\nStructured Information:")
            for key, value in result['structured_data'].items():
                print(f" {key}: {value}")

        if result['entities']:
            print("\nDetailed Entities:")
            for entity in result['entities']:
                # round(), not int(): 0.95 * 100 is 94.999... in binary
                # floating point and int() would report 94%.
                confidence_pct = round(entity['confidence'] * 100)
                print(f" {entity['entity']}: '{entity['text']}' (confidence: {confidence_pct}%)")

    # Persist the results next to the script's working directory.
    output_dir = Path("results")
    output_dir.mkdir(exist_ok=True)

    output_file = output_dir / "demo_extraction_results.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Results saved to: {output_file}")

    total_entities = sum(len(r['entities']) for r in results)
    total_structured_fields = sum(len(r['structured_data']) for r in results)
    unique_entity_types = set()
    for r in results:
        unique_entity_types.update(r['entity_types_found'])

    print("\nDemo Summary:")
    print(f" Documents processed: {len(results)}")
    print(f" Total entities found: {total_entities}")
    print(f" Total structured fields: {total_structured_fields}")
    print(f" Unique entity types: {', '.join(sorted(unique_entity_types))}")

    print("\nDemo completed successfully!")
    print("\nThis demonstrates the core extraction logic.")
    print(" The full system would add:")
    print(" - OCR for scanned documents")
    print(" - ML model (DistilBERT) for better accuracy")
    print(" - Web API for file uploads")
    print(" - Training pipeline for custom domains")

    return results


def show_api_simulation():
    """Simulate the API functionality.

    Prints a sample ``POST /extract-from-text`` request and the response
    built from ``SimpleDocumentProcessor.process_text`` for that request.
    """
    print("\n🌐 API FUNCTIONALITY SIMULATION")
    print("=" * 40)

    processor = SimpleDocumentProcessor()

    sample_request = {
        "text": "Invoice sent to John Doe on 01/15/2025 Invoice No: INV-1001 Amount: $1,500.00"
    }

    print("API Request (POST /extract-from-text):")
    print(f" {json.dumps(sample_request, indent=2)}")

    result = processor.process_text(sample_request["text"])

    api_response = {
        "status": "success",
        "data": result,
    }

    print("\nAPI Response:")
    print(f" {json.dumps(api_response, indent=2)}")


# Script entry point: run the demo, then the API simulation, then print the
# instructions for the full system.
if __name__ == "__main__":
    results = run_demo()

    show_api_simulation()

    print("\nTo run the full system:")
    print(" 1. Install ML dependencies: pip install torch transformers")
    print(" 2. Run training: python src/training_pipeline.py")
    print(" 3. Start API: python api/app.py")
    print(" 4. Open browser: http://localhost:8000")