Spaces:

Dev-ks04
/

contexto-api

Running

Dev-ks04

feat: Contexto FastAPI backend - intent-aware summarization engine

39028c9 1 day ago

1.87 kB

	{
	"project": {
	"name": "Intent-Aware Context-Preserving Summarization System",
	"version": "1.0.0",
	"description": "Advanced summarization system for long technical documents using generative AI with a RAG pipeline and intelligent model selection",
	"author": "AI Development Team",
	"license": "MIT"
	},
	"model": {
	"primary_model": "t5-small",
	"alternative_models": [
	"t5-small",
	"t5-base",
	"t5-large",
	"mbart-50-small",
	"mt5-small",
	"facebook/bart-base",
	"facebook/bart-large-cnn",
	"google/pegasus-arxiv",
	"google/pegasus-pubmed",
	"allenai/led-base-16384"
	],
	"device": "auto",
	"max_input_length": 512,
	"max_output_length": 150,
	"min_output_length": 50,
	"num_beams": 2,
	"supported_languages": [
	"english", "spanish", "french", "german", "italian",
	"portuguese", "chinese", "japanese", "korean", "arabic",
	"hindi", "russian", "turkish", "vietnamese", "thai"
	],
	"default_language": "english"
	},
	"summarization": {
	"intent_types": [
	"technical_overview",
	"detailed_analysis",
	"methodology",
	"results",
	"conclusion",
	"abstract"
	],
	"chunk_size": 512,
	"chunk_overlap": 50,
	"preserve_context": true
	},
	"preprocessing": {
	"remove_citations": true,
	"remove_equations": false,
	"remove_stopwords": false,
	"clean_text": true,
	"normalize_whitespace": true
	},
	"evaluation": {
	"metrics": ["rouge1", "rouge2", "rougeL", "bert_score"],
	"use_stemmer": true,
	"human_evaluation": true
	},
	"datasets": {
	"sources": [
	"arXiv",
	"PubMed",
	"Scientific Papers"
	],
	"data_dir": "data/",
	"cache_dir": "models/"
	},
	"output": {
	"results_dir": "results/",
	"save_format": "json",
	"log_level": "INFO"
	}
	}