{ "data_id": "47", "name": "ada_agnostic", "exact_name": "ada_agnostic", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nDatasets from the Agnostic Learning vs. Prior Knowledge Challenge (http:\/\/www.agnostic.inf.ethz.ch)\n\nDataset from: http:\/\/www.agnostic.inf.ethz.ch\/datasets.php\n\n\nModified by TunedIT (converted to ARFF format)\n\n\nADA is the marketing database\n\nThe task of ADA is to discover high revenue people from census data. This is a two-class classification problem. The raw data from the census bureau is known as the Adult database in the UCI machine-learning repository. The 14 original attributes (features) include age, workclass, education,\nmarital status, occupation, native country, etc. It contains continuous, binary and categorical features. This dataset is from the \"agnostic learning track\", i.e. has access to a preprocessed numeric representation eliminating categorical variables, but the identity of the features is not revealed.\n\n\n\nData type: non-sparse\nNumber of features: 48\nNumber of examples and check-sums:\nPos_ex\tNeg_ex\tTot_ex\tCheck_sum\nTrain\t 1029\t 3118\t 4147\t6798109.00\nValid\t 103\t 312\t 415\t681151.00\n\n\nThis dataset contains samples from both training and validation datasets.", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-10-06 23:56:15", "update_comment": null, "last_update": "2014-10-06 23:56:15", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/53926\/ada_agnostic.arff", "default_target_attribute": "label", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "ada_agnostic", "Datasets from the Agnostic Learning vs. Prior Knowledge Challenge (http:\/\/www.agnostic.inf.ethz.ch) Dataset from: http:\/\/www.agnostic.inf.ethz.ch\/datasets.php Modified by TunedIT (converted to ARFF format) ADA is the marketing database The task of ADA is to discover high revenue people from census data. This is a two-class classification problem. The raw data from the census bureau is known as the Adult database in the UCI machine-learning repository. The 14 original attributes (features) includ " ], "weight": 5 }, "qualities": { "NumberOfInstances": 4562, "NumberOfFeatures": 49, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 48, "NumberOfSymbolicFeatures": 1, "AutoCorrelation": 0.6263977197982898, "CfsSubsetEval_DecisionStumpAUC": 0.8510710164934222, "CfsSubsetEval_DecisionStumpErrRate": 0.15585269618588338, "CfsSubsetEval_DecisionStumpKappa": 0.5482180588422919, "CfsSubsetEval_NaiveBayesAUC": 0.8510710164934222, "CfsSubsetEval_NaiveBayesErrRate": 0.15585269618588338, "CfsSubsetEval_NaiveBayesKappa": 0.5482180588422919, "CfsSubsetEval_kNN1NAUC": 0.8510710164934222, "CfsSubsetEval_kNN1NErrRate": 0.15585269618588338, "CfsSubsetEval_kNN1NKappa": 0.5482180588422919, "ClassEntropy": 0.8083116159412278, "DecisionStumpAUC": 0.7454583852723321, "DecisionStumpErrRate": 0.24813678211310827, "DecisionStumpKappa": 0, "Dimensionality": 0.010740903112669882, "EquivalentNumberOfAtts": null, "J48.00001.AUC": 0.8213220750188011, "J48.00001.ErrRate": 0.16374397194213064, "J48.00001.Kappa": 0.5354539672188178, "J48.0001.AUC": 0.8213220750188011, "J48.0001.ErrRate": 0.16374397194213064, "J48.0001.Kappa": 0.5354539672188178, "J48.001.AUC": 0.8213220750188011, "J48.001.ErrRate": 0.16374397194213064, "J48.001.Kappa": 0.5354539672188178, "MajorityClassPercentage": 75.18632178868917, "MajorityClassSize": 3430, "MaxAttributeEntropy": null, "MaxKurtosisOfNumericAtts": 4562.000000000357, "MaxMeansOfNumericAtts": 634.0243314335817, "MaxMutualInformation": null, "MaxNominalAttDistinctValues": 2, "MaxSkewnessOfNumericAtts": 67.54257916307186, "MaxStdDevOfNumericAtts": 158.022174834288, "MeanAttributeEntropy": null, "MeanKurtosisOfNumericAtts": 263.55954829257803, "MeanMeansOfNumericAtts": 34.15561522723951, "MeanMutualInformation": null, "MeanNoiseToSignalRatio": null, "MeanNominalAttDistinctValues": 2, "MeanSkewnessOfNumericAtts": 7.30511641739795, "MeanStdDevOfNumericAtts": 14.345589094666968, "MinAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.9971026126886824, "MinMeansOfNumericAtts": 0, "MinMutualInformation": null, "MinNominalAttDistinctValues": 2, "MinSkewnessOfNumericAtts": -1.9307981261762805, "MinStdDevOfNumericAtts": 0, "MinorityClassPercentage": 24.813678211310826, "MinorityClassSize": 1132, "NaiveBayesAUC": 0.8779730681821225, "NaiveBayesErrRate": 0.17645769399386235, "NaiveBayesKappa": 0.5169382883898451, "NumberOfBinaryFeatures": 1, "PercentageOfBinaryFeatures": 2.0408163265306123, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "PercentageOfNumericFeatures": 97.95918367346938, "PercentageOfSymbolicFeatures": 2.0408163265306123, "Quartile1AttributeEntropy": null, "Quartile1KurtosisOfNumericAtts": 2.6733714217933167, "Quartile1MeansOfNumericAtts": 0.03200350723366942, "Quartile1MutualInformation": null, "Quartile1SkewnessOfNumericAtts": 2.0437418198759207, "Quartile1StdDevOfNumericAtts": 0.17602861975046974, "Quartile2AttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": 9.61442876405773, "Quartile2MeansOfNumericAtts": 0.09381850065760627, "Quartile2MutualInformation": null, "Quartile2SkewnessOfNumericAtts": 3.4073764027269133, "Quartile2StdDevOfNumericAtts": 0.29132739185805345, "Quartile3AttributeEntropy": null, "Quartile3KurtosisOfNumericAtts": 26.309778807627485, "Quartile3MeansOfNumericAtts": 0.29559403770276177, "Quartile3MutualInformation": null, "Quartile3SkewnessOfNumericAtts": 5.319609448163551, "Quartile3StdDevOfNumericAtts": 0.4106138147956203, "REPTreeDepth1AUC": 0.8551572850240551, "REPTreeDepth1ErrRate": 0.158702323542306, "REPTreeDepth1Kappa": 0.5344441274541535, "REPTreeDepth2AUC": 0.8551572850240551, "REPTreeDepth2ErrRate": 0.158702323542306, "REPTreeDepth2Kappa": 0.5344441274541535, "REPTreeDepth3AUC": 0.8551572850240551, "REPTreeDepth3ErrRate": 0.158702323542306, "REPTreeDepth3Kappa": 0.5344441274541535, "RandomTreeDepth1AUC": 0.7240880198621599, "RandomTreeDepth1ErrRate": 0.21109162647961421, "RandomTreeDepth1Kappa": 0.4420281332658328, "RandomTreeDepth2AUC": 0.7240880198621599, "RandomTreeDepth2ErrRate": 0.21109162647961421, "RandomTreeDepth2Kappa": 0.4420281332658328, "RandomTreeDepth3AUC": 0.7240880198621599, "RandomTreeDepth3ErrRate": 0.21109162647961421, "RandomTreeDepth3Kappa": 0.4420281332658328, "StdvNominalAttDistinctValues": 0, "kNN1NAUC": 0.6916852960265378, "kNN1NErrRate": 0.22599736957474792, "kNN1NKappa": 0.38761465082844926 }, "tags": [ { "tag": "study_14", "uploader": "1" }, { "tag": "study_1", "uploader": "0" }, { "tag": "study_429", "uploader": "0" }, { "tag": "study_480", "uploader": "0" }, { "tag": "study_661", "uploader": "0" }, { "tag": "study_745", "uploader": "0" }, { "tag": "study_721", "uploader": "0" }, { "tag": "study_344", "uploader": "0" }, { "tag": "study_293", "uploader": "0" }, { "tag": "study_722", "uploader": "0" }, { "tag": "study_572", "uploader": "0" }, { "tag": "study_509", "uploader": "0" }, { "tag": "study_463", "uploader": "0" } ], "features": [ { "name": "label", "index": "48", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "-1", "1" ], [ [ "3430", "0" ], [ "0", "1132" ] ] ] }, { "name": "attr25", "index": "25", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr24", "index": "24", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr26", "index": "26", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr27", "index": "27", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr28", "index": "28", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "attr29", "index": "29", "type": "numeric", "distinct": "56", "missing": "0", "min": "0", "max": "999", "mean": "12", "stdev": "84" }, { "name": "attr30", "index": "30", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr31", "index": "31", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "attr32", "index": "32", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr33", "index": "33", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr34", "index": "34", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr35", "index": "35", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr36", "index": "36", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr37", "index": "37", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr38", "index": "38", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr39", "index": "39", "type": "numeric", "distinct": "1", "missing": "0", "min": "0", "max": "0", "mean": "0", "stdev": "0" }, { "name": "attr40", "index": "40", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr41", "index": "41", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr42", "index": "42", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr43", "index": "43", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr44", "index": "44", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr45", "index": "45", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr46", "index": "46", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr47", "index": "47", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr12", "index": "12", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr1", "index": "1", "type": "numeric", "distinct": "77", "missing": "0", "min": "20", "max": "999", "mean": "409", "stdev": "120" }, { "name": "attr2", "index": "2", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "attr3", "index": "3", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr4", "index": "4", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr5", "index": "5", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr6", "index": "6", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr7", "index": "7", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr8", "index": "8", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr9", "index": "9", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr10", "index": "10", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr11", "index": "11", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr0", "index": "0", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr13", "index": "13", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr14", "index": "14", "type": "numeric", "distinct": "363", "missing": "0", "min": "14", "max": "794", "mean": "128", "stdev": "72" }, { "name": "attr15", "index": "15", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr16", "index": "16", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr17", "index": "17", "type": "numeric", "distinct": "16", "missing": "0", "min": "62", "max": "999", "mean": "634", "stdev": "158" }, { "name": "attr18", "index": "18", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr19", "index": "19", "type": "numeric", "distinct": "70", "missing": "0", "min": "189", "max": "999", "mean": "428", "stdev": "147" }, { "name": "attr20", "index": "20", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr21", "index": "21", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr22", "index": "22", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "attr23", "index": "23", "type": "numeric", "distinct": "52", "missing": "0", "min": "0", "max": "845", "mean": "22", "stdev": "97" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 11, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 11 }