{ "data_id": "89", "name": "adult", "exact_name": "adult", "version": 2, "version_label": "2", "description": "**Author**: Ronny Kohavi and Barry Becker \n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Adult) - 1996-05-01 \n**Please cite**: Ron Kohavi, \"Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid\", Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, 1996 \n\n**Note: this is the original version from the UCI repository, with training and test sets merged.**\n\nPrediction task is to determine whether a person makes over 50K a year. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))\n\nRonny Kohavi and Barry Becker. Data Mining and Visualization, Silicon Graphics. \ne-mail: ronnyk '@' live.com for questions. \n", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2015-06-09 16:39:06", "update_comment": "added target attribute", "last_update": "2015-06-09 16:56:26", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/1595261\/phpMawTba", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "adult", "Prediction task is to determine whether a person makes over 50K a year. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) Ronny Kohavi and Barry Becker. Data Mining and Visualization, Silicon Graphics. e-mail: ronnyk '@' live.com for questions. " ], "weight": 5 }, "qualities": { "NumberOfInstances": 48842, "NumberOfFeatures": 15, "NumberOfClasses": 2, "NumberOfMissingValues": 6465, "NumberOfInstancesWithMissingValues": 3620, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 9, "AutoCorrelation": 0.634425994553756, "CfsSubsetEval_DecisionStumpAUC": 0.8818783209106105, "CfsSubsetEval_DecisionStumpErrRate": 0.14370828385406004, "CfsSubsetEval_DecisionStumpKappa": 0.5636251551185664, "CfsSubsetEval_NaiveBayesAUC": 0.8818783209106105, "CfsSubsetEval_NaiveBayesErrRate": 0.14370828385406004, "CfsSubsetEval_NaiveBayesKappa": 0.5636251551185664, "CfsSubsetEval_kNN1NAUC": 0.8818783209106105, "CfsSubsetEval_kNN1NErrRate": 0.14370828385406004, "CfsSubsetEval_kNN1NKappa": 0.5636251551185664, "ClassEntropy": 0.7938438393644257, "DecisionStumpAUC": 0.7595182625190399, "DecisionStumpErrRate": 0.23928176569346055, "DecisionStumpKappa": 0, "Dimensionality": 0.00030711273084640267, "EquivalentNumberOfAtts": 11.068507517484338, "J48.00001.AUC": 0.8856629671731931, "J48.00001.ErrRate": 0.13906064452725114, "J48.00001.Kappa": 0.5852801575170704, "J48.0001.AUC": 0.8856629671731931, "J48.0001.ErrRate": 0.13906064452725114, "J48.0001.Kappa": 0.5852801575170704, "J48.001.AUC": 0.8856629671731931, "J48.001.ErrRate": 0.13906064452725114, "J48.001.Kappa": 0.5852801575170704, "MajorityClassPercentage": 76.07182343065395, "MajorityClassSize": 37155, "MaxAttributeEntropy": 3.44192266924963, "MaxKurtosisOfNumericAtts": 152.69309629815925, "MaxMeansOfNumericAtts": 189664.13459727284, "MaxMutualInformation": 0.16542318099233, "MaxNominalAttDistinctValues": 41, "MaxSkewnessOfNumericAtts": 11.89465899659272, "MaxStdDevOfNumericAtts": 105604.02542315713, "MeanAttributeEntropy": 1.7809891200338273, "MeanKurtosisOfNumericAtts": 30.359637681213712, "MeanMeansOfNumericAtts": 31819.974765570616, "MeanMutualInformation": 0.0717209468494675, "MeanNoiseToSignalRatio": 23.832203118732952, "MeanNominalAttDistinctValues": 11.222222222222221, "MeanSkewnessOfNumericAtts": 3.063860808334838, "MeanStdDevOfNumericAtts": 18914.620326608216, "MinAttributeEntropy": 0.795215031650176, "MinKurtosisOfNumericAtts": -0.18426874062378573, "MinMeansOfNumericAtts": 10.078088530363212, "MinMutualInformation": 0.00818704228545, "MinNominalAttDistinctValues": 2, "MinSkewnessOfNumericAtts": -0.31652485666094055, "MinStdDevOfNumericAtts": 2.5709727555918307, "MinorityClassPercentage": 23.928176569346054, "MinorityClassSize": 11687, "NaiveBayesAUC": 0.8913978617632222, "NaiveBayesErrRate": 0.16713074812661236, "NaiveBayesKappa": 0.4929623256960114, "NumberOfBinaryFeatures": 2, "PercentageOfBinaryFeatures": 13.333333333333334, "PercentageOfInstancesWithMissingValues": 7.411653904426519, "PercentageOfMissingValues": 0.8824372466319971, "PercentageOfNumericFeatures": 40, "PercentageOfSymbolicFeatures": 60, "Quartile1AttributeEntropy": 0.8343198263526672, "Quartile1KurtosisOfNumericAtts": 0.42324176943424885, "Quartile1MeansOfNumericAtts": 31.502211211662093, "Quartile1MutualInformation": 0.01068033823523, "Quartile1SkewnessOfNumericAtts": 0.09993102873665205, "Quartile1StdDevOfNumericAtts": 9.936326207089905, "Quartile2AttributeEntropy": 1.6008102873649352, "Quartile2KurtosisOfNumericAtts": 4.504453651154136, "Quartile2MeansOfNumericAtts": 63.96234797919819, "Quartile2MutualInformation": 0.0623734340938, "Quartile2SkewnessOfNumericAtts": 0.9982360975676194, "Quartile2StdDevOfNumericAtts": 208.3575310294492, "Quartile3AttributeEntropy": 2.736773252802121, "Quartile3KurtosisOfNumericAtts": 53.18403354052852, "Quartile3MeansOfNumericAtts": 48225.334368985714, "Quartile3MutualInformation": 0.14076964990508, "Quartile3SkewnessOfNumericAtts": 6.4010213924528, "Quartile3StdDevOfNumericAtts": 31990.020649029368, "REPTreeDepth1AUC": 0.8784273311027897, "REPTreeDepth1ErrRate": 0.15347446869497564, "REPTreeDepth1Kappa": 0.5465099669497687, "REPTreeDepth2AUC": 0.8784273311027897, "REPTreeDepth2ErrRate": 0.15347446869497564, "REPTreeDepth2Kappa": 0.5465099669497687, "REPTreeDepth3AUC": 0.8784273311027897, "REPTreeDepth3ErrRate": 0.15347446869497564, "REPTreeDepth3Kappa": 0.5465099669497687, "RandomTreeDepth1AUC": 0.7538927205350257, "RandomTreeDepth1ErrRate": 0.1894476065681176, "RandomTreeDepth1Kappa": 0.4732340127438301, "RandomTreeDepth2AUC": 0.7538927205350257, "RandomTreeDepth2ErrRate": 0.1894476065681176, "RandomTreeDepth2Kappa": 0.4732340127438301, "RandomTreeDepth3AUC": 0.7538927205350257, "RandomTreeDepth3ErrRate": 0.1894476065681176, "RandomTreeDepth3Kappa": 0.4732340127438301, "StdvNominalAttDistinctValues": 12.152960316089429, "kNN1NAUC": 0.7147496645704182, "kNN1NErrRate": 0.2063797551287826, "kNN1NKappa": 0.4309676913874722 }, "tags": [ { "tag": "study_14", "uploader": "1" }, { "tag": "study_1", "uploader": "0" }, { "tag": "study_358", "uploader": "0" }, { "tag": "study_298", "uploader": "0" }, { "tag": "study_322", "uploader": "0" }, { "tag": "study_358", "uploader": "0" } ], "features": [ { "name": "class", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ ">50K", "<=50K" ], [ [ "11687", "0" ], [ "0", "37155" ] ] ] }, { "name": "age", "index": "0", "type": "numeric", "distinct": "74", "missing": "0", "min": "17", "max": "90", "mean": "39", "stdev": "14" }, { "name": "workclass", "index": "1", "type": "nominal", "distinct": "8", "missing": "2799", "distr": [ [ "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov", "Local-gov", "State-gov", "Without-pay", "Never-worked" ], [ [ "7387", "26519" ], [ "1077", "2785" ], [ "938", "757" ], [ "561", "871" ], [ "927", "2209" ], [ "530", "1451" ], [ "2", "19" ], [ "0", "10" ] ] ] }, { "name": "fnlwgt", "index": "2", "type": "numeric", "distinct": "28523", "missing": "0", "min": "12285", "max": "1490400", "mean": "189664", "stdev": "105604" }, { "name": "education", "index": "3", "type": "nominal", "distinct": "16", "missing": "0", "distr": [ [ "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school", "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters", "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool" ], [ [ "3313", "4712" ], [ "2063", "8815" ], [ "92", "1720" ], [ "2503", "13281" ], [ "617", "217" ], [ "413", "1188" ], [ "522", "1539" ], [ "41", "715" ], [ "62", "893" ], [ "48", "609" ], [ "1459", "1198" ], [ "8", "239" ], [ "87", "1302" ], [ "431", "163" ], [ "27", "482" ], [ "1", "82" ] ] ] }, { "name": "education-num", "index": "4", "type": "numeric", "distinct": "16", "missing": "0", "min": "1", "max": "16", "mean": "10", "stdev": "3" }, { "name": "marital-status", "index": "5", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "Married-civ-spouse", "Divorced", "Never-married", "Separated", "Widowed", "Married-spouse-absent", "Married-AF-spouse" ], [ [ "9984", "12395" ], [ "671", "5962" ], [ "733", "15384" ], [ "99", "1431" ], [ "128", "1390" ], [ "58", "570" ], [ "14", "23" ] ] ] }, { "name": "occupation", "index": "6", "type": "nominal", "distinct": "14", "missing": "2809", "distr": [ [ "Tech-support", "Craft-repair", "Other-service", "Sales", "Exec-managerial", "Prof-specialty", "Handlers-cleaners", "Machine-op-inspct", "Adm-clerical", "Farming-fishing", "Transport-moving", "Priv-house-serv", "Protective-serv", "Armed-Forces" ], [ [ "420", "1026" ], [ "1383", "4729" ], [ "204", "4719" ], [ "1475", "4029" ], [ "2908", "3178" ], [ "2784", "3388" ], [ "138", "1934" ], [ "372", "2650" ], [ "768", "4843" ], [ "173", "1317" ], [ "481", "1874" ], [ "3", "239" ], [ "308", "675" ], [ "5", "10" ] ] ] }, { "name": "relationship", "index": "7", "type": "nominal", "distinct": "6", "missing": "0", "distr": [ [ "Wife", "Own-child", "Husband", "Not-in-family", "Other-relative", "Unmarried" ], [ [ "1093", "1238" ], [ "111", "7470" ], [ "8846", "10870" ], [ "1276", "11307" ], [ "52", "1454" ], [ "309", "4816" ] ] ] }, { "name": "race", "index": "8", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "White", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other", "Black" ], [ [ "10607", "31155" ], [ "409", "1110" ], [ "55", "415" ], [ "50", "356" ], [ "566", "4119" ] ] ] }, { "name": "sex", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "Female", "Male" ], [ [ "1769", "14423" ], [ "9918", "22732" ] ] ] }, { "name": "capital-gain", "index": "10", "type": "numeric", "distinct": "123", "missing": "0", "min": "0", "max": "99999", "mean": "1079", "stdev": "7452" }, { "name": "capital-loss", "index": "11", "type": "numeric", "distinct": "99", "missing": "0", "min": "0", "max": "4356", "mean": "88", "stdev": "403" }, { "name": "hours-per-week", "index": "12", "type": "numeric", "distinct": "96", "missing": "0", "min": "1", "max": "99", "mean": "40", "stdev": "12" }, { "name": "native-country", "index": "13", "type": "nominal", "distinct": "41", "missing": "857", "distr": [ [ "United-States", "Cambodia", "England", "Puerto-Rico", "Canada", "Germany", "Outlying-US(Guam-USVI-etc)", "India", "Japan", "Greece", "South", "China", "Cuba", "Iran", "Honduras", "Philippines", "Italy", "Poland", "Jamaica", "Vietnam", "Mexico", "Portugal", "Ireland", "France", "Dominican-Republic", "Laos", "Ecuador", "Taiwan", "Haiti", "Columbia", "Hungary", "Guatemala", "Nicaragua", "Scotland", "Thailand", "Yugoslavia", "El-Salvador", "Trinadad&Tobago", "Peru", "Hong", "Holand-Netherlands" ], [ [ "10694", "33138" ], [ "9", "19" ], [ "47", "80" ], [ "20", "164" ], [ "63", "119" ], [ "58", "148" ], [ "1", "22" ], [ "62", "89" ], [ "32", "60" ], [ "18", "31" ], [ "20", "95" ], [ "36", "86" ], [ "34", "104" ], [ "22", "37" ], [ "2", "18" ], [ "85", "210" ], [ "34", "71" ], [ "17", "70" ], [ "15", "91" ], [ "7", "79" ], [ "47", "904" ], [ "12", "55" ], [ "11", "26" ], [ "16", "22" ], [ "5", "98" ], [ "2", "21" ], [ "6", "39" ], [ "26", "39" ], [ "9", "66" ], [ "4", "81" ], [ "6", "13" ], [ "3", "85" ], [ "3", "46" ], [ "3", "18" ], [ "5", "25" ], [ "8", "15" ], [ "11", "144" ], [ "2", "25" ], [ "4", "42" ], [ "8", "22" ], [ "0", "1" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 11, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 11 }