{ "data_id": "122", "name": "detroit", "exact_name": "detroit", "version": 1, "version_label": "1", "description": "**Author**: \n**Source**: Unknown - \n**Please cite**: \n\nData from StatLib (ftp stat.cmu.edu\/datasets)\n\n This is the data set called `DETROIT' in the book `Subset selection in\n regression' by Alan J. Miller published in the Chapman & Hall series of\n monographs on Statistics & Applied Probability, no. 40. The data are\n unusual in that a subset of three predictors can be found which gives a\n very much better fit to the data than the subsets found from the Efroymson\n stepwise algorithm, or from forward selection or backward elimination.\n \n The original data were given in appendix A of `Regression analysis and its\n application: A data-oriented approach' by Gunst & Mason, Statistics\n textbooks and monographs no. 24, Marcel Dekker. It has caused problems\n because some copies of the Gunst & Mason book do not contain all of the data,\n and because Miller does not say which variables he used as predictors and\n which is the dependent variable. (HOM was the dependent variable, and the\n predictors were FTP ... WE)\n \n The data were collected by J.C. Fisher and used in his paper: \"Homicide in\n Detroit: The Role of Firearms\", Criminology, vol.14, 387-400 (1976)\n \n \n The data are on the homicide rate in Detroit for the years 1961-1973.\n FTP - Full-time police per 100,000 population\n UEMP - % unemployed in the population\n MAN - number of manufacturing workers in thousands\n LIC - Number of handgun licences per 100,000 population\n GR - Number of handgun registrations per 100,000 population\n CLEAR - % homicides cleared by arrests\n WM - Number of white males in the population\n NMAN - Number of non-manufacturing workers in thousands\n GOV - Number of government workers in thousands\n HE - Average hourly earnings\n WE - Average weekly earnings\n \n HOM - Number of homicides per 100,000 of population\n ACC - Death rate in accidents per 100,000 population\n ASR - Number of assaults per 100,000 population\n \n N.B. Each case takes two lines.", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": null, "contributor": null, "date": "2014-04-23 13:17:21", "update_comment": null, "last_update": "2014-04-23 13:17:21", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/3645\/dataset_2194_detroit.arff", "default_target_attribute": "ASR", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "detroit", "Data from StatLib (ftp stat.cmu.edu\/datasets) This is the data set called `DETROIT' in the book `Subset selection in regression' by Alan J. Miller published in the Chapman & Hall series of monographs on Statistics & Applied Probability, no. 40. The data are unusual in that a subset of three predictors can be found which gives a very much better fit to the data than the subsets found from the Efroymson stepwise algorithm, or from forward selection or backward elimination. The original data were g " ], "weight": 5 }, "qualities": { "NumberOfInstances": 13, "NumberOfFeatures": 14, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 14, "NumberOfSymbolicFeatures": 0, "AutoCorrelation": -29.0975, "CfsSubsetEval_DecisionStumpAUC": null, "CfsSubsetEval_DecisionStumpErrRate": null, "CfsSubsetEval_DecisionStumpKappa": null, "CfsSubsetEval_NaiveBayesAUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "CfsSubsetEval_kNN1NAUC": null, "CfsSubsetEval_kNN1NErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "ClassEntropy": null, "DecisionStumpAUC": null, "DecisionStumpErrRate": null, "DecisionStumpKappa": null, "Dimensionality": 1.0769230769230769, "EquivalentNumberOfAtts": null, "J48.00001.AUC": null, "J48.00001.ErrRate": null, "J48.00001.Kappa": null, "J48.0001.AUC": null, "J48.0001.ErrRate": null, "J48.0001.Kappa": null, "J48.001.AUC": null, "J48.001.ErrRate": null, "J48.001.Kappa": null, "MajorityClassPercentage": null, "MajorityClassSize": null, "MaxAttributeEntropy": null, "MaxKurtosisOfNumericAtts": 0.8277169417382741, "MaxMeansOfNumericAtts": 452507.53846153844, "MaxMutualInformation": null, "MaxNominalAttDistinctValues": null, "MaxSkewnessOfNumericAtts": 1.0277105820502919, "MaxStdDevOfNumericAtts": 64568.12389589694, "MeanAttributeEntropy": null, "MeanKurtosisOfNumericAtts": -0.7408342566634085, "MeanMeansOfNumericAtts": 32568.318516483512, "MeanMutualInformation": null, "MeanNoiseToSignalRatio": null, "MeanNominalAttDistinctValues": null, "MeanSkewnessOfNumericAtts": 0.27136164725935785, "MeanStdDevOfNumericAtts": 4684.078088274687, "MinAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.6423435643266524, "MinMeansOfNumericAtts": 3.9476923076923076, "MinMutualInformation": null, "MinNominalAttDistinctValues": null, "MinSkewnessOfNumericAtts": -0.8740051296230028, "MinStdDevOfNumericAtts": 0.9665587915051506, "MinorityClassPercentage": null, "MinorityClassSize": null, "NaiveBayesAUC": null, "NaiveBayesErrRate": null, "NaiveBayesKappa": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "PercentageOfNumericFeatures": 100, "PercentageOfSymbolicFeatures": 0, "Quartile1AttributeEntropy": null, "Quartile1KurtosisOfNumericAtts": -1.3401945317885595, "Quartile1MeansOfNumericAtts": 41.463653846153846, "Quartile1MutualInformation": null, "Quartile1SkewnessOfNumericAtts": -0.08862503452243534, "Quartile1StdDevOfNumericAtts": 10.779320211599078, "Quartile2AttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": -1.0028597627887728, "Quartile2MeansOfNumericAtts": 245.14038461538462, "Quartile2MutualInformation": null, "Quartile2SkewnessOfNumericAtts": 0.27766822843268346, "Quartile2StdDevOfNumericAtts": 44.66144695447802, "Quartile3AttributeEntropy": null, "Quartile3KurtosisOfNumericAtts": -0.05013295006877472, "Quartile3MeansOfNumericAtts": 548.3484615384615, "Quartile3MutualInformation": null, "Quartile3SkewnessOfNumericAtts": 0.8309826158736997, "Quartile3StdDevOfNumericAtts": 148.8337488048325, "REPTreeDepth1AUC": null, "REPTreeDepth1ErrRate": null, "REPTreeDepth1Kappa": null, "REPTreeDepth2AUC": null, "REPTreeDepth2ErrRate": null, "REPTreeDepth2Kappa": null, "REPTreeDepth3AUC": null, "REPTreeDepth3ErrRate": null, "REPTreeDepth3Kappa": null, "RandomTreeDepth1AUC": null, "RandomTreeDepth1ErrRate": null, "RandomTreeDepth1Kappa": null, "RandomTreeDepth2AUC": null, "RandomTreeDepth2ErrRate": null, "RandomTreeDepth2Kappa": null, "RandomTreeDepth3AUC": null, "RandomTreeDepth3ErrRate": null, "RandomTreeDepth3Kappa": null, "StdvNominalAttDistinctValues": null, "kNN1NAUC": null, "kNN1NErrRate": null, "kNN1NKappa": null }, "tags": [ { "tag": "study_298", "uploader": "0" }, { "tag": "study_66", "uploader": "0" } ], "features": [ { "name": "ASR", "index": "13", "type": "numeric", "distinct": "13", "missing": "0", "target": "1", "min": "218", "max": "473", "mean": "312", "stdev": "73" }, { "name": "FTP", "index": "0", "type": "numeric", "distinct": "13", "missing": "0", "min": "260", "max": "390", "mean": "305", "stdev": "47" }, { "name": "UEMP", "index": "1", "type": "numeric", "distinct": "13", "missing": "0", "min": "3", "max": "11", "mean": "6", "stdev": "2" }, { "name": "MAN", "index": "2", "type": "numeric", "distinct": "13", "missing": "0", "min": "456", "max": "614", "mean": "556", "stdev": "50" }, { "name": "LIC", "index": "3", "type": "numeric", "distinct": "13", "missing": "0", "min": "156", "max": "1131", "mean": "538", "stdev": "316" }, { "name": "GR", "index": "4", "type": "numeric", "distinct": "13", "missing": "0", "min": "180", "max": "1030", "mean": "546", "stdev": "311" }, { "name": "CLEAR", "index": "5", "type": "numeric", "distinct": "13", "missing": "0", "min": "59", "max": "94", "mean": "81", "stdev": "13" }, { "name": "WM", "index": "6", "type": "numeric", "distinct": "13", "missing": "0", "min": "359647", "max": "558724", "mean": "452508", "stdev": "64568" }, { "name": "NMAN", "index": "7", "type": "numeric", "distinct": "13", "missing": "0", "min": "538", "max": "820", "mean": "674", "stdev": "95" }, { "name": "GOV", "index": "8", "type": "numeric", "distinct": "13", "missing": "0", "min": "134", "max": "231", "mean": "186", "stdev": "37" }, { "name": "HE", "index": "9", "type": "numeric", "distinct": "13", "missing": "0", "min": "3", "max": "6", "mean": "4", "stdev": "1" }, { "name": "WE", "index": "10", "type": "numeric", "distinct": "13", "missing": "0", "min": "117", "max": "258", "mean": "170", "stdev": "43" }, { "name": "HOM", "index": "11", "type": "numeric", "distinct": "13", "missing": "0", "min": "9", "max": "52", "mean": "25", "stdev": "16" }, { "name": "ACC", "index": "12", "type": "numeric", "distinct": "13", "missing": "0", "min": "39", "max": "55", "mean": "47", "stdev": "5" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }