{ "data_id": "44", "name": "collins", "exact_name": "collins", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nThe following are data used in an analysis of the Brown and Frown corpora for my doctoral dissertation titled ``Variations in Written English: Characterizing Authors' Rhetorical Language Choices Across Corpora of Published Texts\" (Completed at Carnegie Mellon Univ, 2003). The source of the corpora was the ICAME CD-ROM (get info at ).\n\nThe data were generated from the texts using tagging and visualization software, Docuscope.\n\nThe first row is the variable names. The genre of each text (assigned by the Brown corpus compilers) is in 'Genre' column and the corpus is listed in the 'corpus' column with 1=Brown and 2=Frown corpus.\n\nThe dataset may be freely used and distributed for non-commercial purposes.\n\nJeff Collins 11 July 2003\n\n\n\nInformation about the dataset\nCLASSTYPE: nominal\nCLASSINDEX: last", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-09-28 23:51:43", "update_comment": "attribute counter is a row id", "last_update": "2015-04-15 17:08:50", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/52590\/collins.arff", "default_target_attribute": "Corp.Genre", "row_id_attribute": "Counter", "ignore_attribute": "\"Text\"", "runs": 28, "suggest": { "input": [ "collins", "The following are data used in an analysis of the Brown and Frown corpora for my doctoral dissertation titled ``Variations in Written English: Characterizing Authors' Rhetorical Language Choices Across Corpora of Published Texts\" (Completed at Carnegie Mellon Univ, 2003). The source of the corpora was the ICAME CD-ROM (get info at ). The data were generated from the texts using tagging and visualization software, Docuscope. The first row is the variable names. The " ], "weight": 5 }, "qualities": { "NumberOfInstances": 500, "NumberOfFeatures": 22, "NumberOfClasses": 15, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 19, "NumberOfSymbolicFeatures": 3, "AutoCorrelation": 0.9719438877755511, "CfsSubsetEval_DecisionStumpAUC": 1, "CfsSubsetEval_DecisionStumpErrRate": 0, "CfsSubsetEval_DecisionStumpKappa": 1, "CfsSubsetEval_NaiveBayesAUC": 1, "CfsSubsetEval_NaiveBayesErrRate": 0, "CfsSubsetEval_NaiveBayesKappa": 1, "CfsSubsetEval_kNN1NAUC": 1, "CfsSubsetEval_kNN1NErrRate": 0, "CfsSubsetEval_kNN1NKappa": 1, "ClassEntropy": 3.648562076012777, "DecisionStumpAUC": 0.6860989947887339, "DecisionStumpErrRate": 0.738, "DecisionStumpKappa": 0.13972779028941518, "Dimensionality": 0.044, "EquivalentNumberOfAtts": 1.9999999999999982, "J48.00001.AUC": 1, "J48.00001.ErrRate": 0, "J48.00001.Kappa": 1, "J48.0001.AUC": 1, "J48.0001.ErrRate": 0, "J48.0001.Kappa": 1, "J48.001.AUC": 1, "J48.001.ErrRate": 0, "J48.001.Kappa": 1, "MajorityClassPercentage": 16, "MajorityClassSize": 80, "MaxAttributeEntropy": 3.648562076012777, "MaxKurtosisOfNumericAtts": 29.62455127322131, "MaxMeansOfNumericAtts": 31.48718, "MaxMutualInformation": 3.64856207601278, "MaxNominalAttDistinctValues": 15, "MaxSkewnessOfNumericAtts": 4.220469825930483, "MaxStdDevOfNumericAtts": 5.208705277603819, "MeanAttributeEntropy": 1.8242810380063885, "MeanKurtosisOfNumericAtts": 4.030981426007789, "MeanMeansOfNumericAtts": 3.31444, "MeanMutualInformation": 1.82428103800639, "MeanNoiseToSignalRatio": -8.520135889664247e-16, "MeanNominalAttDistinctValues": 10.333333333333334, "MeanSkewnessOfNumericAtts": 1.21821335635969, "MeanStdDevOfNumericAtts": 1.0452797721655507, "MinAttributeEntropy": -0, "MinKurtosisOfNumericAtts": -0.5698798457138974, "MinMeansOfNumericAtts": 0.28419999999999995, "MinMutualInformation": 0, "MinNominalAttDistinctValues": 1, "MinSkewnessOfNumericAtts": -0.06825964191203086, "MinStdDevOfNumericAtts": 0.27275253966882085, "MinorityClassPercentage": 1.2, "MinorityClassSize": 6, "NaiveBayesAUC": 0.9219646259812533, "NaiveBayesErrRate": 0.36, "NaiveBayesKappa": 0.6030906011854361, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "PercentageOfNumericFeatures": 86.36363636363636, "PercentageOfSymbolicFeatures": 13.636363636363635, "Quartile1AttributeEntropy": -0, "Quartile1KurtosisOfNumericAtts": 0.31263763178831194, "Quartile1MeansOfNumericAtts": 0.6347200000000002, "Quartile1MutualInformation": 0, "Quartile1SkewnessOfNumericAtts": 0.5431345075805047, "Quartile1StdDevOfNumericAtts": 0.41142614811251527, "Quartile2AttributeEntropy": 1.8242810380063885, "Quartile2KurtosisOfNumericAtts": 0.7633424303002827, "Quartile2MeansOfNumericAtts": 1.32206, "Quartile2MutualInformation": 1.82428103800639, "Quartile2SkewnessOfNumericAtts": 0.8729927853121452, "Quartile2StdDevOfNumericAtts": 0.7706544869632264, "Quartile3AttributeEntropy": 3.648562076012777, "Quartile3KurtosisOfNumericAtts": 3.7036204365443157, "Quartile3MeansOfNumericAtts": 2.6724200000000002, "Quartile3MutualInformation": 3.64856207601278, "Quartile3SkewnessOfNumericAtts": 1.6103948341786611, "Quartile3StdDevOfNumericAtts": 1.010259841572945, "REPTreeDepth1AUC": 1, "REPTreeDepth1ErrRate": 0, "REPTreeDepth1Kappa": 1, "REPTreeDepth2AUC": 1, "REPTreeDepth2ErrRate": 0, "REPTreeDepth2Kappa": 1, "REPTreeDepth3AUC": 1, "REPTreeDepth3ErrRate": 0, "REPTreeDepth3Kappa": 1, "RandomTreeDepth1AUC": 0.7744023386009782, "RandomTreeDepth1ErrRate": 0.404, "RandomTreeDepth1Kappa": 0.5552110765653487, "RandomTreeDepth2AUC": 0.7744023386009782, "RandomTreeDepth2ErrRate": 0.404, "RandomTreeDepth2Kappa": 0.5552110765653487, "RandomTreeDepth3AUC": 0.7744023386009782, "RandomTreeDepth3ErrRate": 0.404, "RandomTreeDepth3Kappa": 0.5552110765653487, "StdvNominalAttDistinctValues": 8.082903768654761, "kNN1NAUC": 1, "kNN1NErrRate": 0, "kNN1NKappa": 1 }, "tags": [ { "tag": "study_14", "uploader": "1" }, { "tag": "study_1", "uploader": "0" }, { "tag": "study_143", "uploader": "0" }, { "tag": "study_99", "uploader": "0" }, { "tag": "study_234", "uploader": "0" }, { "tag": "study_346", "uploader": "0" }, { "tag": "study_250", "uploader": "0" }, { "tag": "study_368", "uploader": "0" }, { "tag": "study_3", "uploader": "0" }, { "tag": "study_39", "uploader": "0" }, { "tag": "study_47", "uploader": "0" }, { "tag": "study_73", "uploader": "0" }, { "tag": "study_100", "uploader": "0" }, { "tag": "study_109", "uploader": "0" }, { "tag": "study_111", "uploader": "0" }, { "tag": "study_125", "uploader": "0" }, { "tag": "study_130", "uploader": "0" }, { "tag": "study_142", "uploader": "0" }, { "tag": "study_144", "uploader": "0" }, { "tag": "study_166", "uploader": "0" }, { "tag": "study_168", "uploader": "0" }, { "tag": "study_170", "uploader": "0" }, { "tag": "study_172", "uploader": "0" }, { "tag": "study_174", "uploader": "0" }, { "tag": "study_196", "uploader": "0" }, { "tag": "study_218", "uploader": "0" }, { "tag": "study_235", "uploader": "0" }, { "tag": "study_257", "uploader": "0" }, { "tag": "study_272", "uploader": "0" }, { "tag": "study_287", "uploader": "0" }, { "tag": "study_299", "uploader": "0" }, { "tag": "study_311", "uploader": "0" }, { "tag": "study_323", "uploader": "0" }, { "tag": "study_335", "uploader": "0" }, { "tag": "study_347", "uploader": "0" }, { "tag": "study_359", "uploader": "0" } ], "features": [ { "name": "Corp.Genre", "index": "23", "type": "nominal", "distinct": "15", "missing": "0", "target": "1", "distr": [ [ "101", "102", "103", "104", "105", "106", "107", "108", "109", "110", "111", "112", "113", "114", "115" ], [ [ "44", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "27", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "36", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "48", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "75", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "30", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "80", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "24", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "6", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "9" ] ] ] }, { "name": "LinearGuidance", "index": "12", "type": "numeric", "distinct": "352", "missing": "0", "min": "0", "max": "11", "mean": "5", "stdev": "2" }, { "name": "Corpus", "index": "22", "type": "nominal", "distinct": "1", "missing": "0", "distr": [ [ "1" ], [ [ "44", "27", "17", "17", "36", "48", "75", "30", "80", "29", "24", "6", "29", "29", "9" ] ] ] }, { "name": "Counter", "index": "21", "type": "numeric", "distinct": "500", "missing": "0", "identifier": "1", "min": "1", "max": "500", "mean": "251", "stdev": "144" }, { "name": "Genre", "index": "20", "type": "nominal", "distinct": "15", "missing": "0", "distr": [ [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15" ], [ [ "44", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "27", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "36", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "48", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "75", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "30", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "80", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "24", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "6", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "9" ] ] ] }, { "name": "Text_Coverage", "index": "19", "type": "numeric", "distinct": "443", "missing": "0", "min": "17", "max": "43", "mean": "31", "stdev": "5" }, { "name": "ShiftingEvents", "index": "18", "type": "numeric", "distinct": "132", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "TimeInterval", "index": "17", "type": "numeric", "distinct": "173", "missing": "0", "min": "0", "max": "5", "mean": "1", "stdev": "1" }, { "name": "PastEvents", "index": "16", "type": "numeric", "distinct": "283", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "Motion", "index": "15", "type": "numeric", "distinct": "123", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "SpaceInterval", "index": "14", "type": "numeric", "distinct": "228", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "1" }, { "name": "WordPicture", "index": "13", "type": "numeric", "distinct": "371", "missing": "0", "min": "1", "max": "15", "mean": "5", "stdev": "2" }, { "name": "Text", "index": "0", "type": "nominal", "distinct": "500", "missing": "0", "ignore": "1", "distr": [] }, { "name": "Notifying", "index": "11", "type": "numeric", "distinct": "218", "missing": "0", "min": "1", "max": "5", "mean": "3", "stdev": "1" }, { "name": "Interacting", "index": "10", "type": "numeric", "distinct": "160", "missing": "0", "min": "0", "max": "8", "mean": "1", "stdev": "1" }, { "name": "Direct_Activity", "index": "9", "type": "numeric", "distinct": "78", "missing": "0", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "Share_SocTies", "index": "8", "type": "numeric", "distinct": "260", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "Reasoning", "index": "7", "type": "numeric", "distinct": "262", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "ThinkBack", "index": "6", "type": "numeric", "distinct": "130", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "ThinkAhead", "index": "5", "type": "numeric", "distinct": "172", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ThinkNegative", "index": "4", "type": "numeric", "distinct": "216", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "1" }, { "name": "ThinkPositive", "index": "3", "type": "numeric", "distinct": "154", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "InnerThinking", "index": "2", "type": "numeric", "distinct": "262", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "FirstPerson", "index": "1", "type": "numeric", "distinct": "139", "missing": "0", "min": "0", "max": "6", "mean": "1", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 11, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 11 }