{ "data_id": "44", "name": "collins", "exact_name": "collins", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nThe following are data used in an analysis of the Brown and Frown corpora for my doctoral dissertation titled ``Variations in Written English: Characterizing Authors' Rhetorical Language Choices Across Corpora of Published Texts\" (Completed at Carnegie Mellon Univ, 2003). The source of the corpora was the ICAME CD-ROM (get info at ).\n\nThe data were generated from the texts using tagging and visualization software, Docuscope.\n\nThe first row is the variable names. The genre of each text (assigned by the Brown corpus compilers) is in 'Genre' column and the corpus is listed in the 'corpus' column with 1=Brown and 2=Frown corpus.\n\nThe dataset may be freely used and distributed for non-commercial purposes.\n\nJeff Collins 11 July 2003\n\n\n\nInformation about the dataset\nCLASSTYPE: nominal\nCLASSINDEX: last", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-09-28 23:51:43", "update_comment": "attribute counter is a row id", "last_update": "2015-04-15 17:08:50", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/52590\/collins.arff", "default_target_attribute": "Corp.Genre", "row_id_attribute": "Counter", "ignore_attribute": "\"Text\"", "runs": 13, "suggest": { "input": [ "collins", "The following are data used in an analysis of the Brown and Frown corpora for my doctoral dissertation titled ``Variations in Written English: Characterizing Authors' Rhetorical Language Choices Across Corpora of Published Texts\" (Completed at Carnegie Mellon Univ, 2003). The source of the corpora was the ICAME CD-ROM (get info at ). The data were generated from the texts using tagging and visualization software, Docuscope. The first row is the variable names. The " ], "weight": 5 }, "qualities": { "NumberOfInstances": 500, "NumberOfFeatures": 22, "NumberOfClasses": 15, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 19, "NumberOfSymbolicFeatures": 3, "REPTreeDepth3AUC": 1, "DecisionStumpAUC": 0.68609899478873393, "MaxAttributeEntropy": 3.648562076012777, "MinKurtosisOfNumericAtts": -0.56987984571389738, "Quartile2MeansOfNumericAtts": 1.32206, "REPTreeDepth3ErrRate": 0, "DecisionStumpErrRate": 0.73799999999999999, "MaxKurtosisOfNumericAtts": 29.624551273221311, "MinMeansOfNumericAtts": 0.28419999999999995, "Quartile2MutualInformation": 1.8242810380063901, "REPTreeDepth3Kappa": 1, "DecisionStumpKappa": 0.13972779028941518, "MaxMeansOfNumericAtts": 31.487179999999999, "MinMutualInformation": 0, "Quartile2SkewnessOfNumericAtts": 0.87299278531214519, "RandomTreeDepth1AUC": 0.77440233860097818, "Dimensionality": 0.043999999999999997, "MaxMutualInformation": 3.6485620760127802, "MinNominalAttDistinctValues": 1, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 0.77065448696322636, "RandomTreeDepth1ErrRate": 0.40400000000000003, "EquivalentNumberOfAtts": 1.9999999999999982, "MaxNominalAttDistinctValues": 15, "MinSkewnessOfNumericAtts": -0.068259641912030863, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": 3.648562076012777, "RandomTreeDepth1Kappa": 0.55521107656534874, "J48.00001.AUC": 1, "MaxSkewnessOfNumericAtts": 4.2204698259304827, "MinStdDevOfNumericAtts": 0.27275253966882085, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 3.7036204365443157, "AutoCorrelation": 0.97194388777555107, "RandomTreeDepth2AUC": 0.77440233860097818, "J48.00001.ErrRate": 0, "MaxStdDevOfNumericAtts": 5.2087052776038192, "MinorityClassPercentage": 1.2, "PercentageOfNumericFeatures": 86.36363636363636, "Quartile3MeansOfNumericAtts": 2.6724200000000002, "CfsSubsetEval_DecisionStumpAUC": 1, "RandomTreeDepth2ErrRate": 0.40400000000000003, "J48.00001.Kappa": 1, "MeanAttributeEntropy": 1.8242810380063885, "MinorityClassSize": 6, "PercentageOfSymbolicFeatures": 13.636363636363635, "Quartile3MutualInformation": 3.6485620760127802, "CfsSubsetEval_DecisionStumpErrRate": 0, "CfsSubsetEval_DecisionStumpKappa": 1, "RandomTreeDepth2Kappa": 0.55521107656534874, "J48.0001.AUC": 1, "MeanKurtosisOfNumericAtts": 4.0309814260077887, "NaiveBayesAUC": 0.92196462598125328, "Quartile1AttributeEntropy": -0, "Quartile3SkewnessOfNumericAtts": 1.6103948341786611, "CfsSubsetEval_NaiveBayesAUC": 1, "RandomTreeDepth3AUC": 0.77440233860097818, "J48.0001.ErrRate": 0, "MeanMeansOfNumericAtts": 3.3144399999999998, "NaiveBayesErrRate": 0.35999999999999999, "Quartile1KurtosisOfNumericAtts": 0.31263763178831194, "Quartile3StdDevOfNumericAtts": 1.0102598415729449, "CfsSubsetEval_NaiveBayesErrRate": 0, "RandomTreeDepth3ErrRate": 0.40400000000000003, "J48.0001.Kappa": 1, "MeanMutualInformation": 1.8242810380063901, "NaiveBayesKappa": 0.60309060118543611, "Quartile1MeansOfNumericAtts": 0.63472000000000017, "REPTreeDepth1AUC": 1, "CfsSubsetEval_NaiveBayesKappa": 1, "RandomTreeDepth3Kappa": 0.55521107656534874, "J48.001.AUC": 1, "MeanNoiseToSignalRatio": -8.5201358896642474e-16, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": 0, "REPTreeDepth1ErrRate": 0, "REPTreeDepth1Kappa": 1, "CfsSubsetEval_kNN1NAUC": 1, "StdvNominalAttDistinctValues": 8.0829037686547611, "J48.001.ErrRate": 0, "MeanNominalAttDistinctValues": 10.333333333333334, "Quartile1SkewnessOfNumericAtts": 0.54313450758050474, "REPTreeDepth2AUC": 1, "CfsSubsetEval_kNN1NErrRate": 0, "kNN1NAUC": 1, "J48.001.Kappa": 1, "MeanSkewnessOfNumericAtts": 1.21821335635969, "Quartile1StdDevOfNumericAtts": 0.41142614811251527, "REPTreeDepth2ErrRate": 0, "CfsSubsetEval_kNN1NKappa": 1, "kNN1NErrRate": 0, "MajorityClassPercentage": 16, "MeanStdDevOfNumericAtts": 1.0452797721655507, "Quartile2AttributeEntropy": 1.8242810380063885, "REPTreeDepth2Kappa": 1, "ClassEntropy": 3.648562076012777, "kNN1NKappa": 1, "MajorityClassSize": 80, "MinAttributeEntropy": -0, "Quartile2KurtosisOfNumericAtts": 0.7633424303002827 }, "tags": [ { "tag": "study_14", "uploader": "1" }, { "tag": "study_1", "uploader": "0" }, { "tag": "study_2838", "uploader": "0" }, { "tag": "study_4291", "uploader": "0" }, { "tag": "study_11301", "uploader": "0" }, { "tag": "study_11460", "uploader": "0" }, { "tag": "study_16032", "uploader": "0" }, { "tag": "study_1715", "uploader": "0" }, { "tag": "study_7101", "uploader": "0" }, { "tag": "study_11266", "uploader": "0" }, { "tag": "study_11926", "uploader": "0" }, { "tag": "study_12664", "uploader": "0" }, { "tag": "study_12848", "uploader": "0" }, { "tag": "study_13025", "uploader": "0" }, { "tag": "study_14371", "uploader": "0" }, { "tag": "study_17384", "uploader": "0" }, { "tag": "study_17392", "uploader": "0" }, { "tag": "study_1782", "uploader": "0" }, { "tag": "study_3097", "uploader": "0" }, { "tag": "study_3858", "uploader": "0" }, { "tag": "study_13152", "uploader": "0" }, { "tag": "study_13624", "uploader": "0" }, { "tag": "study_1299", "uploader": "0" }, { "tag": "study_1893", "uploader": "0" }, { "tag": "study_2079", "uploader": "0" }, { "tag": "study_2794", "uploader": "0" }, { "tag": "study_2907", "uploader": "0" }, { "tag": "study_3367", "uploader": "0" }, { "tag": "study_4992", "uploader": "0" }, { "tag": "study_6416", "uploader": "0" }, { "tag": "study_6936", "uploader": "0" }, { "tag": "study_14029", "uploader": "0" }, { "tag": "study_668", "uploader": "0" }, { "tag": "study_4677", "uploader": "0" }, { "tag": "study_4910", "uploader": "0" }, { "tag": "study_14015", "uploader": "0" }, { "tag": "study_15119", "uploader": "0" }, { "tag": "study_15703", "uploader": "0" }, { "tag": "study_638", "uploader": "0" }, { "tag": "study_3858", "uploader": "0" }, { "tag": "study_4992", "uploader": "0" }, { "tag": "study_6071", "uploader": "0" }, { "tag": "study_6561", "uploader": "0" }, { "tag": "study_12215", "uploader": "0" }, { "tag": "study_13042", "uploader": "0" }, { "tag": "study_14015", "uploader": "0" }, { "tag": "study_1011", "uploader": "0" }, { "tag": "study_3858", "uploader": "0" }, { "tag": "study_3920", "uploader": "0" }, { "tag": "study_10375", "uploader": "0" }, { "tag": "study_11791", "uploader": "0" }, { "tag": "study_13775", "uploader": "0" }, { "tag": "study_2728", "uploader": "0" }, { "tag": "study_2907", "uploader": "0" }, { "tag": "study_3051", "uploader": "0" }, { "tag": "study_3524", "uploader": "0" }, { "tag": "study_13907", "uploader": "0" }, { "tag": "study_14572", "uploader": "0" }, { "tag": "study_16365", "uploader": "0" }, { "tag": "study_6835", "uploader": "0" }, { "tag": "study_11370", "uploader": "0" }, { "tag": "study_12660", "uploader": "0" }, { "tag": "study_308", "uploader": "0" }, { "tag": "study_5093", "uploader": "0" }, { "tag": "study_17174", "uploader": "0" }, { "tag": "study_13907", "uploader": "0" }, { "tag": "study_14289", "uploader": "0" }, { "tag": "study_14909", "uploader": "0" }, { "tag": "study_2600", "uploader": "0" }, { "tag": "study_3239", "uploader": "0" }, { "tag": "study_3368", "uploader": "0" }, { "tag": "study_3941", "uploader": "0" }, { "tag": "study_4949", "uploader": "0" }, { "tag": "study_6669", "uploader": "0" }, { "tag": "study_6836", "uploader": "0" }, { "tag": "study_6862", "uploader": "0" }, { "tag": "study_6998", "uploader": "0" }, { "tag": "study_7102", "uploader": "0" }, { "tag": "study_7491", "uploader": "0" }, { "tag": "study_15850", "uploader": "0" }, { "tag": "study_16256", "uploader": "0" } ], "features": [ { "name": "Corp.Genre", "index": "23", "type": "nominal", "distinct": "15", "missing": "0", "target": "1", "distr": [ [ "101", "102", "103", "104", "105", "106", "107", "108", "109", "110", "111", "112", "113", "114", "115" ], [ [ "44", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "27", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "36", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "48", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "75", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "30", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "80", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "24", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "6", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "9" ] ] ] }, { "name": "Text", "index": "0", "type": "nominal", "distinct": "500", "missing": "0", "ignore": "1", "distr": [] }, { "name": "FirstPerson", "index": "1", "type": "numeric", "distinct": "139", "missing": "0", "min": "0", "max": "6", "mean": "1", "stdev": "1" }, { "name": "InnerThinking", "index": "2", "type": "numeric", "distinct": "262", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "ThinkPositive", "index": "3", "type": "numeric", "distinct": "154", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "ThinkNegative", "index": "4", "type": "numeric", "distinct": "216", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "1" }, { "name": "ThinkAhead", "index": "5", "type": "numeric", "distinct": "172", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ThinkBack", "index": "6", "type": "numeric", "distinct": "130", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "Reasoning", "index": "7", "type": "numeric", "distinct": "262", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "Share_SocTies", "index": "8", "type": "numeric", "distinct": "260", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "Direct_Activity", "index": "9", "type": "numeric", "distinct": "78", "missing": "0", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "Interacting", "index": "10", "type": "numeric", "distinct": "160", "missing": "0", "min": "0", "max": "8", "mean": "1", "stdev": "1" }, { "name": "Notifying", "index": "11", "type": "numeric", "distinct": "218", "missing": "0", "min": "1", "max": "5", "mean": "3", "stdev": "1" }, { "name": "LinearGuidance", "index": "12", "type": "numeric", "distinct": "352", "missing": "0", "min": "0", "max": "11", "mean": "5", "stdev": "2" }, { "name": "WordPicture", "index": "13", "type": "numeric", "distinct": "371", "missing": "0", "min": "1", "max": "15", "mean": "5", "stdev": "2" }, { "name": "SpaceInterval", "index": "14", "type": "numeric", "distinct": "228", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "1" }, { "name": "Motion", "index": "15", "type": "numeric", "distinct": "123", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "PastEvents", "index": "16", "type": "numeric", "distinct": "283", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "TimeInterval", "index": "17", "type": "numeric", "distinct": "173", "missing": "0", "min": "0", "max": "5", "mean": "1", "stdev": "1" }, { "name": "ShiftingEvents", "index": "18", "type": "numeric", "distinct": "132", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "Text_Coverage", "index": "19", "type": "numeric", "distinct": "443", "missing": "0", "min": "17", "max": "43", "mean": "31", "stdev": "5" }, { "name": "Genre", "index": "20", "type": "nominal", "distinct": "15", "missing": "0", "distr": [ [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15" ], [ [ "44", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "27", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "17", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "36", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "48", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "75", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "30", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "80", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "24", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "6", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "29", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "9" ] ] ] }, { "name": "Counter", "index": "21", "type": "numeric", "distinct": "500", "missing": "0", "identifier": "1", "min": "1", "max": "500", "mean": "251", "stdev": "144" }, { "name": "Corpus", "index": "22", "type": "nominal", "distinct": "1", "missing": "0", "distr": [ [ "1" ], [ [ "44", "27", "17", "17", "36", "48", "75", "30", "80", "29", "24", "6", "29", "29", "9" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 11, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 11 }