{ "data_id": "59", "name": "Internet-Advertisements", "exact_name": "Internet-Advertisements", "version": 1, "version_label": null, "description": "**Author**: Nicholas Kushmerick \n**Source**: [UCI](http:\/\/archive.ics.uci.edu\/ml\/datasets\/Internet+Advertisements) - 1998 \n**Please cite**: \n\nThis dataset represents a set of possible advertisements on Internet pages. The features encode the geometry of the image (if available) as well as phrases occurring in the URL, the image's URL and alt text, the anchor text, and words occurring near the anchor text. The task is to predict whether an image is an advertisement (\"ad\") or not (\"nonad\").\n\nRelevant Papers: N. Kushmerick (1999). \"Learning to remove Internet advertisements\", 3rd Int Conf Autonomous Agents. \nAvailable at: http:\/\/rexa.info\/paper\/2fdc1cee89b7f4f2c9227d6f5d9b05d22c5ab3e9", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": "\"Nicholas Kushmerick\"", "contributor": null, "date": "2014-10-30 11:15:44", "update_comment": null, "last_update": "2014-10-30 11:15:44", "licence": "Public", "status": "active", "error_message": "Problem validating uploaded description file: XML does not correspond to XSD schema. Error Element '{http:\/\/openml.org\/openml}name': [facet 'pattern'] The value 'origurl*target+®ion' is not accepted by the pattern '\\p{IsBasicLatin}*'.\n on line 8030 column 0. Error Element '{http:\/\/openml.org\/openml}name': [facet 'pattern'] The value 'origurl*®ion+0' is not accepted by the pattern '\\p{IsBasicLatin}*'.\n on line 9067 column 0. Error Element '{http:\/\/openml.org\/openml}name': [facet 'pattern'] The value 'origurl*®ion' is not accepted by the pattern '\\p{IsBasicLatin}*'.\n on line 16241 column 0. ,Problem validating uploaded description file: XML does not correspond to XSD schema. \nError Element '{http:\/\/openml.org\/openml}name': [facet 'pattern'] The value 'origurl*target+®ion' is not accepted by the pattern '\\p{IsBasicLatin}*'.\n on line 8030 column 0. Error Element '{http:\/\/openml.org\/openml}name': [facet 'pattern'] The value 'origurl*®ion+0' is not accepted by the pattern '\\p{", "url": "https:\/\/www.openml.org\/data\/download\/116567\/phpCzcrGG", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Internet-Advertisements", "This dataset represents a set of possible advertisements on Internet pages. The features encode the geometry of the image (if available) as well as phrases occurring in the URL, the image's URL and alt text, the anchor text, and words occurring near the anchor text. The task is to predict whether an image is an advertisement (\"ad\") or not (\"nonad\"). Relevant Papers: N. Kushmerick (1999). \"Learning to remove Internet advertisements\", 3rd Int Conf Autonomous Agents. Available at: http:\/\/rexa.info\/ " ], "weight": 5 }, "qualities": [], "tags": [ { "tag": "study_14", "uploader": "1" }, { "tag": "study_1", "uploader": "0" }, { "tag": "study_463", "uploader": "0" }, { "tag": "study_263", "uploader": "0" }, { "tag": "study_793", "uploader": "0" }, { "tag": "study_463", "uploader": "0" }, { "tag": "study_106", "uploader": "0" }, { "tag": "study_328", "uploader": "0" } ], "features": [], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 10, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 10 }