% Conference paper (COM.Geo 2014). Cleaned-up portal export:
%   - the `series` field was removed because it repeated `booktitle` verbatim,
%     which makes standard styles print the proceedings title twice;
%   - authors are stored in the unambiguous "Last, First" form;
%   - the COM.Geo acronym in `booktitle` is brace-protected against recasing.
% year/month/day give the date recorded by the export (24 Sep 2014); the
% conference dates (4-6 Aug 2014) are kept in `note`.
% NOTE(review): `address` holds a country, not the publisher's city - confirm
% the intended value before relying on it in printed references.
@inproceedings{addf16e7be4b425a9fd22293f28704a2,
  author    = {Alhudhaif, Adi and Yan, Tong and Berkovich, Simon},
  title     = {On the organization of cluster voting with massive distributed streams},
  booktitle = {Proceedings - 5th International Conference on Computing for Geospatial Research and Application, {COM.Geo} 2014},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2014},
  month     = sep,
  day       = {24},
  pages     = {55--62},
  doi       = {10.1109/COM.Geo.2014.3},
  language  = {English},
  keywords  = {big data clusterization, cloud computing, majority algorithm, stream processing},
  abstract  = {Data processing is one of the important challenges on Big Data. In this paper we investigate optimal processing algorithm for massive data streams, propose a new processing algorithm called multi-buffer based majority algorithm. The algorithm maintains time complexity of O(n) and selects prevalent elements of frequencies as low as 1\%. Our experiments indicate that multi-buffer based majority algorithm has improvements on both accuracy and efficiency. Moreover, we use multibuffer based algorithm to process data streams on single system and distributed system. These experiments indicate that using multi-buffer based algorithm can have better performance on distributed system. Moreover, we give explanations of the experiments' result and indicate several major factors which influence the result accuracy: stream size, element range in the stream, frequency of predominant elements and our buffer sets.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 5th International Conference on Computing for Geospatial Research and Application, COM.Geo 2014 ; Conference date: 04-08-2014 Through 06-08-2014},
}