Saturday, July 16, 2011

Performance - Functors vs Functions

I recently finished Scott Meyers' book Effective STL. In Item 46 (Consider function objects instead of functions as algorithm parameters) he provides several reasons why one should use function objects (functors) over functions when using algorithms in the STT, the one point that i wanted to test out was the performance aspect. Scott claims that functors perform 50% faster in the worst case and in the best case 160% faster than functions.

Scotts Test Case
His test case involves calling the std::sort() function on a vector of 1 million random doubles. The comparison function used for the sort method is greater() for the functor case and a function that returns left > right for the function case. He describes this at a high level with a little bit of code. Here is my full implementation of his test case:

#include <stdio.h> //printf
#include <iostream> //cout, endl
#include <time.h> //timespec, clock_gettime(), CLOCK_PROCESS_CPUTIME_ID
#include <vector> //vector
#include <algorithm> //generate, sort
#include <functional> //binary_function, greater

using namespace std;

class RandomDoubleGenerator {
public:
    double operator()() {
        return (double)rand()/(double)rand();
    }
};

class TimespecAverage : public unary_function<struct timespec, void> {
public:
    TimespecAverage() : numTimespecs(0) {
    }
    
    void operator()(struct timespec t) {
        ++numTimespecs;
        sum += t.tv_sec;
        sum += t.tv_nsec * 0.000000001; // nano is 1x10^-9 seconds
    }
    
    double average() const {
        return sum / numTimespecs;
    }
    
private:
    double numTimespecs;
    double sum;
};

timespec diff(struct timespec start, struct timespec end) {
    struct timespec diff;
    if ((end.tv_nsec - start.tv_nsec) < 0) { //compensate for something like: start - 12.9, end 13.1
        diff.tv_sec = end.tv_sec - start.tv_sec - 1;
        diff.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
    } else {
        diff.tv_sec = end.tv_sec - start.tv_sec;
        diff.tv_nsec = end.tv_nsec - start.tv_nsec;
    }
    return diff;
}

timespec time(void (*functionToTime)(vector<double>&), vector<double>& data) {
    struct timespec start;
    struct timespec stop;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    functionToTime(data);
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);
    return diff(start, stop);
}

void printElapsed(struct timespec functor, struct timespec function) {
    printf("%li.%.9li    %li.%.9li \n", functor.tv_sec, functor.tv_nsec,
            function.tv_sec, function.tv_nsec);
}

void printAverages(vector<timespec>& functorTimes, vector<timespec>& functionTimes) {
    TimespecAverage functorAverage = for_each(functorTimes.begin(), functorTimes.end(), TimespecAverage());
    TimespecAverage functionAverage = for_each(functionTimes.begin(), functionTimes.end(), TimespecAverage());
    cout << endl << "Average" << endl;
    printf("%1.9f    %1.9f \n", functorAverage.average(), functionAverage.average());
}

bool greaterThan(double left, double right) {
    return left > right;
}

void sortFunctor(vector<double>& data) {
    sort(data.begin(), data.end(), greater<double>());
}

void sortFunction(vector<double>& data) {
    sort(data.begin(), data.end(), greaterThan);
}

int main(int argc, char** argv) {
    const int LOOPS = 10;
    const int NUM_DATA = 10000000; //10million
    
    vector<timespec> functorTimes; 
    vector<timespec> functionTimes;
    functorTimes.reserve(LOOPS);
    functionTimes.reserve(LOOPS);

    /* timer */
    cout << "Functor        Function" << endl;
    for (int i = 0; i < LOOPS; ++i) {
        vector<double> functorData(NUM_DATA);
        generate(functorData.begin(), functorData.end(), RandomDoubleGenerator());
        //create a copy of our random data vector for use with our function sort()
        //this way both methods are using the exact same data set
        vector<double> functionData(functorData);

        /* functor */
        struct timespec functorElapsed = time(sortFunctor, functorData);
        functorTimes.push_back(functorElapsed);
        
        /* function */        
        struct timespec functionElapsed = time(sortFunction, functionData);
        functionTimes.push_back(functionElapsed);
        
        printElapsed(functorElapsed, functionElapsed);
    }
    
    printAverages(functorTimes, functionTimes);    
    return 0;
}

Worst Case
I compiled this code using g++ 4.5.2 and the following compiler options (no optimizations):
g++ -lrt -ansi -pedantic -g -Wall

Below are my results (in seconds):

Functor        Function
7.439764495    7.755845935 
7.735696621    7.713312266 
7.587360216    7.501062211 
7.576613316    8.058204506 
7.539563825    7.602603278 
7.471405721    7.776704292 
7.750183958    7.940997362 
7.721883637    7.679431332 
7.470727771    7.681166662 
7.588803904    7.776486039 


Average
7.588200346    7.748581388

As you can see the function and functor sort times are almost equal (functors are only 2% faster) and do not line up with the 50% worst case that Scott claims.

Best Case
I compiled the code again this time turning on full optimizations:
g++ -lrt -ansi -pedantic -O3 -Wall

Below are my results (in seconds):
Functor        Function
1.530773436    2.230420939
1.487986836    2.240578317
1.576760542    2.280175380
1.521323717    2.334807950
1.499050688    2.203657561
1.518878620    2.272559906
1.581513080    2.212788678
1.563577081    2.393493630
1.616584657    2.313904654
1.581532295    2.291148088

Average
1.547798095    2.277353510


As you can see the call to std::sort() where you pass a regular function as the comparator is on average 0.729555415 seconds (47.1%) slower than the call to std::sort() using the functor as the comparator. This fairly close to what Scott claimed in his book as the worst case.

As Scott explains in his book, the reason for the performance increase is due to inlining. The compiler is able to inline the comparator functor but it is unable to inline the comparator function. This is because when you give the comparator function to sort() you are really giving a pointer to that function and the compiler is unable to inline the invokation of the function pointer. Where in the functor case the compiler will happily inline the operater() function in the functor and since you are passing the functor by value to sort() there is no pointer dereferencing.

Conclusion
I was unable to produce Scott's best case where functors performed 160% faster than their function counterparts, however i was able to produce an increase of 50% by simply using a functor over a function. Either way, there is no doubt that functors do produce more performant code than their function counterparts, but compiler optimizations need to be turned on in order to notice any real difference.

Tuesday, July 12, 2011

Howto store java.lang.Class objects in db4o

I ran into an interesting problem when querying our db4o NoSQL database the other day. I had an object with a java.lang.Class as one of it's fields. Every time i queried for this object the Class field was set to null. Below is an example of what my model class looked like:
public class MessageServicePreference {
    private final UUID userId;
    private String serviceName;
    private Class<?> serviceClass;

    public MessageServicePreference(UUID userId, MessageService service) {
        this.userId = userId;
        this.serviceName = service.getName();
        this.serviceClass = service.getClass(); //gets the actual implementation class, not MessagingService.class
    }

    public UUID getUserId() {
        return userId;
    }

    public String getServiceName() {
        return serviceName;
    }

    public Class<?> getServiceClass() {
        return serviceClass;
    }

    public void setService(MessageService service) {
        serviceName = service.getName();
        serviceClass = service.getClass();
    }
}

So, like i said when i queried for this class using db4o:
public MessageServicePreference getPreferenceFor(User user) {
    MessageServicePreference preference = null;
    ObjectContainer db = DatabaseConnectionPool.aquireConnection();
    try {
        Query query = db.query();
        query.constrain(MessageServicePreference.class);
        query.descend("userId").constrain(user.getId());
        List<MessageServicePreference> preferences = query.execute();
        if (!preferences.isEmpty()) {
            preference = preferences.get(0);
        }
        return preference;
    } finally {
        DatabaseConnectionPool.releaseConnection(db);
    }
}

This query would return a MessageServicePreference, but when i called .getServiceClass() it would return null. So, by inspecting the java.lang.Class source code i found that all of the fields in the source were either static or transient, and db4o doesn't persist static or transient fields. Then it i found the method java.lang.Class.forName(String className) and came up with the following solution:

public class MessageServicePreference {
    private final UUID userId;
    private String serviceName;
    private String serviceClassName;

    public MessageServicePreference(UUID userId, MessageService service) {
        this.userId = userId;
        this.serviceName = service.getName();
        this.serviceClassName = service.getClass().getName(); //gets the actual implementation class's fully qualified name (package.to.class.MessageServiceImpl)
    }

    public UUID getUserId() {
        return userId;
    }

    public String getServiceName() {
        return serviceName;
    }

    public Class<?> getServiceClass() {
        return Class.forName(serviceClassName);
    }

    public void setService(MessageService service) {
        serviceName = service.getName();
        serviceClassName = service.getClass().getName();
    }
}

This new model class saves all of it's data properly in db4o and when queried populates the serviceClassName field correctly, which in turn makes the getServiceClass() method work as expected.

Howto map UUID's using the Dozer library

In our new server module we are using an automatic object mapping library called
Dozer. When trying to map an object with a java.util.UUID field i kept receiving the following exception:

Jul 12, 2011 5:18:01 PM org.dozer.MappingProcessor mapField
SEVERE: Field mapping error -->
  MapId: null
  Type: null
  Source parent class: path.to.some.class.TreeNode
  Source field name: parentUuid
  Source field type: class java.util.UUID
  Source field value: 7ed46d06-e715-448b-ae69-54123d4e2c82
  Dest parent class: path.to.some.class.TreeNode
  Dest field name: parentUuid
  Dest field type: java.util.UUID
org.dozer.MappingException: java.lang.NoSuchMethodException: java.util.UUID.<init>()
 at org.dozer.util.MappingUtils.throwMappingException(MappingUtils.java:82)
 at org.dozer.factory.ConstructionStrategies$ByConstructor.newInstance(ConstructionStrategies.java:261)
 at org.dozer.factory.ConstructionStrategies$ByConstructor.create(ConstructionStrategies.java:245)
 at org.dozer.factory.DestBeanCreator.create(DestBeanCreator.java:65)
 at org.dozer.MappingProcessor.mapCustomObject(MappingProcessor.java:477)
 at org.dozer.MappingProcessor.mapOrRecurseObject(MappingProcessor.java:434)
 at org.dozer.MappingProcessor.mapFromFieldMap(MappingProcessor.java:330)
 at org.dozer.MappingProcessor.mapField(MappingProcessor.java:276)
 at org.dozer.MappingProcessor.map(MappingProcessor.java:245)
 at org.dozer.MappingProcessor.mapCustomObject(MappingProcessor.java:483)
 at org.dozer.MappingProcessor.mapOrRecurseObject(MappingProcessor.java:434)
 at org.dozer.MappingProcessor.addToSet(MappingProcessor.java:703)
 at org.dozer.MappingProcessor.mapCollection(MappingProcessor.java:545)
 at org.dozer.MappingProcessor.mapOrRecurseObject(MappingProcessor.java:422)
 at org.dozer.MappingProcessor.mapFromFieldMap(MappingProcessor.java:330)
 at org.dozer.MappingProcessor.mapField(MappingProcessor.java:276)
 at org.dozer.MappingProcessor.map(MappingProcessor.java:245)
 at org.dozer.MappingProcessor.map(MappingProcessor.java:187)
 at org.dozer.MappingProcessor.map(MappingProcessor.java:133)
 at org.dozer.MappingProcessor.map(MappingProcessor.java:128)
 at org.dozer.DozerBeanMapper.map(DozerBeanMapper.java:118)
 // a bunch of my code
Caused by: java.lang.NoSuchMethodException: java.util.UUID.<init>()
 at java.lang.Class.getConstructor0(Class.java:2723)
 at java.lang.Class.getDeclaredConstructor(Class.java:2002)
 at org.dozer.factory.ConstructionStrategies$ByConstructor.newInstance(ConstructionStrategies.java:257)
 ... 34 more

My object that i'm trying to map is part of a tree structure, each node has a parent that he cares about, if the parent is null then that object is a root in the tree (there can be any number of roots). My tree node looks something like:

public class TreeNode {

   private final UUID id; //the id of this node
   private String name;
   private UUID parentId; //id of the parent, or null if there is no parent

   public TreeNode() {
       this(UUID.randomUUID(), "");
   }

   public TreeNode(UUID id, String name) { //used if we dont have a parent
       this(UUID.randomUUID(), "", null);
   }

   public TreeNode(UUID id, String name, UUID parentId) {
       this.id = id;
       this.name = name;
       this.parentId = parentId;
   }

   public UUID getId() {
       return id;
   }
   
   public String getName() {
       return name;
   }

   public void setName(String name) {
       this.name = name;
   }

   public UUID getParentId() {
       return parentId;
   }

   public void setParentId(UUID parentId) {
       this.parentId = parentId;
   }
}

Now when the DozerBeanMapper.map() function tries to map an object it tries to perform a "deep" copy, below is a rough algorithm outline of how it works.
public void map(Source to Destination) {
    for each field in Source {
        if Source.field is a primitive {
            Destination.field = Source.field;
        } else if Source.field is null {
            Destination.field = new field(); //does this using reflection (Class.newInstance())
            map(Source.field to Destination.field);
        } else {
            map(Source.field to Destination.field);
        }
    }
}


Now, at the bottom of my exception you can see the line "Caused by: java.lang.NoSuchMethodException: java.util.UUID.()" which is java's way of saying "This class doesn't a default constructor" which in java.util.UUID's case is completely true. However, java.util.UUID does have a non-default constructor with the signiture:

UUID(long mostSigBits, long leastSigBits) 

So, how do you tell Dozer to use this non-default constructor only for java.util.UUID's? Simple, you need to create an org.dozer.BeanFactory and tell Dozer to use this BeanFactory to create new destination objects instead of calling the default constructor. There are two ways to do this: XML and via the API, i will cover both.

Bean Factory
First off we need to create our BeanFactory that will be used to create our new UUID objects:
package path.to.my.bean.factory;

import java.util.UUID;
import org.dozer.BeanFactory;

public class UuidBeanFactory implements BeanFactory {

    public Object createBean(Object sourceBean, Class<?> destinationType, String mapId) {
        if (sourceBean == null) {
            return null;
        }
        UUID source = (UUID) sourceBean;
        UUID destination = new UUID(source.getMostSignificantBits(), source.getLeastSignificantBits());
        return destination;
    }
}

What this BeanFactory does under the hood is to call the non-default constructor on java.util.UUID using the information from our already constructed UUID in order to copy the fields. Since there are only two fields that matter on a UUID, mostSigBits and leastSigBits we can simply pass those values into the non-default constructor and BAM we have our copied UUID object.

Now i'll show you how to tell Dozer to use this BeanFactory when creating new instances of UUID.

XML
We need to create an XML file which contains our Dozer mapping configuration, in my case i called it "mappings.xml".
<?xml version="1.0" encoding="UTF-8"?>
<mappings xmlns="http://dozer.sourceforge.net"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://dozer.sourceforge.net
          http://dozer.sourceforge.net/schema/beanmapping.xsd">
    <mapping type="one-way">
        <class-a>java.util.UUID</class-a>
        <class-b bean-factory="path.to.my.bean.factory.UuidBeanFactory">java.util.UUID</class-b>
    </mapping> 
</mappings>

This file is telling Dozer that when it sees the source as java.util.UUID and the destination as a java.util.UUID to use the path.to.my.bean.factory.UuidBeanFactory to create the destination object.

Now this XML file needs to be put somewhere on the classpath, i put mine in my src/ folder and by default when i build everything in src/ gets copied into build/classes which is automatically put on my classpath. If you don't have that luxury you can specify the classpath on the command line when running java by passing in the -cp option, see this link for a slew of ways to set your classpath .

When Dozer loads we now need to tell it to load this custom mappings file, we do this manually when creating the new instance of the DozerBeanMapper by calling setMappingFiles() and giving it our "mappings.xml" file name:
import java.util.UUID;
import org.dozer.DozerBeanMapper;

public class MapUtils {
    
    /* DozerMapperBean is thread safe so no locking */
    private static final DozerBeanMapper 
    
    static {
        MAPPER = new DozerBeanMapper();
        MAPPER.setMappingFiles(Collections.singletonList("mappings.xml"));
    }
    
    private MapUtils() {
    }
    
    public static <T> void map(T source, T destination) {
        MAPPER.map(source, destination);
    }
}

Now you should have a fully functioning UUID mapper!

API
Alternatively we can use the API to specify that Dozer should use path.to.my.bean.factory.UuidBeanFactory to create the destination UUID objects...
import java.util.UUID;
import org.dozer.DozerBeanMapper;
import org.dozer.loader.api.BeanMappingBuilder;
import org.dozer.loader.api.TypeMappingOptions;
import path.to.my.bean.factory.UuidBeanFactory;

public class MapUtils {
    
    /* DozerMapperBean is thread safe so no locking */
    private static final DozerBeanMapper MAPPER;
    
    static {
        MAPPER = new DozerBeanMapper();
        BeanMappingBuilder builder = new BeanMappingBuilder() {
            protected void configure() {
                mapping(UUID.class, UUID.class, TypeMappingOptions.oneWay(), TypeMappingOptions.beanFactory(UuidBeanFactory.class.getName()));
            }
        };
        MAPPER.addMapping(builder);
    }
    
    private MapUtils() {
    }
    
    public static <T> void map(T source, T destination) {
        MAPPER.map(source, destination);
    }
}

Hope this helps someone else!

Saturday, July 9, 2011

Event Dispatching and Fail-Fast Iterators

In Java Event Dispatching seems simple on the surface, but there are a few details that developers need to be aware of when implementing their own systems.

First lets design a simple event system which consists of a few parts:
  • Data - the data which someone cares about
  • Event - A signal from a dispatcher to a listener signifying that some data has changed
  • Listener - An object which cares about some data and wants to get events about when this data changes
  • Dispatcher - The object which has some data that can be listened to. The dispatcher is also responsible for sending events to listeners when data changes
public class Data {
    private String name;
   
    public Data(String name) {
        this.name = name;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }
}
public class DataEvent {

    private final DataDispatcher source;

    public DataEvent(DataDispatcher source) {
        this.source = source;
    }

    public DataDispatcher getSource() {
        return source;
    }
}
public interface DataListener {

    public void dataChanged(DataEvent event);
}
/*
 * Implementation of our DataListener
 */
public class SomeoneWhoCares implements DataListener {

    public SomeoneWhoCares() {
    }

    public void dataChanged(DataEvent event) {
        DataDispatcher dispatcher = event.getSource();
        for (Data data : dispatcher.getData()) {
            System.out.println("Data name: " + data.getName());
        }
        dispatcher.removeDataListener(this);
    }
}
import java.util.ArrayList;
import java.util.List;

public class DataDispatcher {

    private final List<DataListener> listeners;
    private final List<Data> dataList;

    public DataDispatcher() {
        listeners = new ArrayList<DataListener>();
        dataList = new ArrayList<Data>();
    }

    public void addDataListener(DataListener listener) {
        listeners.add(listener);
    }

    public void removeDataListener(DataListener listener) {
        listeners.remove(listener);
    }

    protected void fireDataEvent(DataEvent event) {
        for (DataListener listener : listeners) {
            listener.dataChanged(event);
        }
    }
    
    public List<Data> getData() {
        return dataList;
    }

    public void addData(Data data) {
        dataList.add(data);
        fireDataEvent(new DataEvent(this)); //tell the listeners that the Data has changed
    }

    public static void main(String[] args) {
        DataDispatcher dispatcher = new DataDispatcher();
        SomeoneWhoCares someoneWhoCares = new SomeoneWhoCares();
        dispatcher.addDataListener(someoneWhoCares); //someoneWhoCares will not get events when DataDispatcher changes
         
        //create some data
        Data john = new Data("John Smith", 12);
        Data bob = new Data("Bob Thompson", 8);
        Data adam = new Data("Adam Wells", 3);

        //add the data to the Dispatcher which causes DataEvents to be fired
        dispatcher.addData(john);
        dispatcher.addData(bob);
        dispatcher.addData(adam);
    }
}
This code looks simple and correct on the surface, however there is a fundamental problem which causes the following exception to be thrown during runtime:
Exception in thread "main" java.util.ConcurrentModificationException
 at java.util.ArrayList$Itr.checkForComodification(ArrayList.java:782)
 at java.util.ArrayList$Itr.next(ArrayList.java:754)
 at DataDispatcher.fireDataEvent(DataDispatcher.java:23)
 at DataDispatcher.addData(DataDispatcher.java:34)
 at DataDispatcher.main(DataDispatcher.java:48)

This exception is not very helpful, if you look at where the exception is throw from my code (at DataDispatcher.fireDataEvent(DataDispatcher.java:23)) this is the for loop of the fireDataEvent() method which doesn't make much sense at the first look. Lets focus on the event dispatching and listening code, below is the snippet in focus:

public class SomeoneWhoCares implements DataListener {
...
    public void dataChanged(DataEvent event) {
        DataDispatcher dispatcher = event.getSource();
        for (Data data : dispatcher.getData()) {
            System.out.println("Data name: " + data.getName());
        }
        dispatcher.removeDataListener(this);
    }
}

public class DataDispatcher {

    private final List<DataListener> listeners;
...

    public void removeDataListener(DataListener listener) {
        listeners.remove(listener);
    }

    protected void fireDataEvent(DataEvent event) {
        for (DataListener listener : listeners) {
            listener.dataChanged(event);
        }
    }
...
}

Now SomeoneWhoCares is the only listener registered for DataEvent's on the DataDispatcher, so when fireDataEvent() is called he is the only one gets called, so it must be something he is doing that is causing the exception to happen. If you notice at the end of SomeoneWhoCares.dataChanged() the listener unregister's himself from the DataDispatcher which modifies the listeners list inside of DataDispatcher. But, why does this throw an exception?

To answer this question we need to understand what the for-each loop in java actually means. When a for-each loop is compiled into byte code the compiler translates the for-each loop into a while loop using an iterator from the java.util.Collection interface. So for our example, DataDispatcher.fireDataEvent() would effectively translate into something like:
protected void fireDataEvent(DataEvent event) {
    Iterator iter = listeners.iterator();
    while (iter.hasNext()) {
        DataListener listener = iter.next();
        listener.dataChanged(event);
    }
}

Now that we understand what the for-each loop really means we can investigate why calling SomeoneWhoCares.dataChanged() causes a java.util.ConcurrentModificationException. This exception is caused by the dispatcher.removeDataListener(this) line from SomeoneWhoCares.dataChanged(), which removes the DataListener from DataDispatcher's listeners list. The exception isn't thrown though when the listener is removed, it is thrown on the next iteration through the for loop when we access our iterator. This is because java's iterator's are fail-fast iterators, which means that from the time the iterator is created, until the time that it is garbage collected, if there is any modification to the collection that they are iterating over an exception will be thrown during the next attempt to use the iterator.

So, now that we know the problem, how do we fix it? Obviously we can't use iterators (ListIterator's suffer from the same problem), so instead we have two choices. The first choice is to use a collection designed for concurrent modification's, java.util.concurrent.CopyOnWriteArrayList. This collection's iterator's are guaranteed to not throw exceptions during iteration if a modification is made to the list, however this comes at a serious performance penalty if the list is modified frequently. Every time a CopyOnWriteArrayList is modified the entire list is copied into a new array (hence the name CopyOnWrite...) which can be seriously detrimental to the performance of an application if frequent modifications occur. However if this list is set in stone and never modified then using a CopyOnWriteArrayList in place of our ArrayList will solve our problem.

public class DataDispatcher {

    private final List<DataListener> listeners;
...

    public DataDispatcher() {
        this.listeners = new CopyOnWriteArrayList<DataListener>();
        ...
    }
...

    protected void fireDataEvent(DataEvent event) {
        for (DataListener listener : listeners) {
            listener.dataChanged(event);
        }
    }
...
}

However there is a second option for those of us who need to have the ability to modify our collection of listeners frequently. There is a nice method in the list interface which is going to help solve all of our problems, that method is List.get(index i). With this method we can access the elements in our list without the use of an iterator and therefore be able to modify our listeners list during event firing. Lets look at a sample implementation:
protected void fireDataEvent(DataEvent event) {
    for (int i = 0; i < listeners.size(); i++) {
        listeners.get(i).dataChanged(event);
    }
}
This seems like a very reasonable implementation, however we need to think about the case where a listener removes himself during the firing process (just like SomoneWhoCare's does). Lets say we have a two listeners in our list: List: listenerA[0], listenerB[1] We start firing and our index i starts off at 0. Now listenerA decides to remove himself, the list becomes: List: listenerB[0] Now the end of the loop occurs, i gets incremented to two, the size of the list is 1, so the loop exists without calling listenerB! Here is a simple solution to prevent this problem. Iterate over the list backwards! By iterating backwards, we can guarantee that if listeners remove themselves that all other listeners will be called. Below is a revised sample:
protected void fireDataEvent(DataEvent event) {
    for (int i = listeners.size() - 1; i >= 0; i--) {
        listeners.get(i).dataChanged(event);
    }
}
So, if you are planning on implementing your own event dispatching system, please be aware that listeners may modify the listeners collection during iteration and your dispatching system needs to be able to handle that! Here is a summary of tips:
  • In Java, avoid iterators
  • Use CopOnWriteArrayList if you can ensure no (or extremely infrequent) modifications
  • Use List.get(index i) method to help solve your lack of iterators
  • Iterate backwards so listeners can remove themselvs
Finally, the complete and correct way to implement DataDispatcher:
public class DataDispatcher {

    private final List<DataListener> listeners;
    private final List<Data> dataList;

    public DataDispatcher() {
        listeners = new ArrayList<DataListener>();
        dataList = new ArrayList<Data>();
    }

    public void addDataListener(DataListener listener) {
        listeners.add(listener);
    }

    public void removeDataListener(DataListener listener) {
        listeners.remove(listener);
    }

    protected void fireDataEvent(DataEvent event) {
        for (int i = listeners.size() - 1; i >= 0; i--) {
            listeners.get(i).dataChanged(event);
        }
    }
    
    public List<Data> getData() {
        return dataList;
    }

    public void addData(Data data) {
        dataList.add(data);
        fireDataEvent(new DataEvent(this)); //tell the listeners that the Data has changed
    }

    public static void main(String[] args) {
        DataDispatcher dispatcher = new DataDispatcher();
        SomeoneWhoCares someoneWhoCares = new SomeoneWhoCares();
        dispatcher.addDataListener(someoneWhoCares); //someoneWhoCares will not get events when DataDispatcher changes
         
        //create some data
        Data john = new Data("John Smith", 12);
        Data bob = new Data("Bob Thompson", 8);
        Data adam = new Data("Adam Wells", 3);

        //add the data to the Dispatcher which causes DataEvents to be fired
        dispatcher.addData(john);
        dispatcher.addData(bob);
        dispatcher.addData(adam);
    }
}