# Source code for analysis

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Analysis module for estimating scores for performing statistical tests.

Run on the command line, e.g.: ::

    $ python analysis.py

.. note:: This module has dependencies not required by the CAQE web application. To install these dependencies, run ``pip install -r analysis_requirements.txt``.
"""
import argparse
import copy
import json

import pandas as pd
import seaborn as sns

from caqe.models import Condition
from caqe import app


def get_ratings_data(output_file=None):
    """
    Get the ratings data from the database as a DataFrame.

    Every trial of every condition is expanded into one row per rated
    stimulus. Each row carries the trial/condition/participant fields, the
    stimulus name, its rating as a float, and the remaining trial data
    (everything except the ratings) re-serialized as a JSON string.

    Parameters
    ----------
    output_file : str
        A filepath to an output CSV file. If given, the DataFrame is also
        written to this file. (default is None)

    Returns
    -------
    ratings : pandas.DataFrame
        All of the rating data from the database, one row per
        (trial, stimulus) pair.

    """
    ratings = []
    for c in Condition.query.all():
        for t in c.trials.all():
            t_data = json.loads(t.data)
            # Split the ratings off so that the 'data' column only holds the
            # rest of the trial data.
            trial_ratings = t_data.pop('ratings')
            participant = t.participant
            row = {'test_id': c.test_id,
                   'trial_id': t.id,
                   'condition_id': c.id,
                   'participant_id': t.participant_id,
                   'participant_crowd_worker_id': participant.crowd_worker_id,
                   'participant_platform': participant.platform,
                   # Read from the trial itself, unlike the other participant fields.
                   'participant_passed_hearing_test': t.participant_passed_hearing_test,
                   'participant_hearing_test_attempts': participant.hearing_test_attempts,
                   'participant_hearing_test_last_attempt': participant.hearing_test_last_attempt,
                   'participant_pre_test_survey': participant.pre_test_survey,
                   'participant_post_test_survey': participant.post_test_survey,
                   'participant_hearing_response_estimation': participant.hearing_response_estimation,
                   'data': json.dumps(t_data)}
            # The shared row is never mutated, so a shallow copy per rating is
            # enough; key order (row fields, then stimulus, then rating) is kept.
            for stimulus, rating in trial_ratings.items():
                ratings.append(dict(row, stimulus=stimulus, rating=float(rating)))

    ratings = pd.DataFrame.from_records(ratings)

    if output_file is not None:
        ratings.to_csv(output_file)

    return ratings


def plot_mushra_boxplots(data, size=5, output_file=None):
    """
    Plot the MUSHRA ratings as a grid of boxplots. If `output_file` is defined, then save the plot to file.

    One notched boxplot of `rating` per `stimulus` is drawn for each
    `condition_id` (one grid row per condition). The y-axis is limited to the
    application's configured ``MIN_RATING_VALUE`` .. ``MAX_RATING_VALUE``.

    Parameters
    ----------
    data: pandas.DataFrame
        The ratings data obtained from `get_ratings_data`.

    size : float
        Height of each boxplot in inches. (default is 5)

    output_file: str
        Path to the output file location. (default is None)

    Returns
    -------
    g : seaborn.axisgrid.FacetGrid
        The grid holding the boxplots.
    """
    plot_kwargs = dict(x='stimulus', y='rating', data=data, row='condition_id', kind='box', notch=True)
    if hasattr(sns, 'catplot'):
        # seaborn >= 0.9 renamed `factorplot` to `catplot` and `size` to `height`.
        g = sns.catplot(height=size, **plot_kwargs)
    else:
        g = sns.factorplot(size=size, **plot_kwargs)
    g.set(ylim=(app.config['MIN_RATING_VALUE'], app.config['MAX_RATING_VALUE']))

    if output_file is not None:
        g.savefig(output_file)

    # Hand the grid back to the caller, as the docstring promises.
    return g


# Command-line entry point: either plot the ratings as boxplots or dump them to CSV.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Analyze and plot CAQE results.')
    sp = parser.add_subparsers(dest='command')
    # On Python 3 a sub-command is optional by default, which would make a bare
    # invocation silently do nothing; require one so argparse reports a usage error.
    sp.required = True

    ch = sp.add_parser('plot-mushra-boxplots', help='Plot the MUSHRA ratings as a grid of boxplots.')
    ch.add_argument('output_file', type=str, help='Path to output file location')
    ch.add_argument('--size', type=float, help='Height of each boxplot in inches.', default=5.)

    ch = sp.add_parser('save-data-to-csv', help='Save ratings data to a csv file.')
    ch.add_argument('output_file', type=str, help='Path to output file location')

    args = parser.parse_args()

    if args.command == 'plot-mushra-boxplots':
        data = get_ratings_data()
        plot_mushra_boxplots(data, size=args.size, output_file=args.output_file)
    elif args.command == 'save-data-to-csv':
        get_ratings_data(args.output_file)