#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Analysis module for estimating scores for performing statistical tests.

Run on the command line, e.g.: ::

    $ python analysis.py save-data-to-csv ratings.csv

.. note:: This module has dependencies not required by the CAQE web application. To install these dependencies, run ``pip install -r analysis_requirements.txt``.
"""
import argparse
import copy
import json
import pandas as pd
import seaborn as sns
from caqe.models import Condition
from caqe import app
def get_ratings_data(output_file=None):
    """
    Collect every stored rating into a single long-format DataFrame.

    Each trial of each condition is expanded into one row per rated stimulus.
    Every row repeats the trial-, condition- and participant-level fields and
    adds the ``stimulus`` key and its ``rating`` (converted to ``float``).

    Parameters
    ----------
    output_file : str
        A filepath to an output CSV file. When not None, the DataFrame is
        also written there. (default is None)

    Returns
    -------
    ratings : pandas.DataFrame
        One row per (trial, stimulus) rating.
    """
    records = []
    for condition in Condition.query.all():
        for trial in condition.trials.all():
            payload = json.loads(trial.data)
            # Split the per-stimulus ratings off; the remainder of the trial
            # payload is kept as a JSON string in the 'data' column.
            stimulus_ratings = payload.pop('ratings')
            participant = trial.participant
            shared_fields = {
                'test_id': condition.test_id,
                'trial_id': trial.id,
                'condition_id': condition.id,
                'participant_id': trial.participant_id,
                'participant_crowd_worker_id': participant.crowd_worker_id,
                'participant_platform': participant.platform,
                'participant_passed_hearing_test': trial.participant_passed_hearing_test,
                'participant_hearing_test_attempts': participant.hearing_test_attempts,
                'participant_hearing_test_last_attempt': participant.hearing_test_last_attempt,
                'participant_pre_test_survey': participant.pre_test_survey,
                'participant_post_test_survey': participant.post_test_survey,
                'participant_hearing_response_estimation': participant.hearing_response_estimation,
                'data': json.dumps(payload),
            }
            for stimulus, value in stimulus_ratings.items():
                # Deep copy so rows never share mutable field values.
                record = copy.deepcopy(shared_fields)
                record['stimulus'] = stimulus
                record['rating'] = float(value)
                records.append(record)

    ratings = pd.DataFrame.from_records(records)
    if output_file is not None:
        ratings.to_csv(output_file)
    return ratings
def plot_mushra_boxplots(data, size=5, output_file=None):
    """
    Plot the MUSHRA ratings as a grid of boxplots. If `output_file` is defined, then save the plot to file.

    One notched boxplot row is drawn per ``condition_id``, with ``stimulus``
    on the x-axis and ``rating`` on the y-axis. The y-axis is clamped to the
    application's configured ``MIN_RATING_VALUE`` / ``MAX_RATING_VALUE``.

    Parameters
    ----------
    data: pandas.DataFrame
        The ratings data obtained from `get_ratings_data`.
    size : float
        Height of each boxplot in inches. (default is 5)
    output_file: str
        Path to the output file location. (default is None)

    Returns
    -------
    g : seaborn.axisgrid.FacetGrid
        The grid holding the boxplots.
    """
    plot_kwargs = dict(x='stimulus', y='rating', data=data, row='condition_id', kind='box', notch=True)
    if hasattr(sns, 'catplot'):
        # Newer seaborn renamed ``factorplot`` to ``catplot`` and its ``size``
        # argument to ``height``; prefer the new API whenever it is available.
        g = sns.catplot(height=size, **plot_kwargs)
    else:
        # Older seaborn releases only provide ``factorplot(size=...)``.
        g = sns.factorplot(size=size, **plot_kwargs)

    g.set(ylim=(app.config['MIN_RATING_VALUE'], app.config['MAX_RATING_VALUE']))

    if output_file is not None:
        g.savefig(output_file)

    # The docstring promises the grid, so hand it back for further tweaking.
    return g
if __name__ == '__main__':
    # Command-line entry point: either plot the ratings or dump them to CSV.
    parser = argparse.ArgumentParser(description='Analyze and plot CAQE results.')
    subparsers = parser.add_subparsers(dest='command')
    # Python 3 treats sub-commands as optional by default, which would make a
    # bare ``python analysis.py`` exit silently without doing anything.
    # Requiring one makes argparse print usage and an error instead.
    subparsers.required = True

    plot_parser = subparsers.add_parser('plot-mushra-boxplots',
                                        help='Plot the MUSHRA ratings as a grid of boxplots.')
    plot_parser.add_argument('output_file', type=str, help='Path to output file location')
    plot_parser.add_argument('--size', type=float, help='Height of each boxplot in inches.', default=5.)

    csv_parser = subparsers.add_parser('save-data-to-csv', help='Save ratings data to a csv file.')
    csv_parser.add_argument('output_file', type=str, help='Path to output file location')

    args = parser.parse_args()

    if args.command == 'plot-mushra-boxplots':
        data = get_ratings_data()
        plot_mushra_boxplots(data, size=args.size, output_file=args.output_file)
    elif args.command == 'save-data-to-csv':
        get_ratings_data(args.output_file)