Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python 3 compatibility #13

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ language: python

python:
- "2.7"
- "3.4"

sudo: false

Expand All @@ -24,6 +25,8 @@ before_install:
- pip install cython
- pip install numpy
- travis_wait pip install scipy
# setup.py imports mrec, so six must already be installed before the package install step
- pip install six>=1.9

# This is a library, not an application.
# So we do not have a requirements.txt
Expand Down
10 changes: 6 additions & 4 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

from __future__ import print_function

import sys, os

# If extensions (or modules to document with autodoc) are in another directory,
Expand Down Expand Up @@ -51,10 +53,10 @@
try:
release = pkg_resources.get_distribution('mrec').version
except pkg_resources.DistributionNotFound:
print 'To build the documentation, The distribution information of mrec'
print 'has to be available. Either install the package into your'
print 'development environment or run "python setup.py develop" to setup'
print 'the metadata.'
print('To build the documentation, The distribution information of mrec')
print('has to be available. Either install the package into your')
print('development environment or run "python setup.py develop" to setup')
print('the metadata.')
sys.exit(1)
del pkg_resources
version = '.'.join(release.split('.')[:2])
Expand Down
2 changes: 1 addition & 1 deletion doc/hybrid.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ The resulting features are simply `tf-idf counts <http://en.wikipedia.org/wiki/T

and inspect the top few word counts for the first few items::

>>> for i in xrange(3):
>>> for i in range(3):
... for tfidf,word in sorted(zip(features[i].data,features[i].indices),reverse=True)[:3]:
... print '{0}\t{1}\t{2:.3f}'.format(i,word,tfidf)
...
Expand Down
18 changes: 11 additions & 7 deletions mrec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from itertools import izip
from __future__ import absolute_import, print_function
try:
from itertools import izip as zip
except ImportError:
pass
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix
from scipy.io import mmread, mmwrite
Expand All @@ -7,8 +11,8 @@
except ImportError:
import pickle

from sparse import fast_sparse_matrix, loadtxt, loadz, savez
from base_recommender import BaseRecommender
from .sparse import fast_sparse_matrix, loadtxt, loadz, savez
from .base_recommender import BaseRecommender

__version__ = '0.3.1'

Expand Down Expand Up @@ -89,13 +93,13 @@ def save_sparse_matrix(data,fmt,filepath):
if fmt == 'tsv':
m = data.tocoo()
with open(filepath,'w') as out:
for u,i,v in izip(m.row,m.col,m.data):
print >>out,'{0}\t{1}\t{2}'.format(u+1,i+1,v)
for u,i,v in zip(m.row,m.col,m.data):
print('{0}\t{1}\t{2}'.format(u+1,i+1,v), file=out)
elif fmt == 'csv':
m = data.tocoo()
with open(filepath,'w') as out:
for u,i,v in izip(m.row,m.col,m.data):
print >>out,'{0},{1},{2}'.format(u+1,i+1,v)
for u,i,v in zip(m.row,m.col,m.data):
print('{0},{1},{2}'.format(u+1,i+1,v), file=out)
elif fmt == 'mm':
mmwrite(filepath,data)
elif fmt == 'npz':
Expand Down
12 changes: 7 additions & 5 deletions mrec/base_recommender.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import print_function
try:
import cPickle as pickle
except ImportError:
import pickle
from six.moves import xrange
import numpy as np
from scipy.sparse import csr_matrix

Expand Down Expand Up @@ -86,7 +88,7 @@ def save(self,filepath):
if archive:
np.savez(filepath,**archive)
else:
pickle.dump(self,open(filepath,'w'))
pickle.dump(self,open(filepath,'wb'))

def _create_archive(self):
"""
Expand Down Expand Up @@ -117,7 +119,7 @@ def load(filepath):
if isinstance(r,BaseRecommender):
model = r
else:
model = np.loads(str(r['model']))
model = np.loads(r['model'])
model._load_archive(r) # restore any fields serialized separately
return model

Expand Down Expand Up @@ -148,7 +150,7 @@ def read_recommender_description(filepath):
if isinstance(r,BaseRecommender):
model = r
else:
model = np.loads(str(r['model']))
model = np.loads(r['model'])
return str(model)

def __str__(self):
Expand Down Expand Up @@ -192,10 +194,10 @@ def batch_recommend_items(self,
recs = []
for u in xrange(self.num_users):
if show_progress and u%1000 == 0:
print u,'..',
print(u, '..',)
recs.append(self.recommend_items(dataset,u,max_items,return_scores))
if show_progress:
print
print()
return recs

def range_recommend_items(self,
Expand Down
19 changes: 10 additions & 9 deletions mrec/evaluation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
* with hit rate, following e.g. Karypis lab SLIM and FISM papers
* with prec@k and MRR
"""

from __future__ import print_function
from six.moves import xrange
import numpy as np
from scipy import stats
from collections import defaultdict
Expand Down Expand Up @@ -62,8 +63,8 @@ def run_evaluation(models,retrain,get_split,num_runs,evaluation_func):
for i,model in enumerate(models):
retrain(model,train)
run_metrics = evaluation_func(model,train,users,test)
for m,val in run_metrics.iteritems():
print m,val
for m,val in run_metrics.items():
print(m, val)
metrics[i][m].append(val)
return metrics

Expand All @@ -83,10 +84,10 @@ def sort_metrics_by_name(names):
prefix2val[name].append(val)
else:
prefix2val[name] = []
for name,vals in prefix2val.iteritems():
for name,vals in prefix2val.items():
prefix2val[name] = sorted(vals)
ret = []
for name,vals in sorted(prefix2val.iteritems()):
for name,vals in sorted(prefix2val.items()):
if vals:
for val in vals:
ret.append('{0}@{1}'.format(name,val))
Expand All @@ -99,15 +100,15 @@ def print_report(models,metrics):
Call this to print out the metrics returned by run_evaluation().
"""
for model,results in zip(models,metrics):
print model
print(model)
if hasattr(model,'similarity_matrix'):
nnz = model.similarity_matrix.nnz
num_items = model.similarity_matrix.shape[0]
density = float(model.similarity_matrix.nnz)/num_items**2
print 'similarity matrix nnz = {0} (density {1:.3f})'.format(nnz,density)
print('similarity matrix nnz = {0} (density {1:.3f})'.format(nnz,density))
for m in sort_metrics_by_name(results.keys()):
vals = results[m]
print '{0}{1:.4f} +/- {2:.4f}'.format(m.ljust(15),np.mean(vals),stats.sem(vals,ddof=0))
print('{0}{1:.4f} +/- {2:.4f}'.format(m.ljust(15),np.mean(vals),stats.sem(vals,ddof=0)))

def evaluate(model,train,users,get_known_items,compute_metrics):
avg_metrics = defaultdict(float)
Expand All @@ -116,7 +117,7 @@ def evaluate(model,train,users,get_known_items,compute_metrics):
recommended = [r for r,_ in model.recommend_items(train,u,max_items=20)]
metrics = compute_metrics(recommended,get_known_items(u))
if metrics:
for m,val in metrics.iteritems():
for m,val in metrics.items():
avg_metrics[m] += val
count += 1
for m in avg_metrics:
Expand Down
1 change: 1 addition & 0 deletions mrec/evaluation/tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from six.moves import xrange
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_raises

Expand Down
5 changes: 3 additions & 2 deletions mrec/examples/convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Convert sparse matrix from one file format to another.
"""
from __future__ import print_function

import os
import subprocess
Expand All @@ -18,8 +19,8 @@ def tsv2mtx(infile,outfile):
nnz += 1
headerfile = outfile+'.header'
with open(headerfile,'w') as header:
print >>header,'%%MatrixMarket matrix coordinate real general'
print >>header,'{0} {1} {2}'.format(num_users,num_items,nnz)
print('%%MatrixMarket matrix coordinate real general', file=header)
print('{0} {1} {2}'.format(num_users,num_items,nnz), file=header)
subprocess.check_call(['cat',headerfile,infile],stdout=open(outfile,'w'))
subprocess.check_call(['rm',headerfile])

Expand Down
3 changes: 2 additions & 1 deletion mrec/examples/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from shutil import rmtree
import logging
from collections import defaultdict
from six.moves import xrange

from mrec import load_sparse_matrix, read_recommender_description, load_recommender
from mrec.parallel import predict
Expand Down Expand Up @@ -82,7 +83,7 @@ def process(view,opts,modelfile,trainfile,testfile,featurefile,outdir,evaluator)
tot_count = 0
for results in processed:
for cum_metrics,count in results:
for m,val in cum_metrics.iteritems():
for m,val in cum_metrics.items():
avg_metrics[m] += val
tot_count += count
for m in avg_metrics:
Expand Down
5 changes: 4 additions & 1 deletion mrec/examples/prepare.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from __future__ import print_function
from six.moves import xrange

class Processor(object):

def __init__(self,splitter,parser,min_items_per_user,preprocess=None):
Expand All @@ -8,7 +11,7 @@ def __init__(self,splitter,parser,min_items_per_user,preprocess=None):

def output(self,user,vals,outfile):
for v,c in vals:
print >>outfile,'{0}\t{1}\t{2}'.format(user,v,c)
print('{0}\t{1}\t{2}'.format(user,v,c), file=outfile)

def handle(self,user,vals):
if len(vals) >= self.min_items_per_user:
Expand Down
12 changes: 7 additions & 5 deletions mrec/examples/tune_slim.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
Try to find a sensible range for regularization
constants for SLIM by looking at model sparsity.
"""
from __future__ import print_function

from six.moves import xrange
import random
from math import log10
import logging
Expand Down Expand Up @@ -91,12 +93,12 @@ def main():
if candidates:
best = min(candidates,key=itemgetter(1))

print 'best parameter setting: {0}'.format(best[0])
print 'mean # positive similarity weights per item = {0:.3}'.format(best[1])
print 'proportion of items with fewer than {0} positive similarity weights = {1:.3}'.format(opts.min_sims,best[2])
print 'mean # negative similarity weights per item = {0:.3}'.format(best[3])
print('best parameter setting: {0}'.format(best[0]))
print('mean # positive similarity weights per item = {0:.3}'.format(best[1]))
print('proportion of items with fewer than {0} positive similarity weights = {1:.3}'.format(opts.min_sims,best[2]))
print('mean # negative similarity weights per item = {0:.3}'.format(best[3]))
else:
print 'no parameter settings satisfied the conditions, try increasing --min_sims, --max_sims or --max_sparse'
print('no parameter settings satisfied the conditions, try increasing --min_sims, --max_sims or --max_sparse')

if __name__ == '__main__':
main()
28 changes: 15 additions & 13 deletions mrec/item_similarity/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Brute-force k-nearest neighbour recommenders
intended to provide evaluation baselines.
"""
from __future__ import absolute_import, print_function

from six.moves import xrange
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from recommender import ItemSimilarityRecommender
from mrec.item_similarity.recommender import ItemSimilarityRecommender

class KNNRecommender(ItemSimilarityRecommender):
"""
Expand Down Expand Up @@ -79,12 +81,12 @@ def __str__(self):
# use knn models like this:

import random
import StringIO
from io import BytesIO
from mrec import load_fast_sparse_matrix

random.seed(0)

print 'loading test data...'
print('loading test data...')
data = """\
%%MatrixMarket matrix coordinate real general
3 5 9
Expand All @@ -98,8 +100,8 @@ def __str__(self):
3 3 1
3 4 1
"""
print data
dataset = load_fast_sparse_matrix('mm',StringIO.StringIO(data))
print(data)
dataset = load_fast_sparse_matrix('mm', BytesIO(data.encode('ascii')))
num_users,num_items = dataset.shape

model = CosineKNNRecommender(k=2)
Expand All @@ -108,32 +110,32 @@ def __str__(self):

def output(i,j,val):
# convert back to 1-indexed
print '{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val)
print('{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val))

print 'computing some item similarities...'
print 'item\tsim\tweight'
print('computing some item similarities...')
print('item\tsim\tweight')
# if we want we can compute these individually without calling fit()
for i in random.sample(xrange(num_items),num_samples):
for j,weight in model.get_similar_items(i,max_similar_items=2,dataset=dataset):
output(i,j,weight)

print 'learning entire similarity matrix...'
print('learning entire similarity matrix...')
# more usually we just call train() on the entire dataset
model = CosineKNNRecommender(k=2)
model.fit(dataset)
print 'making some recommendations...'
print 'user\trec\tscore'
print('making some recommendations...')
print('user\trec\tscore')
for u in random.sample(xrange(num_users),num_samples):
for i,score in model.recommend_items(dataset.X,u,max_items=10):
output(u,i,score)

print 'making batch recommendations...'
print('making batch recommendations...')
recs = model.batch_recommend_items(dataset.X)
for u in xrange(num_users):
for i,score in recs[u]:
output(u,i,score)

print 'making range recommendations...'
print('making range recommendations...')
for start,end in [(0,2),(2,3)]:
recs = model.range_recommend_items(dataset.X,start,end)
for u in xrange(start,end):
Expand Down
4 changes: 2 additions & 2 deletions mrec/item_similarity/precomputed.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Make recommendations from a precomputed item similarity matrix.
"""

from recommender import ItemSimilarityRecommender
from __future__ import absolute_import
from .recommender import ItemSimilarityRecommender

class PrecomputedItemSimilarityRecommender(ItemSimilarityRecommender):
"""
Expand Down
Loading