Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
208 views
in Technique[技术] by (71.8m points)

c# - CoPurchase Recommendation engine returns NaN

My dataset contains about 100k entries covering 800 products. When I try to predict possible matches, the engine returns NaN for the most popular products sold (which should have the most entries).

I'm also converting the ProductId/CoPurchaseProductId values (Guid) to strings in order to use them.

Can anyone point out whether I'm doing something wrong, or whether my dataset is too small?

// STEP 1: Create the ML.NET context and expose the in-memory entries as a data view.
var mlContext = new MLContext();
IDataView traindata = mlContext.Data.LoadFromEnumerable(data: productEntries);

// STEP 2: Configure the matrix factorization trainer. The row/column index columns are the
// key-typed ("Encoded") columns produced by the key mapping in STEP 3; the remaining values
// are the hyper-parameters used together with the one-class squared loss.
var options = new MatrixFactorizationTrainer.Options
{
    MatrixColumnIndexColumnName = nameof(ProductEntry.ProductIdEncoded),
    MatrixRowIndexColumnName = nameof(ProductEntry.CoPurchaseProductIdEncoded),
    LabelColumnName = nameof(ProductEntry.Label),
    LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
    Alpha = 0.01,
    Lambda = 0.025,
    ApproximationRank = 100,
    C = 0.00001,
};

// STEP 3: Map the string product ids onto key columns, one mapping per id column.
var idsToKeys = mlContext.Transforms.Conversion
    .MapValueToKey(
        outputColumnName: nameof(ProductEntry.ProductIdEncoded),
        inputColumnName: nameof(ProductEntry.ProductId))
    .Append(mlContext.Transforms.Conversion.MapValueToKey(
        outputColumnName: nameof(ProductEntry.CoPurchaseProductIdEncoded),
        inputColumnName: nameof(ProductEntry.CoPurchaseProductId)));

// STEP 4: Chain the matrix factorization trainer after the key mapping.
var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options: options);
var pipeline = idsToKeys.Append(trainer);

// STEP 5: Fit the whole pipeline on the training data.
ITransformer model = pipeline.Fit(input: traindata);

// STEP 6: Build a single-row prediction engine on top of the fitted model.
var predictionEngine = mlContext.Model.CreatePredictionEngine<ProductEntry, CoPurchasePrediction>(transformer: model);

// Manual test of the prediction: for each of the first five popular products, score every
// other active product as a co-purchase candidate and dump the five highest-scoring pairs.
var allProducts = Products.Where(p => p.ActiveState > 0).ToList();

// Index the titles by product id once so the result projection does not rescan the list
// for every row. GroupBy + First keeps the "first match wins" behavior of FirstOrDefault
// should the list ever contain the same id twice.
var titlesById = allProducts
    .GroupBy(p => p.Id)
    .ToDictionary(g => g.Key, g => g.First().Title);

foreach (var popularProduct in mostPopularProducts.Take(5))
{
    var product = allProducts.FirstOrDefault(p => p.Id == popularProduct.Id);
    if (product == null)
    {
        // The popular product is not in the active list (ActiveState <= 0 or unknown id);
        // report it and move on instead of failing with a NullReferenceException.
        popularProduct.Id.Dump("Skipped: popular product not found among active products");
        continue;
    }

    // Computed once; used both as the "origin" label of every row and in the dump title.
    var productLabel = SplitByLanguageHelper.Split(product.Title);

    var top5 = allProducts.Where(p => p.Id != product.Id)
        .Select(p => Prediction.GetPrediction(predictionEngine, product.Id, p.Id))
        .OrderByDescending(p => p.Score)
        .Take(5).ToList();

    var result = top5.Select(prediction => new
    {
        Score = prediction.Score,
        // Every prediction in top5 was created with ProductId == product.Id.
        OrigProductIdLabel = productLabel,
        // CoPurchaseProductId always originates from allProducts, so the key is present.
        CoProductIdLabel = SplitByLanguageHelper.Split(titlesById[prediction.CoPurchaseProductId])
    }).ToList();//all return a NaN score :(

    result.Dump($"Predictions from {productLabel}");
}

/// <summary>
/// Helper that runs a single product / co-purchase pair through a prediction engine.
/// </summary>
public static class Prediction
{
    /// <summary>
    /// Scores one product pair and returns the ids together with the predicted score.
    /// </summary>
    /// <param name="predictionEngine">Engine used to compute the score.</param>
    /// <param name="productId">Identifier of the product; passed to the engine as its string form.</param>
    /// <param name="coPurchaseProductId">Identifier of the candidate co-purchased product; passed to the engine as its string form.</param>
    /// <returns>The two identifiers and the score reported by the engine.</returns>
    public static ProductCoPurchasePrediction GetPrediction(PredictionEngine<ProductEntry, CoPurchasePrediction> predictionEngine, Guid productId, Guid coPurchaseProductId)
    {
        // The model input carries the ids as strings, so convert both Guids first.
        var input = new ProductEntry
        {
            ProductId = productId.ToString(),
            CoPurchaseProductId = coPurchaseProductId.ToString()
        };

        CoPurchasePrediction scored = predictionEngine.Predict(input);

        var outcome = new ProductCoPurchasePrediction();
        outcome.ProductId = productId;
        outcome.CoPurchaseProductId = coPurchaseProductId;
        outcome.Score = scored.Score;
        return outcome;
    }
}


/// <summary>
/// Output type of the prediction engine: holds the score produced for one product pair.
/// </summary>
public class CoPurchasePrediction
{
    /// <summary>
    /// Gets or sets the predicted score.
    /// </summary>
    /// <value>The score.</value>
    public float Score { get; set; }
}

/// <summary>
/// One product / co-purchased product pair, used both as a training row and as the
/// input of the prediction engine.
/// </summary>
public class ProductEntry
{
    /// <summary>
    /// Gets or sets the co-purchased product identifier in string form.
    /// </summary>
    /// <value>The co purchase product identifier.</value>
    public string CoPurchaseProductId { get; set; }

    /// <summary>
    /// Gets or sets the key-typed counterpart of <see cref="CoPurchaseProductId"/>.
    /// </summary>
    // NOTE(review): the key count 262111 is hard-coded here — confirm it is intended for this dataset.
    [KeyType(262111)]
    public uint CoPurchaseProductIdEncoded { get; set; }

    /// <summary>
    /// Gets or sets the label value of this pair.
    /// </summary>
    public float Label { get; set; }

    /// <summary>
    /// Gets or sets the product identifier in string form.
    /// </summary>
    public string ProductId { get; set; }

    /// <summary>
    /// Gets or sets the key-typed counterpart of <see cref="ProductId"/>.
    /// </summary>
    [KeyType(262111)]
    public uint ProductIdEncoded { get; set; }

    /// <summary>
    /// Returns a short text showing both string identifiers of the pair.
    /// </summary>
    public override string ToString() => $"Prod: {ProductId}, CoPurchase: {CoPurchaseProductId}";
}

/// <summary>
/// Result of scoring one product pair: both identifiers plus the predicted score.
/// </summary>
public class ProductCoPurchasePrediction
{
    /// <summary>Gets or sets the identifier of the co-purchased product.</summary>
    public Guid CoPurchaseProductId { get; set; }

    /// <summary>Gets or sets the identifier of the product.</summary>
    public Guid ProductId { get; set; }

    /// <summary>Gets or sets the predicted score for the pair.</summary>
    public float Score { get; set; }
}

/// <summary>
/// Extracts the text enclosed in <c>&lt;NL&gt;</c> … <c>&lt;/NL&gt;</c> markers from a string.
/// </summary>
public static class SplitByLanguageHelper
{
    private const string OpeningTag = "<NL>";
    private const string ClosingTag = "</NL>";

    /// <summary>
    /// Returns the text between the first <c>&lt;NL&gt;</c> marker and the first
    /// <c>&lt;/NL&gt;</c> marker that follows it.
    /// </summary>
    /// <param name="text">The text to inspect; may be <c>null</c> or empty.</param>
    /// <returns>
    /// An empty string when <paramref name="text"/> is <c>null</c> or empty; the enclosed text
    /// when both markers are present in order; otherwise <paramref name="text"/> unchanged.
    /// </returns>
    public static string Split(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return "";
        }

        // Ordinal search: the markers are fixed ASCII tokens, not linguistic text.
        int openingIndex = text.IndexOf(OpeningTag, StringComparison.Ordinal);
        if (openingIndex < 0)
        {
            // No opening marker: nothing to extract, hand the text back untouched.
            return text;
        }

        int contentStart = openingIndex + OpeningTag.Length;

        // Look for the closing marker only after the opening one, so a stray closing
        // marker earlier in the text cannot produce a negative length.
        int closingIndex = text.IndexOf(ClosingTag, contentStart, StringComparison.Ordinal);
        if (closingIndex < 0)
        {
            // Opening marker without a matching closing marker.
            return text;
        }

        return text.Substring(contentStart, closingIndex - contentStart);
    }
}

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)
等待大神答复

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...