本文整理汇总了Java中org.deeplearning4j.models.word2vec.wordstore.VocabCache类的典型用法代码示例。如果您正苦于以下问题:Java VocabCache类的具体用法?Java VocabCache怎么用?Java VocabCache使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
VocabCache类属于org.deeplearning4j.models.word2vec.wordstore包,在下文中一共展示了VocabCache类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: fromPair
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a {@link Word2Vec} model out of a lookup table and its vocabulary.
 *
 * @param pair pair whose first element is the lookup table and whose second element is the vocab cache
 * @return a Word2Vec instance wired to the given lookup table and vocab, using {@link BasicModelUtils}
 */
public static Word2Vec fromPair(Pair<InMemoryLookupTable, VocabCache> pair) {
    Word2Vec model = new Word2Vec();

    final InMemoryLookupTable lookupTable = pair.getFirst();
    model.setLookupTable(lookupTable);

    final VocabCache vocab = pair.getSecond();
    model.setVocab(vocab);

    model.setModelUtils(new BasicModelUtils());
    return model;
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:15,代码来源:WordVectorSerializer.java
示例2: writeSequenceVectors
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Saves the specified SequenceVectors model to the target OutputStream.
 * <p>
 * The encoded {@code VectorsConfiguration} is written first, followed by one encoded
 * {@code ElementPair} (serialized element plus its vector) per vocabulary element, in index order.
 * The stream is closed when this method returns, whether it succeeds or fails.
 *
 * @param vectors SequenceVectors model
 * @param factory SequenceElementFactory implementation for your objects
 * @param stream Target output stream
 * @param <T> type of the elements stored in the model
 * @throws IOException if the encoding is unsupported or the output could not be written
 */
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
        @NonNull SequenceElementFactory<T> factory, @NonNull OutputStream stream) throws IOException {
    WeightLookupTable<T> lookupTable = vectors.getLookupTable();
    VocabCache<T> vocabCache = vectors.getVocab();

    // try-with-resources guarantees the writer is released even if serialization fails midway
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        // the VectorsConfiguration is saved first
        writer.write(vectors.getConfiguration().toEncodedJson());

        // then the elements, one by one
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);
            String json = factory.serialize(element);
            double[] vector = lookupTable.vector(element.getLabel()).dup().data().asDouble();
            ElementPair pair = new ElementPair(json, vector);
            writer.println(pair.toEncodedJson());
            writer.flush();
        }

        // PrintWriter never throws on write failures; report them instead of silently truncating the output
        if (writer.checkError()) {
            throw new IOException("Failed to write SequenceVectors to the output stream");
        }
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:32,代码来源:WordVectorSerializer.java
示例3: readVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Reads a vocab cache from the provided InputStream, one serialized {@code VocabWord} per line.
 * Please note: only vocab content is read, so this is suitable mostly for BagOfWords/TF-IDF vectorizers.
 * The stream is not closed by this method.
 *
 * @param stream UTF-8 encoded input to read from
 * @return vocab cache holding every word found in the stream, each registered under its own index
 * @throws IOException if reading from the stream fails
 */
public static VocabCache<VocabWord> readVocabCache(@NonNull InputStream stream) throws IOException {
    BufferedReader lines = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    VocabWordFactory wordFactory = new VocabWordFactory();

    for (String serialized = lines.readLine(); serialized != null; serialized = lines.readLine()) {
        VocabWord entry = wordFactory.deserialize(serialized);
        cache.addToken(entry);
        cache.addWordToIndex(entry.getIndex(), entry.getLabel());
    }

    return cache;
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:25,代码来源:WordVectorSerializer.java
示例4: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, configures the wrapped CBOW
 * algorithm with the same arguments, and caches training parameters and weight arrays locally.
 *
 * @param vocabCache vocabulary to use
 * @param lookupTable lookup table to use; it is cast to {@code InMemoryLookupTable}
 * @param configuration source of the window, AdaGrad, negative and sampling settings
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    cbow.configure(vocabCache, lookupTable, configuration);

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // the weight arrays are read through the in-memory implementation, so cast once and reuse
    final InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;
    this.syn0 = inMemoryTable.getSyn0();
    this.syn1 = inMemoryTable.getSyn1();
    this.syn1Neg = inMemoryTable.getSyn1Neg();
    this.expTable = inMemoryTable.getExpTable();
    this.table = inMemoryTable.getTable();
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:DM.java
示例5: weights
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a weight matrix from pre-computed word vectors.
 * <p>
 * The result has {@code data.size()} rows and {@code glove.layerSize()} columns. The vector of each
 * word is stored in the row given by the word's index in {@code vocab}. An entry is skipped when its
 * vector is missing, when the vector length differs from the layer size, or when the word's index is
 * negative or not smaller than {@code data.size()}.
 *
 * @param glove lookup table providing the layer size
 * @param data word to vector mapping
 * @param vocab vocabulary used to resolve the row index of every word
 * @return the assembled weight matrix
 */
private static INDArray weights(GloveWeightLookupTable glove, Map<String, float[]> data, VocabCache vocab) {
    final int layerSize = glove.layerSize();
    final int rows = data.size();
    INDArray ret = Nd4j.create(rows, layerSize);

    for (Map.Entry<String, float[]> entry : data.entrySet()) {
        float[] values = entry.getValue();
        // validate the raw array first so no INDArray is allocated for entries that get skipped
        if (values == null || values.length != layerSize) {
            continue;
        }

        // resolve the index once instead of once per bounds check
        int index = vocab.indexOf(entry.getKey());
        if (index < 0 || index >= rows) {
            continue;
        }

        ret.putRow(index, Nd4j.create(Nd4j.createBuffer(values)));
    }

    return ret;
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:GloveWeightLookupTable.java
示例6: getSyn0Vector
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Returns the syn0 vector stored for the word at the given vocabulary index, creating a random
 * one on first access. The id of the calling thread is recorded in {@code workers}.
 *
 * @param wordIndex index of the word in the vocabulary
 * @param vocabCache vocabulary used to resolve the index into a word
 * @return the vector stored for that word
 */
public INDArray getSyn0Vector(Integer wordIndex, VocabCache<VocabWord> vocabCache) {
    final long callerId = Thread.currentThread().getId();
    if (!workers.contains(callerId)) {
        workers.add(callerId);
    }

    final VocabWord word = vocabCache.elementAtIndex(wordIndex);

    boolean missing = !indexSyn0VecMap.containsKey(word);
    if (missing) {
        synchronized (this) {
            // look again while holding the lock: another caller may have stored the vector meanwhile
            if (!indexSyn0VecMap.containsKey(word)) {
                indexSyn0VecMap.put(word, getRandomSyn0Vec(vectorLength.get(), wordIndex));
            }
        }
    }

    return indexSyn0VecMap.get(word);
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:VocabHolder.java
示例7: testGlove
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Trains Glove on the lower-cased lines of raw_sentences.txt and checks that "week" is among
 * the 20 nearest words to "day".
 */
@Test
public void testGlove() throws Exception {
    Glove glove = new Glove(true, 5, 100);

    String corpusPath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath();
    JavaRDD<String> rawLines = sc.textFile(corpusPath);
    JavaRDD<String> corpus = rawLines.map(new Function<String, String>() {
        @Override
        public String call(String s) throws Exception {
            return s.toLowerCase();
        }
    });

    Pair<VocabCache<VocabWord>, GloveWeightLookupTable> trained = glove.train(corpus);

    // fromPair expects (lookup table, vocab), i.e. the reverse order of what train returns
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) trained.getSecond();
    VocabCache vocab = (VocabCache) trained.getFirst();
    WordVectors vectors = WordVectorSerializer.fromPair(new Pair<>(lookupTable, vocab));

    Collection<String> nearest = vectors.wordsNearest("day", 20);
    assertTrue(nearest.contains("week"));
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:GloveTest.java
示例8: buildShallowVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a shallow vocabulary from the given counter and applies Huffman indexes to it.
 *
 * @param counter counter keyed by element id; one vocabulary element is created per key
 * @return vocab cache containing one {@code ShallowSequenceElement} per counter key
 */
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<Long> counter) {
    // TODO: need simplified cache here, that will operate on Long instead of string labels
    VocabCache<ShallowSequenceElement> cache = new AbstractCache<>();

    for (Long elementId : counter.keySet()) {
        cache.addToken(new ShallowSequenceElement(counter.getCount(elementId), elementId));
    }

    // build the Huffman tree over the collected elements and write the resulting indexes back
    Huffman tree = new Huffman(cache.vocabWords());
    tree.build();
    tree.applyIndexes(cache);

    return cache;
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:23,代码来源:SparkSequenceVectors.java
示例9: writeWordVectors
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Writes word vectors to the given OutputStream.
 * <p>
 * The first line is a header of the form "NUM_WORDS VECTOR_SIZE NUM_DOCS". Every following line holds
 * the Base64-encoded label of one element followed by its space-separated vector values. Vectors are
 * fetched from the lookup table one element at a time. The stream is closed when this method returns,
 * whether it succeeds or fails.
 *
 * @param lookupTable lookup table providing both the vocabulary and the vectors
 * @param stream target output stream
 * @param <T> type of the elements stored in the lookup table
 * @throws IOException if the encoding is unsupported or the output could not be written
 */
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable,
        OutputStream stream) throws IOException {
    VocabCache<T> vocabCache = lookupTable.getVocabCache();

    // try-with-resources guarantees the writer is released even if a lookup fails midway
    try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(stream, "UTF-8"))) {
        // saving header as "NUM_WORDS VECTOR_SIZE NUM_DOCS"
        String str = vocabCache.numWords() + " " + lookupTable.layerSize() + " " + vocabCache.totalNumberOfDocs();
        log.debug("Saving header: {}", str);
        writer.println(str);

        // saving vocab content
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);

            StringBuilder builder = new StringBuilder();
            builder.append(encodeB64(element.getLabel())).append(" ");

            INDArray vec = lookupTable.vector(element.getLabel());
            // the vector length does not change while it is being printed, so read it once
            final long length = vec.length();
            for (int i = 0; i < length; i++) {
                builder.append(vec.getDouble(i));
                if (i < length - 1) {
                    builder.append(" ");
                }
            }
            writer.println(builder.toString());
        }

        // PrintWriter never throws on write failures; report them instead of silently truncating the output
        if (writer.checkError()) {
            throw new IOException("Failed to write word vectors to the output stream");
        }
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:38,代码来源:WordVectorSerializer.java
示例10: writeVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Saves a vocab cache to the provided OutputStream, one JSON-serialized word per line, in index order.
 * Please note: only vocab content is saved, so this is suitable mostly for BagOfWords/TF-IDF vectorizers.
 * The stream is closed when this method returns, whether it succeeds or fails.
 *
 * @param vocabCache vocabulary to save
 * @param stream target output stream
 * @throws IOException if the encoding is unsupported or the output could not be written
 */
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull OutputStream stream)
        throws IOException {
    // try-with-resources guarantees the writer is released even if serialization fails midway
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        for (int x = 0; x < vocabCache.numWords(); x++) {
            VocabWord word = vocabCache.elementAtIndex(x);
            writer.println(word.toJSON());
        }

        // PrintWriter never throws on write failures; report them instead of silently truncating the output
        if (writer.checkError()) {
            throw new IOException("Failed to write vocab cache to the output stream");
        }
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:WordVectorSerializer.java
示例11: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, and wraps the lookup table's weight
 * arrays into {@code DeviceLocalNDArray} holders.
 * <p>
 * When negative sampling is enabled and the lookup table has no syn1Neg array yet, the lookup
 * table's weights are reset first so that the array exists.
 *
 * @param vocabCache vocabulary to use
 * @param lookupTable lookup table to use; it is cast to {@code InMemoryLookupTable}
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // every weight array is read through the in-memory implementation, so cast once and reuse
    final InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    if (configuration.getNegative() > 0 && inMemoryTable.getSyn1Neg() == null) {
        logger.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());

    this.variableWindows = configuration.getVariableWindows();
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:30,代码来源:CBOW.java
示例12: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * SkipGram initialization over the given vocabulary and WeightLookupTable.
 * <p>
 * When negative sampling is enabled and the lookup table has no syn1Neg array yet, the lookup
 * table's weights are reset first so that the array exists. The weight arrays are then wrapped
 * into {@code DeviceLocalNDArray} holders and the training parameters are copied from the configuration.
 *
 * @param vocabCache vocabulary to use
 * @param lookupTable lookup table to use; it is cast to {@code InMemoryLookupTable}
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    // every weight array is read through the in-memory implementation, so cast once and reuse
    final InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    if (configuration.getNegative() > 0 && inMemoryTable.getSyn1Neg() == null) {
        log.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();
    this.variableWindows = configuration.getVariableWindows();
    this.vectorLength = configuration.getLayersSize();
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:40,代码来源:SkipGram.java
示例13: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, copies the window, AdaGrad and negative
 * settings from the configuration, and configures the wrapped SkipGram algorithm with the same arguments.
 *
 * @param vocabCache vocabulary to use
 * @param lookupTable lookup table to use
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    // references handed in by the caller
    this.configuration = configuration;
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;

    // parameters copied from the configuration
    this.negative = configuration.getNegative();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.window = configuration.getWindow();

    skipGram.configure(vocabCache, lookupTable, configuration);
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:DBOW.java
示例14: InMemoryLookupTable
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates a lookup table over the given vocabulary, initializes the exp table and, when
 * requested, the AdaGrad state.
 *
 * @param vocab vocabulary backing this table
 * @param vectorLength length of the vectors
 * @param useAdaGrad whether AdaGrad state should be initialized
 * @param lr learning rate
 * @param gen random number generator
 * @param negative negative sampling value
 */
public InMemoryLookupTable(VocabCache<T> vocab, int vectorLength, boolean useAdaGrad, double lr, Random gen,
        double negative) {
    // plain state first, so that the init methods below see fully assigned fields
    this.rng = gen;
    this.negative = negative;
    this.lr.set(lr);
    this.useAdaGrad = useAdaGrad;
    this.vectorLength = vectorLength;
    this.vocab = vocab;

    initExpTable();

    if (useAdaGrad) {
        initAdaGrad();
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:15,代码来源:InMemoryLookupTable.java
示例15: ASCIICoOccurrenceReader
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates a reader over the given file, iterating over its lines through a prefetching sentence iterator.
 *
 * @param file file to read
 * @param vocabCache vocabulary associated with this reader
 * @throws RuntimeException wrapping any exception raised while the iterator is being created
 */
public ASCIICoOccurrenceReader(@NonNull File file, @NonNull VocabCache<T> vocabCache) {
    this.vocabCache = vocabCache;
    this.file = file;

    try {
        BasicLineIterator lineIterator = new BasicLineIterator(file);
        this.iterator = new PrefetchingSentenceIterator.Builder(lineIterator).build();
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:10,代码来源:ASCIICoOccurrenceReader.java
示例16: BinaryCoOccurrenceReader
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
public BinaryCoOccurrenceReader(@NonNull File file, @NonNull VocabCache<T> vocabCache, CountMap<T> map) {
this.vocabCache = vocabCache;
this.file = file;
this.countMap = map;
buffer = new ArrayBlockingQueue<>(200000);
try {
inputStream = new BufferedInputStream(new FileInputStream(this.file), 100 * 1024 * 1024);
//inputStream = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
readerThread = new StreamReaderThread(inputStream);
readerThread.start();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:16,代码来源:BinaryCoOccurrenceReader.java
示例17: applyIndexes
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Updates the VocabCache and all its elements with Huffman indexes.
 * Please note: it should be the same VocabCache as was used for Huffman tree initialization.
 * <p>
 * The tree is built first if that has not happened yet. Every element is then registered in the
 * cache under its position, by label when it has one and by storage id otherwise, and the
 * position is stored on the element as its index.
 *
 * @param cache VocabCache to be updated.
 */
public void applyIndexes(VocabCache<? extends SequenceElement> cache) {
    if (!buildTrigger) {
        build();
    }

    for (int position = 0; position < words.size(); position++) {
        final SequenceElement element = words.get(position);
        final String label = element.getLabel();

        if (label == null) {
            // elements without a label are registered by their storage id
            cache.addWordToIndex(position, element.getStorageId());
        } else {
            cache.addWordToIndex(position, label);
        }

        element.setIndex(position);
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:Huffman.java
示例18: importVocabulary
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Imports all elements from the VocabCache passed as argument.
 * Each element is handed to {@code addToken}, which decides what happens to elements that
 * already exist in this cache. The document count of the imported cache is added to this
 * cache's document counter.
 *
 * @param vocabCache cache whose elements and document count are imported
 */
public void importVocabulary(@NonNull VocabCache<T> vocabCache) {
    for (T token : vocabCache.vocabWords()) {
        addToken(token);
    }

    // the imported documents are added to whatever has been counted so far
    documentsCounter.addAndGet(vocabCache.totalNumberOfDocs());
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:AbstractCache.java
示例19: importVocabulary
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Imports every word of the given VocabCache into this cache.
 * Words whose label is not known yet are registered in both {@code tokens} and {@code vocabs};
 * for every word the element frequency is added to its frequency count and to the total
 * number of word occurrences.
 *
 * @param vocabCache cache whose words are imported
 */
@Override
public void importVocabulary(VocabCache<VocabWord> vocabCache) {
    for (VocabWord word : vocabCache.vocabWords()) {
        final String label = word.getLabel();

        // only words that are not known yet need to be registered
        if (!vocabs.containsKey(label)) {
            tokens.put(label, word);
            vocabs.put(label, word);
        }

        // the counters are updated for known and new words alike
        wordFrequencies.incrementCount(label, (float) word.getElementFrequency());
        totalWordOccurrences.addAndGet((long) word.getElementFrequency());
    }
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:InMemoryLookupCache.java
示例20: testStorePut
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Checks that a word registered under index 0 is reported as contained, is counted,
 * and can be read back by that index.
 */
@Test
public void testStorePut() {
    final String word = "hello";
    VocabCache<VocabWord> cache = new InMemoryLookupCache();

    // nothing has been stored yet
    assertFalse(cache.containsWord(word));

    cache.addWordToIndex(0, word);

    assertTrue(cache.containsWord(word));
    assertEquals(1, cache.numWords());
    assertEquals(word, cache.wordAtIndex(0));
}
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:10,代码来源:InMemoryVocabStoreTests.java
注:本文中的org.deeplearning4j.models.word2vec.wordstore.VocabCache类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论