fix: try to sync from official replicate (#1076)

This commit is contained in:
fengmk2
2016-11-20 01:27:44 +08:00
committed by GitHub
parent 6c3140d422
commit bf5f02908f
11 changed files with 111 additions and 79 deletions

View File

@@ -163,6 +163,7 @@ var config = {
// but sometimes will request it for some package information
// please don't change it if not necessary
officialNpmRegistry: 'https://registry.npmjs.com',
officialNpmReplicate: 'https://replicate.npmjs.com',
// sync source, upstream registry
// If you want to directly sync from official npm's registry

View File

@@ -1,19 +1,5 @@
/**!
* cnpmjs.org - controllers/registry/package/list_since.js
*
* Copyright(c) fengmk2 and other contributors.
* MIT Licensed
*
* Authors:
* fengmk2 <fengmk2@gmail.com> (http://fengmk2.github.com)
*/
'use strict';
/**
* Module dependencies.
*/
var packageService = require('../../../services/package');
var A_WEEK_MS = 3600000 * 24 * 7;
@@ -21,7 +7,7 @@ var A_WEEK_MS = 3600000 * 24 * 7;
// GET /-/all/since?stale=update_after&startkey={key}
// List package names since startkey
// https://github.com/npm/npm-registry-client/blob/master/lib/get.js#L89
module.exports = function* () {
module.exports = function* listSince() {
var query = this.query;
if (query.stale !== 'update_after') {
this.status = 400;
@@ -49,7 +35,7 @@ module.exports = function* () {
Date(), query, this.ip);
}
var names = yield* packageService.listPublicModuleNamesSince(startkey);
var names = yield packageService.listPublicModuleNamesSince(startkey);
var result = { _updated: updated };
names.forEach(function (name) {
result[name] = true;

View File

@@ -309,8 +309,10 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) {
// get from npm
const packageUrl = '/' + name.replace('/', '%2f');
// try to sync from official replicate when source npm registry is not cnpmjs.org
const registry = config.sourceNpmRegistryIsCNpm ? config.sourceNpmRegistry : config.officialNpmReplicate;
try {
var result = yield npmSerivce.request(packageUrl);
var result = yield npmSerivce.request(packageUrl, { registry: registry });
pkg = result.data;
status = result.status;
} catch (err) {
@@ -318,7 +320,7 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) {
if (!err.res || err.res.statusCode !== 404) {
var errMessage = err.name + ': ' + err.message;
that.log('[c#%s] [error] [%s] get package(%s%s) error: %s, status: %s',
concurrencyId, name, config.sourceNpmRegistry, packageUrl, errMessage, status);
concurrencyId, name, registry, packageUrl, errMessage, status);
yield that._doneOne(concurrencyId, name, false);
return;
}
@@ -337,14 +339,13 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) {
if (!pkg) {
that.log('[c#%s] [error] [%s] get package(%s%s) error: package not exists, status: %s',
concurrencyId, name, config.sourceNpmRegistry, packageUrl, status);
concurrencyId, name, registry, packageUrl, status);
yield that._doneOne(concurrencyId, name, true);
return;
}
that.log('[c#%d] [%s] package(%s%s) status: %s, dist-tags: %j, time.modified: %s, start...',
concurrencyId, name, config.sourceNpmRegistry, packageUrl, status,
pkg['dist-tags'], pkg.time && pkg.time.modified);
concurrencyId, name, registry, packageUrl, status, pkg['dist-tags'], pkg.time && pkg.time.modified);
if (unpublishedInfo) {
try {
@@ -884,7 +885,10 @@ SyncModuleWorker.prototype._sync = function* (name, pkg) {
};
SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePackage) {
logger.syncInfo('[sync_module_worker] start sync %s@%s', sourcePackage.name, sourcePackage.version);
var delay = Date.now() - sourcePackage.publish_time;
logger.syncInfo('[sync_module_worker] delay: %s ms, publish_time: %s, start sync %s@%s',
delay, utility.logDate(new Date(sourcePackage.publish_time)),
sourcePackage.name, sourcePackage.version);
var that = this;
var username = this.username;
var downurl = sourcePackage.dist.tarball;
@@ -908,9 +912,11 @@ SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePack
devDependencies = Object.keys(sourcePackage.devDependencies || {});
}
that.log(' [%s:%d] syncing, version: %s, dist: %j, no deps: %s, ' +
that.log(' [%s:%d] syncing, delay: %s ms, version: %s, dist: %j, no deps: %s, ' +
'publish on cnpm: %s, dependencies: %d, devDependencies: %d, syncDevDependencies: %s',
sourcePackage.name, versionIndex, sourcePackage.version,
sourcePackage.name, versionIndex,
delay,
sourcePackage.version,
sourcePackage.dist, that.noDep, that._publish,
dependencies.length,
devDependencies.length, this.syncDevDependencies);
@@ -942,7 +948,14 @@ SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePack
try {
// get tarball
logger.syncInfo('[sync_module_worker] downloading %j to %j', downurl, filepath);
var r = yield urllib.request(downurl, options);
var r;
try {
r = yield urllib.request(downurl, options);
} catch (err) {
logger.syncInfo('[sync_module_worker] download %j to %j error: %s', downurl, filepath, err);
throw err;
}
var statusCode = r.status || -1;
// https://github.com/cnpm/cnpmjs.org/issues/325
// if (statusCode === 404) {

View File

@@ -50,6 +50,7 @@
"mysql": "^2.10.2",
"mz": "^2.4.0",
"nodemailer": "^1.3.0",
"normalize-registry-metadata": "^1.1.2",
"semver": "^5.2.0",
"sequelize": "^3.23.4",
"thunkify-wrap": "^1.0.4",

View File

@@ -1,17 +1,4 @@
/**!
* Copyright(c) cnpmjs.org and other contributors.
* MIT Licensed
*
* Authors:
* dead_horse <dead_horse@qq.com>
* fengmk2 <fengmk2@gmail.com> (http://fengmk2.com)
*/
"use strict";
/**
* Module dependencies.
*/
'use strict';
var limit = require('../middleware/limit');
var login = require('../middleware/login');

View File

@@ -1,19 +1,4 @@
/**!
* cnpmjs.org - routes/web.js
*
* Copyright(c) cnpmjs.org and other contributors.
* MIT Licensed
*
* Authors:
* dead_horse <dead_horse@qq.com>
* fengmk2 <m@fengmk2.com> (http://fengmk2.com)
*/
"use strict";
/**
* Module dependencies.
*/
'use strict';
var showPackage = require('../controllers/web/package/show');
var searchPackage = require('../controllers/web/package/search');

View File

@@ -1,6 +1,7 @@
'use strict';
var ms = require('humanize-ms');
var cleanNpmMetadata = require('normalize-registry-metadata');
var urllib = require('../common/urllib');
var config = require('../config');
@@ -19,7 +20,11 @@ function* request(url, options) {
url = registry + url;
var r;
try {
r = yield urllib.requestThunk(url, options);
r = yield urllib.request(url, options);
// https://github.com/npm/registry/issues/87#issuecomment-261450090
if (options.dataType === 'json' && r.data && config.officialNpmReplicate === registry) {
cleanNpmMetadata(r.data);
}
} catch (err) {
var statusCode = err.status || -1;
var data = err.data || '[empty]';

View File

@@ -205,7 +205,7 @@ exports.listPublicModuleNamesByUser = function* (username) {
};
// start must be a date or timestamp
exports.listPublicModuleNamesSince = function* (start) {
exports.listPublicModuleNamesSince = function* listPublicModuleNamesSince(start) {
if (!(start instanceof Date)) {
start = new Date(Number(start));
}

View File

@@ -8,7 +8,7 @@ const streamAwait = require('await-event');
const logger = require('../common/logger');
const config = require('../config');
const db = 'https://replicate.npmjs.com';
const db = config.officialNpmReplicate;
const lastSeqFile = path.join(config.dataDir, '.cnpmjs.org.last_seq.txt');
let _STREAM_ID = 0;

View File

@@ -1,45 +1,32 @@
/**!
* cnpmjs.org - test/services/npm.test.js
*
* Copyright(c) cnpmjs.org and other contributors.
* MIT Licensed
*
* Authors:
* fengmk2 <fengmk2@gmail.com> (http://fengmk2.github.com)
*/
'use strict';
/**
* Module dependencies.
*/
var should = require('should');
var mm = require('mm');
var fs = require('fs');
var path = require('path');
var ChunkStream = require('chunkstream');
var config = require('../../config');
var npm = require('../../services/npm');
var fixtures = path.join(path.dirname(__dirname), 'fixtures');
describe('services/npm.test.js', function () {
describe('services/npm.test.js', () => {
afterEach(mm.restore);
it('should return a module info from source npm', function* () {
var data = yield* npm.get('pedding');
var data = yield npm.get('pedding');
data.name.should.equal('pedding');
});
it('should return null when module not exist', function *() {
var data = yield* npm.get('pedding-not-exists');
var data = yield npm.get('pedding-not-exists');
should.not.exist(data);
});
it.skip('should return error when http error', function* () {
mm.http.request(/\//, new ChunkStream(['{']));
try {
yield* npm.get('pedding-not-exists');
yield npm.get('pedding-not-exists');
throw new Error('should not run this');
} catch (err) {
err.name.should.equal('JSONResponseFormatError');
@@ -51,7 +38,7 @@ describe('services/npm.test.js', function () {
mm.http.request(/\//, content, { statusCode: 500 });
// http://registry.npmjs.org/octopie
try {
yield* npm.get('octopie');
yield npm.get('octopie');
throw new Error('should not run this');
} catch (err) {
err.name.should.equal('NPMServerError');
@@ -60,7 +47,31 @@ describe('services/npm.test.js', function () {
}
});
describe('getPopular()', function () {
describe('request()', () => {
  // Fetches the shelljs document through the replicate endpoint and checks
  // that the per-version metadata comes back in the expected shape.
  it('should request from replicate and clean meta data', function* () {
    const response = yield npm.request('/shelljs', {
      registry: config.officialNpmReplicate,
    });
    const pkg = response.data;
    pkg.name.should.equal('shelljs');

    // One row per version under test. Note that the pre-release version keys
    // carry a dash while the tarball file names do not.
    const expectedVersions = [
      {
        version: '0.0.1-alpha1',
        time: '2012-03-02T21:46:14.725Z',
        shasum: 'cfa9394e29c3eb0fe58998f5bf5bda79aa7d3e2e',
        tarball: 'http://registry.npmjs.org/shelljs/-/shelljs-0.0.1alpha1.tgz',
      },
      {
        version: '0.7.5',
        time: '2016-10-27T05:50:21.479Z',
        shasum: '2eef7a50a21e1ccf37da00df767ec69e30ad0675',
        tarball: 'http://registry.npmjs.org/shelljs/-/shelljs-0.7.5.tgz',
      },
      {
        version: '0.0.6-pre2',
        time: '2012-05-25T18:14:25.441Z',
        shasum: '8c3eecaddba6f425bd5cae001f80a4d224750911',
        tarball: 'http://registry.npmjs.org/shelljs/-/shelljs-0.0.6pre2.tgz',
      },
    ];

    expectedVersions.forEach(expected => {
      pkg.time[expected.version].should.equal(expected.time);
      pkg.versions[expected.version].version.should.equal(expected.version);
      pkg.versions[expected.version].dist.shasum.should.equal(expected.shasum);
      pkg.versions[expected.version].dist.tarball.should.equal(expected.tarball);
    });
  });
});
describe('getPopular()', () => {
it('should return popular modules', function* () {
mm.http.request(/\//, JSON.stringify({
rows: [
@@ -82,7 +93,7 @@ describe('services/npm.test.js', function () {
{ key: ['foo15'], value: 1 },
]
}));
var rows = yield* npm.getPopular(10);
var rows = yield npm.getPopular(10);
rows.should.length(2);
rows[0][0].should.equal('underscore');
});

View File

@@ -0,0 +1,43 @@
// try to fix https://github.com/npm/registry/issues/87
//
// Downloads a list of package names from `url` and, for each name, sends
// `PUT {registry}/{name}/sync` to the target registry, printing the status
// and the sync-log URL for every request.
//
// Exits with code 0 when every request has been issued, or with code 1 (after
// printing the stack) as soon as any request throws.

const urllib = require('urllib');
const co = require('co');

// Thunk-style delay so it can be `yield`ed inside the co generator below.
const sleep = ms => cb => setTimeout(cb, ms);

// node resync_npm.js [registry]
const registry = process.argv[2] || require('../config').sourceNpmRegistry;
const url = 'https://os.alipayobjects.com/rmsportal/eDMScnlBhNhHaGXMJWxjvCjfxMHhYwEx.html';

co(function* () {
  const result = yield urllib.request(url);
  const items = result.data.toString().split('\n');
  // Number of sync requests actually sent (skipped lines are not counted).
  let count = 0;

  for (let item of items) {
    item = item.trim().split(',');
    let name = item[0];
    // Drop the first and last character of the first comma-separated field
    // (presumably the quotes wrapping the package name; verify against the list format).
    name = name.substring(1, name.length - 1);
    if (!name) {
      // Blank line or empty field: nothing to sync.
      continue;
    }

    const r = yield urllib.request(`${registry}/${name}/sync`, {
      method: 'PUT',
      dataType: 'json',
    });
    // The response body may be missing; fall back to an empty object so the
    // log URL below can still be built without throwing.
    const data = r.data || {};
    count++;
    console.log('#%d %s %s, log: %s',
      count, name, r.status, `${registry}/${name}/sync/log/${data.logId}`);

    // Pause for 10 seconds after every 50 requests.
    if (count % 50 === 0) {
      yield sleep(10000);
    }
  }

  // Report the number of packages actually requested, not the raw line count
  // of the downloaded list (which includes blank/skipped lines).
  console.log('All %d packages sync done', count);
  process.exit(0);
}).catch(err => {
  console.error(err.stack);
  process.exit(1);
});