From bf5f02908f6bfa09ff79e3a4fd429aaecbe00f9b Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Sun, 20 Nov 2016 01:27:44 +0800 Subject: [PATCH] fix: try to sync from official replicate (#1076) --- config/index.js | 1 + controllers/registry/package/list_since.js | 18 +------- controllers/sync_module_worker.js | 31 +++++++++---- package.json | 1 + routes/registry.js | 15 +----- routes/web.js | 17 +------ services/npm.js | 7 ++- services/package.js | 2 +- sync/changes_stream_syncer.js | 2 +- test/services/npm.test.js | 53 +++++++++++++--------- tools/resync_npm_issue_87.js | 43 ++++++++++++++++++ 11 files changed, 111 insertions(+), 79 deletions(-) create mode 100644 tools/resync_npm_issue_87.js diff --git a/config/index.js b/config/index.js index bd5cf8c..4163367 100644 --- a/config/index.js +++ b/config/index.js @@ -163,6 +163,7 @@ var config = { // but sometimes will request it for some package infomations // please don't change it if not necessary officialNpmRegistry: 'https://registry.npmjs.com', + officialNpmReplicate: 'https://replicate.npmjs.com', // sync source, upstream registry // If you want to directly sync from official npm's registry diff --git a/controllers/registry/package/list_since.js b/controllers/registry/package/list_since.js index ab8416d..c1de65e 100644 --- a/controllers/registry/package/list_since.js +++ b/controllers/registry/package/list_since.js @@ -1,19 +1,5 @@ -/**! - * cnpmjs.org - controllers/registry/package/list_since.js - * - * Copyright(c) fengmk2 and other contributors. - * MIT Licensed - * - * Authors: - * fengmk2 (http://fengmk2.github.com) - */ - 'use strict'; -/** - * Module dependencies. - */ - var packageService = require('../../../services/package'); var A_WEEK_MS = 3600000 * 24 * 7; @@ -21,7 +7,7 @@ var A_WEEK_MS = 3600000 * 24 * 7; // GET /-/all/since?stale=update_after&startkey={key} // List packages names since startkey // https://github.com/npm/npm-registry-client/blob/master/lib/get.js#L89 -module.exports = function* () { +module.exports = function* listSince() { var query = this.query; if (query.stale !== 'update_after') { this.status = 400; @@ -49,7 +35,7 @@ module.exports = function* () { Date(), query, this.ip); } - var names = yield* packageService.listPublicModuleNamesSince(startkey); + var names = yield packageService.listPublicModuleNamesSince(startkey); var result = { _updated: updated }; names.forEach(function (name) { result[name] = true; diff --git a/controllers/sync_module_worker.js b/controllers/sync_module_worker.js index 2ce274a..ffee221 100644 --- a/controllers/sync_module_worker.js +++ b/controllers/sync_module_worker.js @@ -309,8 +309,10 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) { // get from npm const packageUrl = '/' + name.replace('/', '%2f'); + // try to sync from official replicate when source npm registry is not cnpmjs.org + const registry = config.sourceNpmRegistryIsCNpm ? config.sourceNpmRegistry : config.officialNpmReplicate; try { - var result = yield npmSerivce.request(packageUrl); + var result = yield npmSerivce.request(packageUrl, { registry: registry }); pkg = result.data; status = result.status; } catch (err) { @@ -318,7 +320,7 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) { if (!err.res || err.res.statusCode !== 404) { var errMessage = err.name + ': ' + err.message; that.log('[c#%s] [error] [%s] get package(%s%s) error: %s, status: %s', - concurrencyId, name, config.sourceNpmRegistry, packageUrl, errMessage, status); + concurrencyId, name, registry, packageUrl, errMessage, status); yield that._doneOne(concurrencyId, name, false); return; } @@ -337,14 +339,13 @@ SyncModuleWorker.prototype.next = function* (concurrencyId) { if (!pkg) { that.log('[c#%s] [error] [%s] get package(%s%s) error: package not exists, status: %s', - concurrencyId, name, config.sourceNpmRegistry, packageUrl, status); + concurrencyId, name, registry, packageUrl, status); yield that._doneOne(concurrencyId, name, true); return; } that.log('[c#%d] [%s] package(%s%s) status: %s, dist-tags: %j, time.modified: %s, start...', - concurrencyId, name, config.sourceNpmRegistry, packageUrl, status, - pkg['dist-tags'], pkg.time && pkg.time.modified); + concurrencyId, name, registry, packageUrl, status, pkg['dist-tags'], pkg.time && pkg.time.modified); if (unpublishedInfo) { try { @@ -884,7 +885,10 @@ SyncModuleWorker.prototype._sync = function* (name, pkg) { }; SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePackage) { - logger.syncInfo('[sync_module_worker] start sync %s@%s', sourcePackage.name, sourcePackage.version); + var delay = Date.now() - sourcePackage.publish_time; + logger.syncInfo('[sync_module_worker] delay: %s ms, publish_time: %s, start sync %s@%s', + delay, utility.logDate(new Date(sourcePackage.publish_time)), + sourcePackage.name, sourcePackage.version); var that = this; var username = this.username; var downurl = sourcePackage.dist.tarball; @@ -908,9 +912,11 @@ SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePack devDependencies = Object.keys(sourcePackage.devDependencies || {}); } - that.log(' [%s:%d] syncing, version: %s, dist: %j, no deps: %s, ' + + that.log(' [%s:%d] syncing, delay: %s ms, version: %s, dist: %j, no deps: %s, ' + 'publish on cnpm: %s, dependencies: %d, devDependencies: %d, syncDevDependencies: %s', - sourcePackage.name, versionIndex, sourcePackage.version, + sourcePackage.name, versionIndex, + delay, + sourcePackage.version, sourcePackage.dist, that.noDep, that._publish, dependencies.length, devDependencies.length, this.syncDevDependencies); @@ -942,7 +948,14 @@ SyncModuleWorker.prototype._syncOneVersion = function *(versionIndex, sourcePack try { // get tarball logger.syncInfo('[sync_module_worker] downloading %j to %j', downurl, filepath); - var r = yield urllib.request(downurl, options); + var r; + try { + r = yield urllib.request(downurl, options); + } catch (err) { + logger.syncInfo('[sync_module_worker] download %j to %j error: %s', downurl, filepath, err); + throw err; + } + var statusCode = r.status || -1; // https://github.com/cnpm/cnpmjs.org/issues/325 // if (statusCode === 404) { diff --git a/package.json b/package.json index 2f9a4e2..e93fa0e 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "mysql": "^2.10.2", "mz": "^2.4.0", "nodemailer": "^1.3.0", + "normalize-registry-metadata": "^1.1.2", "semver": "^5.2.0", "sequelize": "^3.23.4", "thunkify-wrap": "^1.0.4", diff --git a/routes/registry.js b/routes/registry.js index 4231dbc..c787fbc 100644 --- a/routes/registry.js +++ b/routes/registry.js @@ -1,17 +1,4 @@ -/**! - * Copyright(c) cnpmjs.org and other contributors. - * MIT Licensed - * - * Authors: - * dead_horse - * fengmk2 (http://fengmk2.com) - */ - -"use strict"; - -/** - * Module dependencies. - */ +'use strict'; var limit = require('../middleware/limit'); var login = require('../middleware/login'); diff --git a/routes/web.js b/routes/web.js index 8a00b3f..cb8f6bf 100644 --- a/routes/web.js +++ b/routes/web.js @@ -1,19 +1,4 @@ -/**! - * cnpmjs.org - routes/web.js - * - * Copyright(c) cnpmjs.org and other contributors. - * MIT Licensed - * - * Authors: - * dead_horse - * fengmk2 (http://fengmk2.com) - */ - -"use strict"; - -/** - * Module dependencies. - */ +'use strict'; var showPackage = require('../controllers/web/package/show'); var searchPackage = require('../controllers/web/package/search'); diff --git a/services/npm.js b/services/npm.js index b275773..faa4122 100644 --- a/services/npm.js +++ b/services/npm.js @@ -1,6 +1,7 @@ 'use strict'; var ms = require('humanize-ms'); +var cleanNpmMetadata = require('normalize-registry-metadata'); var urllib = require('../common/urllib'); var config = require('../config'); @@ -19,7 +20,11 @@ function* request(url, options) { url = registry + url; var r; try { - r = yield urllib.requestThunk(url, options); + r = yield urllib.request(url, options); + // https://github.com/npm/registry/issues/87#issuecomment-261450090 + if (options.dataType === 'json' && r.data && config.officialNpmReplicate === registry) { + cleanNpmMetadata(r.data); + } } catch (err) { var statusCode = err.status || -1; var data = err.data || '[empty]'; diff --git a/services/package.js b/services/package.js index 43ed7d4..30d2ab5 100644 --- a/services/package.js +++ b/services/package.js @@ -205,7 +205,7 @@ exports.listPublicModuleNamesByUser = function* (username) { }; // start must be a date or timestamp -exports.listPublicModuleNamesSince = function* (start) { +exports.listPublicModuleNamesSince = function* listPublicModuleNamesSince(start) { if (!(start instanceof Date)) { start = new Date(Number(start)); } diff --git a/sync/changes_stream_syncer.js b/sync/changes_stream_syncer.js index 1059d54..e462c97 100644 --- a/sync/changes_stream_syncer.js +++ b/sync/changes_stream_syncer.js @@ -8,7 +8,7 @@ const streamAwait = require('await-event'); const logger = require('../common/logger'); const config = require('../config'); -const db = 'https://replicate.npmjs.com'; +const db = config.officialNpmReplicate; const lastSeqFile = path.join(config.dataDir, '.cnpmjs.org.last_seq.txt'); let _STREAM_ID = 0; diff --git a/test/services/npm.test.js b/test/services/npm.test.js index ab2f765..8fab7dc 100644 --- a/test/services/npm.test.js +++ b/test/services/npm.test.js @@ -1,45 +1,32 @@ -/**! - * cnpmjs.org - test/services/npm.test.js - * - * Copyright(c) cnpmjs.org and other contributors. - * MIT Licensed - * - * Authors: - * fengmk2 (http://fengmk2.github.com) - */ - 'use strict'; -/** - * Module dependencies. - */ - var should = require('should'); var mm = require('mm'); var fs = require('fs'); var path = require('path'); var ChunkStream = require('chunkstream'); +var config = require('../../config'); var npm = require('../../services/npm'); var fixtures = path.join(path.dirname(__dirname), 'fixtures'); -describe('services/npm.test.js', function () { +describe('services/npm.test.js', () => { afterEach(mm.restore); it('should return a module info from source npm', function* () { - var data = yield* npm.get('pedding'); + var data = yield npm.get('pedding'); data.name.should.equal('pedding'); }); it('should return null when module not exist', function *() { - var data = yield* npm.get('pedding-not-exists'); + var data = yield npm.get('pedding-not-exists'); should.not.exist(data); }); it.skip('should return error when http error', function* () { mm.http.request(/\//, new ChunkStream(['{'])); try { - yield* npm.get('pedding-not-exists'); + yield npm.get('pedding-not-exists'); throw new Error('should not run this'); } catch (err) { err.name.should.equal('JSONResponseFormatError'); @@ -51,7 +38,7 @@ describe('services/npm.test.js', function () { mm.http.request(/\//, content, { statusCode: 500 }); // http://registry.npmjs.org/octopie try { - yield* npm.get('octopie'); + yield npm.get('octopie'); throw new Error('should not run this'); } catch (err) { err.name.should.equal('NPMServerError'); @@ -60,7 +47,31 @@ describe('services/npm.test.js', function () { } }); - describe('getPopular()', function () { + describe('request()', () => { + it('should request from replicate and clean meta data', function* () { + const result = yield npm.request('/shelljs', { + registry: config.officialNpmReplicate, + }); + const pkg = result.data; + pkg.name.should.equal('shelljs'); + pkg.time['0.0.1-alpha1'].should.equal('2012-03-02T21:46:14.725Z'); + pkg.versions['0.0.1-alpha1'].version.should.equal('0.0.1-alpha1'); + pkg.versions['0.0.1-alpha1'].dist.shasum.should.equal('cfa9394e29c3eb0fe58998f5bf5bda79aa7d3e2e'); + pkg.versions['0.0.1-alpha1'].dist.tarball.should.equal('http://registry.npmjs.org/shelljs/-/shelljs-0.0.1alpha1.tgz'); + + pkg.time['0.7.5'].should.equal('2016-10-27T05:50:21.479Z'); + pkg.versions['0.7.5'].version.should.equal('0.7.5'); + pkg.versions['0.7.5'].dist.shasum.should.equal('2eef7a50a21e1ccf37da00df767ec69e30ad0675'); + pkg.versions['0.7.5'].dist.tarball.should.equal('http://registry.npmjs.org/shelljs/-/shelljs-0.7.5.tgz'); + + pkg.time['0.0.6-pre2'].should.equal('2012-05-25T18:14:25.441Z'); + pkg.versions['0.0.6-pre2'].version.should.equal('0.0.6-pre2'); + pkg.versions['0.0.6-pre2'].dist.shasum.should.equal('8c3eecaddba6f425bd5cae001f80a4d224750911'); + pkg.versions['0.0.6-pre2'].dist.tarball.should.equal('http://registry.npmjs.org/shelljs/-/shelljs-0.0.6pre2.tgz'); + }); + }); + + describe('getPopular()', () => { it('should return popular modules', function* () { mm.http.request(/\//, JSON.stringify({ rows: [ @@ -82,7 +93,7 @@ describe('services/npm.test.js', function () { { key: ['foo15'], value: 1 }, ] })); - var rows = yield* npm.getPopular(10); + var rows = yield npm.getPopular(10); rows.should.length(2); rows[0][0].should.equal('underscore'); }); diff --git a/tools/resync_npm_issue_87.js b/tools/resync_npm_issue_87.js new file mode 100644 index 0000000..25665d7 --- /dev/null +++ b/tools/resync_npm_issue_87.js @@ -0,0 +1,43 @@ +// try to fix https://github.com/npm/registry/issues/87 + +const urllib = require('urllib'); +const co = require('co'); + +const sleep = ms => cb => setTimeout(cb, ms); + +// node resync_npm.js [registry] +const registry = process.argv[2] || require('../config').sourceNpmRegistry; +const url = 'https://os.alipayobjects.com/rmsportal/eDMScnlBhNhHaGXMJWxjvCjfxMHhYwEx.html'; + +co(function* () { + const result = yield urllib.request(url); + const items = result.data.toString().split('\n'); + let count = 0; + for (let item of items) { + item = item.trim().split(','); + let name = item[0]; + name = name.substring(1, name.length - 1); + if (!name) { + continue; + } + + const r = yield urllib.request(`${registry}/${name}/sync`, { + method: 'PUT', + dataType: 'json', + }); + + r.data = r.data || {}; + + count++; + console.log('#%d %s %s, log: %s', + count, name, r.status, `${registry}/${name}/sync/log/${r.data.logId}`); + if (count % 50 === 0) { + yield sleep(10000); + } + } + console.log('All %d packages sync done', items.length); + process.exit(0); +}).catch(err => { + console.error(err.stack); + process.exit(1); +});