| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- 'use strict';
- const fs = require('fs');
- const path = require('path');
- const config = require('../build.json');
// Languages for which a data file can be generated; the first entry is the default.
const languages = ['de', 'en'];

// The language is taken from the first command line argument. A missing or
// unsupported argument falls back to the default language.
const requestedLang = process.argv[2];
const lang = languages.includes(requestedLang) ? requestedLang : languages[0];

// Switch the working directory to the location of this script so that
// the relative resource paths used below can be resolved.
const scriptDir = path.dirname(process.argv[1]);
process.chdir(scriptDir);
/**
 * Creates a directory, including any missing parent directories.
 * Calling it for a directory that already exists is not an error.
 *
 * `fs.mkdirSync` is synchronous and does not accept a callback: failures are
 * reported by throwing. The error is therefore logged in a `catch` block and
 * rethrown, so the caller still sees the failure.
 *
 * @param {string} dir - Path of the directory to create.
 * @throws {Error} The error raised by `fs.mkdirSync` when creation fails.
 */
function createDirectory(dir) {
  try {
    fs.mkdirSync(dir, { recursive: true });
  } catch (error) {
    console.error('Error while creating directory', error);
    throw error;
  }
}
/**
 * Parses a numeric cell that may use a comma as decimal separator.
 *
 * @param {string} cell - Raw cell text, e.g. `"0,75"`.
 * @returns {number} The parsed number, or `NaN` if the cell holds no number.
 */
function parseDecimal(cell) {
  return parseFloat(cell.replace(/,/g, '.'));
}

/**
 * Parses the job TSV into job records.
 *
 * The first two lines are treated as headers and skipped. Rows that are too
 * short to contain a probability cell (e.g. blank lines) and rows whose
 * probability cell is not a number are ignored. A probability of exactly 0 is
 * a valid value and is kept.
 *
 * @param {string} tsv - Content of the TSV file.
 * @returns {Array<{level0: string, level1: string, label: string, prob: number, points: number[]}>}
 *   The parsed jobs in file order.
 */
function parseJobs(tsv) {
  const headerRowCount = 2;
  const probCol = 4;
  // Columns holding the points for skills and abilities.
  const pointCols = [6, 7, 8, 9, 10, 11, 12, 13];

  const rows = tsv
    .trim()
    .split(/\r?\n/)
    .map((line) => line.split('\t'));

  const jobs = [];
  for (const row of rows.slice(headerRowCount)) {
    if (row.length <= probCol) {
      continue;
    }
    const prob = parseDecimal(row[probCol]);
    // Only an empty / non-numeric probability disqualifies a job, not 0.
    if (Number.isNaN(prob)) {
      continue;
    }
    jobs.push({
      level0: row[0],
      level1: row[1],
      label: row[2],
      prob,
      points: pointCols.map((col) => parseDecimal(row[col])),
    });
  }
  return jobs;
}

/**
 * Finds the jobs most similar to `job` that have a strictly lower probability.
 *
 * Similarity is the squared Euclidean distance between the point vectors;
 * smaller is more similar.
 *
 * @param {{prob: number, points: number[]}} job - Job to find alternatives for.
 * @param {Array<{prob: number, points: number[]}>} jobs - All jobs; the position
 *   in this array is the index reported for each similar job.
 * @param {number} limit - Maximum number of similar jobs to return.
 * @returns {Array<{index: number, prob: number}>} Closest jobs, nearest first.
 */
function findSimilarJobs(job, jobs, limit) {
  const candidates = [];
  jobs.forEach((other, index) => {
    // Skip the job itself and every job that is not less likely to be lost.
    if (other === job || other.prob >= job.prob) {
      return;
    }
    const distance = job.points.reduce((sum, point, i) => {
      const diff = point - other.points[i];
      return sum + diff * diff;
    }, 0);
    candidates.push({ index, prob: other.prob, distance });
  });

  return candidates
    .sort((a, b) => a.distance - b.distance)
    .slice(0, limit)
    .map((candidate) => ({ index: candidate.index, prob: candidate.prob }));
}

/**
 * Reads `jobloss_<lang>.tsv` from the configured data directory, determines
 * for every job the five most similar jobs with a lower probability, and
 * writes the result to `data_<lang>.json` in the configured output directory.
 *
 * Paths are relative to the parent of the current working directory. The
 * output directory must already exist.
 */
function transformData() {
  const inputFile = path.join('../', config.path.data, `jobloss_${lang}.tsv`);
  const jobs = parseJobs(fs.readFileSync(inputFile, 'utf8'));

  const result = jobs.map((job) => ({
    level0: job.level0,
    level1: job.level1,
    label: job.label,
    prob: job.prob,
    similar: findSimilarJobs(job, jobs, 5),
    skills: job.points,
  }));

  // Store the result as JSON.
  const outputFile = path.join(
    '../',
    config.path.output,
    config.path.data,
    `data_${lang}.json`
  );
  fs.writeFileSync(outputFile, JSON.stringify(result, null, '\t'), 'utf8');
}
// Make sure the output data directory exists, then generate the JSON file into it.
const outputDataDir = path.join('../', config.path.output, config.path.data);
createDirectory(outputDataDir);
transformData();
|