mirror of
https://github.com/nadimkobeissi/mkbsd.git
synced 2025-01-19 03:37:47 -05:00
Keep track of previously downloaded files
Refactor the image download script to track and skip previously downloaded files, using a unique key and consistent naming: - Added logic to use the unique key from the JSON data for consistent filenames and tracking. - Improved the file-existence check to skip downloading files that already exist in the directory. - Updated the JSON list to store the keys of downloaded files for persistent tracking across runs. - Incorporated a delay between downloads for smoother processing. - Cleaned up old redundant code and ensured consistency in file naming and tracking. - Added ASCII art to indicate the start of the download process.
This commit is contained in:
parent
82e50c64f0
commit
815a9eabdc
1 changed file with 79 additions and 17 deletions
90
mkbsd.js
90
mkbsd.js
|
@ -1,14 +1,19 @@
|
||||||
// Copyright 2024 Nadim Kobeissi
|
const fs = require('fs');
|
||||||
// Licensed under the WTFPL License
|
const path = require('path');
|
||||||
|
const crypto = require('crypto');
|
||||||
const fs = require(`fs`);
|
|
||||||
const path = require(`path`);
|
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s';
|
const url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s';
|
||||||
const delay = (ms) => {
|
const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
|
||||||
return new Promise(resolve => setTimeout(resolve, ms));
|
const downloadedListPath = path.join(__dirname, 'downloadedList.json');
|
||||||
|
let downloadedList = [];
|
||||||
|
|
||||||
|
// Load existing downloaded list if it exists
|
||||||
|
if (fs.existsSync(downloadedListPath)) {
|
||||||
|
const downloadedData = await fs.promises.readFile(downloadedListPath, 'utf8');
|
||||||
|
downloadedList = JSON.parse(downloadedData);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url);
|
const response = await fetch(url);
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
|
@ -16,30 +21,63 @@ async function main() {
|
||||||
}
|
}
|
||||||
const jsonData = await response.json();
|
const jsonData = await response.json();
|
||||||
const data = jsonData.data;
|
const data = jsonData.data;
|
||||||
|
|
||||||
if (!data) {
|
if (!data) {
|
||||||
throw new Error('⛔ JSON does not have a "data" property at its root.');
|
throw new Error('⛔ JSON does not have a "data" property at its root.');
|
||||||
}
|
}
|
||||||
const downloadDir = path.join(__dirname, 'downloads');
|
|
||||||
|
const downloadDir = path.join(__dirname, 'downloads-1');
|
||||||
if (!fs.existsSync(downloadDir)) {
|
if (!fs.existsSync(downloadDir)) {
|
||||||
fs.mkdirSync(downloadDir);
|
fs.mkdirSync(downloadDir);
|
||||||
console.info(`📁 Created directory: ${downloadDir}`);
|
console.info(`📁 Created directory: ${downloadDir}`);
|
||||||
}
|
}
|
||||||
let fileIndex = 1;
|
|
||||||
|
let downloadedCount = 0;
|
||||||
|
let skippedCount = 0;
|
||||||
|
|
||||||
for (const key in data) {
|
for (const key in data) {
|
||||||
const subproperty = data[key];
|
const subproperty = data[key];
|
||||||
if (subproperty && subproperty.dhd) {
|
if (subproperty && subproperty.dhd) {
|
||||||
|
// Use the unique key to track downloads and in the file name
|
||||||
const imageUrl = subproperty.dhd;
|
const imageUrl = subproperty.dhd;
|
||||||
console.info(`🔍 Found image URL!`);
|
const imageName = `${extractNameFromUrl(imageUrl)}-${key}`;
|
||||||
await delay(100);
|
|
||||||
const ext = path.extname(new URL(imageUrl).pathname) || '.jpg';
|
const ext = path.extname(new URL(imageUrl).pathname) || '.jpg';
|
||||||
const filename = `${fileIndex}${ext}`;
|
const filePath = path.join(downloadDir, `${imageName}${ext}`);
|
||||||
const filePath = path.join(downloadDir, filename);
|
|
||||||
|
// Check if the file already exists
|
||||||
|
if (fs.existsSync(filePath)) {
|
||||||
|
// If the file exists but the key is missing in the JSON, add it to avoid re-downloading
|
||||||
|
if (!downloadedList.includes(key)) {
|
||||||
|
downloadedList.push(key);
|
||||||
|
console.info(`✅ Found existing file, added key to list: ${filePath}`);
|
||||||
|
await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
|
||||||
|
}
|
||||||
|
skippedCount++;
|
||||||
|
} else {
|
||||||
|
// Download the image only if it doesn't exist
|
||||||
|
downloadedCount++;
|
||||||
|
console.info(`🔍 Found new image URL: ${imageUrl}`);
|
||||||
|
|
||||||
|
// Download the image
|
||||||
await downloadImage(imageUrl, filePath);
|
await downloadImage(imageUrl, filePath);
|
||||||
console.info(`🖼️ Saved image to ${filePath}`);
|
console.info(`🖼️ Saved image to ${filePath}`);
|
||||||
fileIndex++;
|
|
||||||
|
// Add the unique key to the downloaded list
|
||||||
|
downloadedList.push(key);
|
||||||
|
|
||||||
|
// Save the updated downloaded list to JSON file
|
||||||
|
await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
|
||||||
|
console.info(`📄 Updated downloaded list with key: ${key}`);
|
||||||
|
|
||||||
|
// Delay for the next download
|
||||||
await delay(250);
|
await delay(250);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`🚀 🚀 🚀 Downloaded ${downloadedCount} new images`);
|
||||||
|
console.info(`✅ Skipped ${skippedCount} images that already exist`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error: ${error.message}`);
|
console.error(`Error: ${error.message}`);
|
||||||
}
|
}
|
||||||
|
@ -55,6 +93,30 @@ async function downloadImage(url, filePath) {
|
||||||
await fs.promises.writeFile(filePath, buffer);
|
await fs.promises.writeFile(filePath, buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build a deterministic base name for an image from its URL.
 *
 * The result has the form `<prefix>-<baseName>`:
 *  - `prefix` is taken from the first path segment that starts with `a~`:
 *    the text after the first `~`, cut at the first `_`, lowercased.
 *    It is `'unknown'` when no such segment exists.
 *  - `baseName` is the last path segment, cut at the first `.` and at the
 *    first `~`, with every non-alphanumeric character removed, lowercased.
 *
 * @param {string} url - Absolute image URL.
 * @returns {string} `<prefix>-<baseName>`, or `image-<md5 hex of url>` when
 *   the URL cannot be parsed.
 */
function extractNameFromUrl(url) {
  try {
    // `pathname` never contains the query string, so the last segment needs
    // no '?' stripping.
    const urlParts = new URL(url).pathname.split('/');
    const lastSegment = urlParts[urlParts.length - 1];

    // Get the prefix part (e.g., 'hytha', 'outrunyouth', etc.)
    const prefixPart = urlParts.find((part) => part.startsWith('a~'));
    const prefix = prefixPart
      ? prefixPart.split('~')[1].split('_')[0].toLowerCase()
      : 'unknown';

    // Simplify the base name: drop the extension and everything after the
    // first tilde (~), then keep only alphanumeric characters.
    const baseName = lastSegment
      .split('.')[0]
      .split('~')[0]
      .replace(/[^a-zA-Z0-9]+/g, '')
      .toLowerCase();

    return `${prefix}-${baseName}`;
  } catch (error) {
    console.error(`Error extracting name from URL: ${error.message}, ${url}`);

    // Fallback to a deterministic name using a hash if extraction fails.
    // String() keeps the fallback from throwing when `url` is not a string
    // (Hash.update rejects undefined/null/numbers).
    const hash = crypto.createHash('md5').update(String(url)).digest('hex');
    return `image-${hash}`;
  }
}
|
||||||
|
|
||||||
function asciiArt() {
|
function asciiArt() {
|
||||||
console.info(`
|
console.info(`
|
||||||
/$$ /$$ /$$ /$$ /$$$$$$$ /$$$$$$ /$$$$$$$
|
/$$ /$$ /$$ /$$ /$$$$$$$ /$$$$$$ /$$$$$$$
|
||||||
|
|
Loading…
Reference in a new issue