From 7e50299854322309bf3ee374f44777115cf9649b Mon Sep 17 00:00:00 2001 From: Surya Paolo Date: Fri, 16 May 2025 10:26:55 +0200 Subject: [PATCH] - ver 1.2.47 : - corretto errore di modifica scheda - aggiunto scraping (fase 1) --- src/server/models/mypage.js | 5 ++ src/server/models/product.js | 8 +- src/server/models/productInfo.js | 21 ++++- src/server/modules/CronMod.js | 2 + src/server/modules/Scraping.js | 125 ++++++++++++++++++++++++++++++ src/server/router/admin_router.js | 20 ++++- src/server/server.js | 10 +-- src/server/tools/general.js | 39 +++------- src/server/version.txt | 2 +- 9 files changed, 194 insertions(+), 38 deletions(-) create mode 100644 src/server/modules/Scraping.js diff --git a/src/server/models/mypage.js b/src/server/models/mypage.js index 3a5c715..63103ad 100755 --- a/src/server/models/mypage.js +++ b/src/server/models/mypage.js @@ -56,6 +56,9 @@ const MyPageSchema = new Schema({ only_residenti: { type: Boolean, }, + only_admin: { + type: Boolean, + }, color: { type: String, }, @@ -200,6 +203,7 @@ MyPageSchema.statics.findOnlyStruttRec = async function (idapp) { active: 1, onlyif_logged: 1, only_residenti: 1, + only_admin: 1, inmenu: 1, submenu: 1, iconsize: 1, @@ -227,6 +231,7 @@ MyPageSchema.statics.findInternalPages = async function (idapp) { path: 1, onlyif_logged: 1, only_residenti: 1, + only_admin: 1, }).lean(); return result; diff --git a/src/server/models/product.js b/src/server/models/product.js index 9d5edcf..626ccee 100755 --- a/src/server/models/product.js +++ b/src/server/models/product.js @@ -29,6 +29,9 @@ const productSchema = new Schema({ idapp: { type: String, }, + delete: { + type: Boolean, + }, active: { type: Boolean, default: true, @@ -462,7 +465,10 @@ module.exports.findAllIdApp = async function (idapp, code, id, all) { } if (idapp) { - myfind = { idapp }; + myfind = { + idapp, + $or: [{ delete: { $exists: false } }, { delete: false }], + }; } if (!all) { diff --git a/src/server/models/productInfo.js b/src/server/models/productInfo.js index c1edd8f..532b303 100755 --- a/src/server/models/productInfo.js +++ b/src/server/models/productInfo.js @@ -16,6 +16,9 @@ const productInfoSchema = new Schema({ idapp: { type: String, }, + delete: { + type: Boolean, + }, department: { type: String, ref: 'Department' }, @@ -206,7 +209,13 @@ module.exports.findAllIdApp = async function (idapp, code, id) { try { if (idapp) - myfind = { idapp }; + myfind = { + idapp, + $or: [ + { delete: { $exists: false } }, + { delete: false } + ] + }; if (code) { myfind = { ...myfind, code } @@ -539,11 +548,17 @@ module.exports.removeProductInfoWithoutDateUpdatedFromGM = async function (idapp for (const productinfo of arrproductInfo) { // cerca nella tabella Product se esiste idProductInfo = _id e cancella tutti i record che hanno questa corrispondenza if (Product) { - await Product.deleteMany({ idProductInfo: productinfo._id }); + await Product.updateMany( + { idProductInfo: productinfo._id }, + { $set: { delete: true } } + ); } // Ora rimuovi anche questo productInfo - await ProductInfo.deleteOne({ _id: productinfo._id }); + await ProductInfo.updateOne( + { _id: productinfo._id }, + { $set: { delete: true } } + ); } } diff --git a/src/server/modules/CronMod.js b/src/server/modules/CronMod.js index 0fd53f8..ab4b587 100644 --- a/src/server/modules/CronMod.js +++ b/src/server/modules/CronMod.js @@ -53,6 +53,8 @@ class CronMod { if (mydata.dbop === "") { // } else if (mydata.dbop === 'rigeneraTutto') { // await ListaIngresso.Esegui_CronTab(idapp, mydata); + } else if (mydata.dbop === "ScraperDataAmazon") { + await ScraperDataAmazon(idapp, mydata.options) } else if (mydata.dbop === "ReplaceAllCircuits") { // ++ Replace All Circuitname with 'Circuito RIS %s' await Circuit.replaceAllCircuitNames(idapp); diff --git a/src/server/modules/Scraping.js b/src/server/modules/Scraping.js new file mode 100644 index 0000000..7d7c0db --- /dev/null +++ b/src/server/modules/Scraping.js @@ -0,0 +1,125 @@ +import axios from 'axios'; +import cheerio from 'cheerio'; + +class AmazonBookScraper { + constructor() { + this.baseUrl = 'https://www.amazon.it/dp/'; + } + + async fetchPage(isbn) { + const url = `${this.baseUrl}${isbn}`; + try { + const { data } = await axios.get(url, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + + 'AppleWebKit/537.36 (KHTML, like Gecko) ' + + 'Chrome/113.0.0.0 Safari/537.36', + // altri header se necessario + }, + }); + return data; + } catch (err) { + console.error(`Errore fetching ISBN ${isbn}:`, err.message); + return null; + } + } + + extractData(html) { + const $ = cheerio.load(html); + + // Titolo + let title = $('#productTitle').text().trim() || null; + + // Sottotitolo (Amazon spesso lo mette in #productSubtitle o nel titolo, proveremo) + let subtitle = $('#productSubtitle').text().trim() || null; + + // Numero pagine, formato, edizione + // Questi dati spesso sono nella tabella dettagli prodotto con id #detailBullets_feature_div o #productDetailsTable + // Proviamo a estrarre da #detailBullets_feature_div + + let pages = null; + let format = null; + let edition = null; + + $('#detailBullets_feature_div li').each((i, el) => { + const label = $(el).find('span.a-text-bold').text().trim().toLowerCase(); + const value = $(el).find('span').last().text().trim(); + + if (label.includes('pagine') || label.includes('pagine stampate')) { + pages = value; + } else if (label.includes('formato')) { + format = value; + } else if (label.includes('edizione')) { + edition = value; + } + }); + + // fallback su #productDetailsTable (altro possibile layout) + if (!pages || !format || !edition) { + $('#productDetailsTable .content tr').each((i, el) => { + const label = $(el).find('th').text().trim().toLowerCase(); + const value = $(el).find('td').text().trim(); + + if (!pages && (label.includes('pagine') || label.includes('pagine stampate'))) { + pages = value; + } else if (!format && label.includes('formato')) { + format = value; + } else if (!edition && label.includes('edizione')) { + edition = value; + } + }); + } + + return { title, subtitle, pages, format, edition }; + } + + async scrapeISBN(isbn) { + const html = await this.fetchPage(isbn); + if (!html) return null; + + const data = this.extractData(html); + return data; + } + + async scrapeMultiple(isbnList) { + const results = []; + for (const isbn of isbnList) { + console.log(`Scraping ISBN: ${isbn}`); + const data = await this.scrapeISBN(isbn); + results.push({ isbn, ...data }); + // Per evitare blocchi, metti una pausa (es. 2 secondi) + await new Promise((r) => setTimeout(r, 2000)); + } + return results; + } +} + +export async function ScraperDataAmazon(idapp, options) { + const scraper = new AmazonBookScraper(); + const isbn = options.isbn; + + try { + const data = await scraper.scrapeISBN(isbn); + console.log(data); + return data; + } catch (e) { + console.error(e); + return res.status(400).send({ code: server_constants.RIS_CODE_ERR, msg: '' }); + } +} + +export async function ScraperMultipleDataAmazon(idapp, options) { + const scraper = new AmazonBookScraper(); + const isbnList = ['8850224248']; // metti i tuoi ISBN qui + + try { + const books = await scraper.scrapeMultiple(isbnList); + console.log(books); + } catch (e) { + console.error(e); + return res.status(400).send({ code: server_constants.RIS_CODE_ERR, msg: '' }); + } +} + +export default AmazonBookScraper; diff --git a/src/server/router/admin_router.js b/src/server/router/admin_router.js index 1776df2..f62d813 100755 --- a/src/server/router/admin_router.js +++ b/src/server/router/admin_router.js @@ -27,6 +27,8 @@ const Gasordine = require('../models/gasordine'); const { User } = require('../models/user'); +const AmazonBookScraper = require('../modules/Scraping'); + const { Catalog } = require('../models/catalog'); const { RaccoltaCataloghi } = require('../models/raccoltacataloghi'); @@ -547,7 +549,8 @@ router.post('/join-pdf', authenticate, async (req, res) => { ); if (options.stampa) { - outputFileStampa = path.join(full_dir_out, path.basename(tools.removeFileExtension(outputFile))) + '-stampabile.pdf'; + outputFileStampa = + path.join(full_dir_out, path.basename(tools.removeFileExtension(outputFile))) + '-stampabile.pdf'; // Creazione file per STAMPA const ris_stampa = await JoinPDFCatalogs(cataloghi, options, outputFileStampa, true); if (ris_stampa) { @@ -2359,6 +2362,21 @@ router.post('/cloudflare', authenticate, async (req, res) => { } }); +router.post('/scraper', authenticate, async (req, res) => { + const scraper = new AmazonBookScraper(); + const isbn = req.data.options.isbn; + + try { + const data = await scraper.scrapeISBN(isbn); + console.log(data); + + return res.send(data); + } catch (e) { + console.error(e); + return res.status(400).send({ code: server_constants.RIS_CODE_ERR, msg: '' }); + } +}); + router.post('/miab', authenticate, async (req, res) => { try { idapp = req.body.idapp; diff --git a/src/server/server.js b/src/server/server.js index e3c5e2d..92378bd 100755 --- a/src/server/server.js +++ b/src/server/server.js @@ -534,14 +534,14 @@ connectToDatabase(connectionUrl, options) try { // console.log('checkdir', folderprof); - if (!tools.existsSync(folderprof)) { + if (!tools.isFileExists(folderprof)) { console.log('*** Creadir', folderprof); await fs.mkdirSync(folderprof); } folderprof = dir + 'profile/' + myuser.username + '/' + table; // console.log('checkdir', folderprof); - if (!tools.existsSync(folderprof)) { + if (!tools.isFileExists(folderprof)) { console.log('creadir', folderprof); await fs.mkdirSync(folderprof); } @@ -691,10 +691,10 @@ connectToDatabase(connectionUrl, options) } // Verifica esistenza file - if (!tools.existsSync(keyPath)) { + if (!tools.isFileExists(keyPath)) { throw new Error(`Chiave privata non trovata: ${keyPath}`); } - if (!tools.existsSync(certPath)) { + if (!tools.isFileExists(certPath)) { throw new Error(`Certificato non trovato: ${certPath}`); } @@ -944,7 +944,7 @@ connectToDatabase(connectionUrl, options) if (scriptProcess) scriptProcess.kill(); const scriptPath = path.join(__dirname, '..', '..', parsed.scriptName); - if (!tools.existsSync(scriptPath)) { + if (!tools.isFileExists(scriptPath)) { return ws.send(JSON.stringify({ type: 'error', data: 'Script non trovato o non autorizzato' })); } diff --git a/src/server/tools/general.js b/src/server/tools/general.js index 9fd78f2..da1aa69 100755 --- a/src/server/tools/general.js +++ b/src/server/tools/general.js @@ -445,7 +445,7 @@ class ImageDownloader { for (let attempt = 1; attempt <= maxRetries; attempt++) { try { // Verifica se il filepath esiste già - if (await this.existsSync(filepath)) { + if (await this.isFileExists(filepath)) { fs.unlinkSync(filepath); } @@ -533,7 +533,7 @@ class ImageDownloader { console.error(`❌ Errore nel tentativo ${attempt}/${maxRetries}:`, error.message); // Pulizia del file in caso di errore - if (await this.existsSync(filepath)) { + if (await this.isFileExists(filepath)) { fs.unlinkSync(filepath); } @@ -554,15 +554,6 @@ class ImageDownloader { } } - async existsSync(tempFolder) { - try { - await fs.access(tempFolder); - // La directory esiste - } catch { - // La directory NON esiste - } - } - // Funzione per estrarre il nome del file dall'URL extractFileNameFromUrl(url) { const match = url.match(/\/([^/?#]+)(?:[?#]|$)/); @@ -736,15 +727,6 @@ module.exports = { console.log(args); }, - existsSync: async function (tempFolder) { - try { - await fs.access(tempFolder); - // La directory esiste - } catch { - // La directory NON esiste - } - }, - mylogserr: function (...args) { console.error(args); }, @@ -4204,7 +4186,7 @@ module.exports = { async mkdirpath(dirPath) { try { - if (!await this.existsSync(dirPath)) { + if (!await this.isFileExists(dirPath)) { fs.mkdirSync(dirPath, { recursive: true }); } } catch (e) { @@ -4262,8 +4244,11 @@ module.exports = { async isFileExists(filename) { try { - return await this.existsSync(filename); + let fileExists = await fs.promises.stat(filename).then(() => true).catch(() => false); + // console.log(filename, 'esiste', fileExists) + return fileExists; } catch (e) { + // console.log(filename, 'esiste', 'FALSE') return false } }, @@ -5976,7 +5961,7 @@ module.exports = { img = dir + img; /*if (checkifExist) { - if (!this.existsSync(img)) { + if (!this.isFileExists(img)) { return ''; } }*/ @@ -6150,9 +6135,9 @@ module.exports = { server_constants.DIR_UPLOAD + '/products/' + productInfo.image_link.split('/').pop(); const savePath = path.resolve(__dirname, img); // Sostituisci con il percorso dove salvare l'immagine - let scaricaimg = !productInfo.imagefile || !await this.existsSync(savePath); + let scaricaimg = !productInfo.imagefile || !await this.isFileExists(savePath); - if (!productInfo.imagefile && await this.existsSync(savePath)) { + if (!productInfo.imagefile && await this.isFileExists(savePath)) { // esiste il file, ma sul DB non è corretto const stats = fs.statSync(savePath); // Ottieni informazioni sul file @@ -6166,7 +6151,7 @@ module.exports = { } - if (productInfo.imagefile && await this.existsSync(savePath)) { + if (productInfo.imagefile && await this.isFileExists(savePath)) { // esiste il file, ma sul DB non è corretto const stats = fs.statSync(savePath); // Ottieni informazioni sul file @@ -6209,7 +6194,7 @@ module.exports = { const filecompleto = path.resolve(__dirname, img); // Sostituisci con il percorso dove salvare l'immagine // Se non esiste lo scarico ! - fileesistente = await this.existsSync(filecompleto); + fileesistente = await this.isFileExists(filecompleto); } if (!vecchiomodo && (!productInfo.image_link || !fileesistente)) { diff --git a/src/server/version.txt b/src/server/version.txt index 6e10d36..c3df286 100644 --- a/src/server/version.txt +++ b/src/server/version.txt @@ -1 +1 @@ -1.2.46 \ No newline at end of file +1.2.47 \ No newline at end of file