const axios = require('axios');
const cheerio = require('cheerio');
const crypto = require('crypto');
const fs = require('fs/promises');
const https = require('https');
// Kept for backward compatibility with the original dependency list; the
// SSL_OP_* constants are exposed by `crypto.constants`, not by `tls`.
const tls = require('tls');

/** Matches the 10-character ASIN in an Amazon product URL (`/dp/<ASIN>`). */
const ASIN_URL_PATTERN = /\/dp\/([A-Z0-9]{10})/;

/**
 * Extracts the ASIN from a product URL.
 * @param {*} url - Value expected to be a URL string containing `/dp/<ASIN>`.
 * @returns {string|null} The ASIN, or null when `url` is not a string or has no match.
 */
function extractAsinFromUrl(url) {
  if (typeof url !== 'string') {
    return null;
  }
  const match = url.match(ASIN_URL_PATTERN);
  return match ? match[1] : null;
}

/**
 * Returns a uniformly chosen element of a non-empty array.
 * @param {Array} items
 * @returns {*}
 */
function pickRandom(items) {
  return items[Math.floor(Math.random() * items.length)];
}

/**
 * Returns a shuffled copy of `items` (Fisher–Yates). Unlike sorting with a
 * random comparator, every permutation is equally likely.
 * @param {Array} items
 * @returns {Array}
 */
function shuffled(items) {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}

/**
 * Scrapes offer listings for a set of ASINs, keeps the cheapest offer per
 * product, pushes prices to a backend API and confirms pending price alerts.
 */
class EnhancedProductScraper {
  constructor() {
    this.activeRequests = 0;
    this.maxConcurrent = 150;
    this.results = new Map();
    this.totalProducts = 0;
    this.completedProducts = 0;
    this.currentUserAgent = this.generateRandomChromeVersion().userAgent;
    this.currentLanguage = this.generateRandomLanguage();
    this.BATCH_SIZE = 500;
    this.API_URL = 'http://164.132.203.174:3004/products/prices';
    this.PRODUCTS_API_URL = 'http://164.132.203.174:3004/products/random';
    this.pendingPrices = [];
    this.ALERTS_API_URL = 'http://164.132.203.174:3004/alerts/pending';
    this.UPDATE_ALERT_STATUS_URL = 'http://164.132.203.174:3004/alerts';
    this.checkAlertsInterval = null;
    this.isProcessingAlerts = false;
    this.NOTIFICATIONS_API_URL = 'http://164.132.203.174:3004/notifications/send';
    this.processedAsins = new Set();
    this.inProgressAsins = new Set();
    this.shouldTerminate = false;

    // Custom TLS configuration with randomisation.
    this.tlsConfig = this.generateRandomTLSConfig();

    // Custom HTTPS agent carrying the TLS configuration.
    this.httpsAgent = new https.Agent({
      ...this.tlsConfig,
      keepAlive: true,
      timeout: 60000,
    });

    // Browser fingerprint values used to fill request headers.
    this.browserFingerprint = this.generateBrowserFingerprint();
  }

  /**
   * Builds a random Chrome-on-Windows identity.
   * @returns {{userAgent: string, chromeVersion: number, platformVersion: string, fullVersion: string}}
   */
  generateRandomChromeVersion() {
    const osVersions = [
      { os: 'Windows NT 10.0', name: 'Windows 10', platformVersion: '10.0.0' },
      { os: 'Windows NT 10.0', name: 'Windows 11', platformVersion: '10.0.0' },
      { os: 'Windows NT 11.0', name: 'Windows 11', platformVersion: '11.0.0' },
    ];
    const architectures = ['Win64; x64', 'Win64; ARM64'];
    const browsers = [
      {
        name: 'Chrome',
        baseUA: 'Mozilla/5.0 ({osInfo}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36',
        majorVersion: { min: 120, max: 122 },
      },
    ];

    const selectedOS = pickRandom(osVersions);
    const selectedArch = pickRandom(architectures);
    const osInfo = `${selectedOS.os}; ${selectedArch}`;

    const selectedBrowser = browsers[0];
    const { min, max } = selectedBrowser.majorVersion;
    const majorVersion = Math.floor(Math.random() * (max - min + 1)) + min;
    const minorVersion = Math.floor(Math.random() * 100);
    const buildVersion = Math.floor(Math.random() * 200);
    const patchVersion = Math.floor(Math.random() * 100);
    const version = `${majorVersion}.${minorVersion}.${buildVersion}.${patchVersion}`;

    return {
      userAgent: selectedBrowser.baseUA
        .replace('{osInfo}', osInfo)
        .replace(/\{version\}/g, version),
      chromeVersion: majorVersion,
      platformVersion: selectedOS.platformVersion,
      fullVersion: version,
    };
  }

  /**
   * Builds a random `Accept-Language` value: one French locale, its base
   * language at q=0.9, then one to three secondary locales at q=0.8, 0.7, 0.6.
   * @returns {string}
   */
  generateRandomLanguage() {
    const mainLangs = ['fr-FR', 'fr-BE', 'fr-CA', 'fr-CH'];
    const secondaryLangs = ['en-US', 'en-GB', 'de-DE', 'es-ES', 'it-IT'];

    const selectedMainLang = pickRandom(mainLangs);
    const [mainLang] = selectedMainLang.split('-');

    // Randomly pick 1 to 3 secondary languages.
    const numSecondaryLangs = Math.floor(Math.random() * 3) + 1;
    const selectedSecondaryLangs = shuffled(secondaryLangs).slice(0, numSecondaryLangs);

    let acceptLanguage = `${selectedMainLang},${mainLang};q=0.9`;
    selectedSecondaryLangs.forEach((lang, index) => {
      const qValue = (0.8 - (index * 0.1)).toFixed(1);
      acceptLanguage += `,${lang};q=${qValue}`;
    });
    return acceptLanguage;
  }

  /**
   * Builds a random set of browser/hardware characteristics.
   * @returns {object} Screen size, WebGL info, Chrome version data, colour
   *   depth, pixel ratio, hardware concurrency, device memory and user agent.
   */
  generateBrowserFingerprint() {
    const screenConfigs = [
      { width: 1920, height: 1080 },
      { width: 2560, height: 1440 },
      { width: 1680, height: 1050 },
      { width: 3440, height: 1440 },
    ];
    const webglConfigs = [
      { vendor: "Google Inc.", renderer: "ANGLE (Intel, Intel(R) UHD Graphics Direct3D11 vs_5_0)", glVersion: "WebGL 2.0" },
      { vendor: "Google Inc.", renderer: "ANGLE (NVIDIA, NVIDIA GeForce RTX 3060 Direct3D11 vs_5_0)", glVersion: "WebGL 2.0" },
      { vendor: "Google Inc.", renderer: "ANGLE (AMD, AMD Radeon RX 6700 XT Direct3D11 vs_5_0)", glVersion: "WebGL 2.0" },
    ];
    const hardwareConcurrencyOptions = [4, 6, 8, 12, 16];
    const deviceMemoryOptions = [4, 8, 16];
    const colorDepthOptions = [24, 30, 32];
    const pixelRatioOptions = [1, 1.25, 1.5, 2];

    const browserInfo = this.generateRandomChromeVersion();
    const screen = pickRandom(screenConfigs);
    const webgl = pickRandom(webglConfigs);

    return {
      screen,
      webgl,
      chromeVersion: browserInfo.chromeVersion,
      platformVersion: browserInfo.platformVersion,
      fullVersion: browserInfo.fullVersion,
      colorDepth: pickRandom(colorDepthOptions),
      pixelRatio: pickRandom(pixelRatioOptions),
      hardwareConcurrency: pickRandom(hardwareConcurrencyOptions),
      deviceMemory: pickRandom(deviceMemoryOptions),
      userAgent: browserInfo.userAgent,
    };
  }

  /**
   * Builds a randomised TLS option object suitable for `https.Agent`.
   * @returns {object} TLS options (ciphers, curves, protocol bounds, sigalgs...).
   */
  generateRandomTLSConfig() {
    const modernCiphers = [
      'TLS_AES_128_GCM_SHA256',
      'TLS_AES_256_GCM_SHA384',
      'TLS_CHACHA20_POLY1305_SHA256',
      'ECDHE-ECDSA-AES128-GCM-SHA256',
      'ECDHE-RSA-AES128-GCM-SHA256',
      'ECDHE-ECDSA-AES256-GCM-SHA384',
      'ECDHE-RSA-AES256-GCM-SHA384',
      'ECDHE-ECDSA-CHACHA20-POLY1305',
      'ECDHE-RSA-CHACHA20-POLY1305',
    ];
    const curves = ['x25519', 'prime256v1', 'secp384r1', 'secp521r1'];

    // Randomly pick 4 to 6 ciphers.
    const numCiphers = Math.floor(Math.random() * 3) + 4;
    const selectedCiphers = shuffled(modernCiphers).slice(0, numCiphers);

    // Randomly pick 2 to 3 curves.
    const numCurves = Math.floor(Math.random() * 2) + 2;
    const selectedCurves = shuffled(curves).slice(0, numCurves);

    const sessionTimeouts = [3600, 7200, 10800]; // 1h, 2h, 3h in seconds

    // The SSL_OP_* flags live in crypto.constants; reading them from `tls`
    // yields undefined and silently produces a mask of 0. `?? 0` tolerates
    // builds where a given flag is not defined.
    const { constants } = crypto;
    const legacyProtocolMask =
      (constants.SSL_OP_NO_SSLv2 ?? 0) |
      (constants.SSL_OP_NO_SSLv3 ?? 0) |
      (constants.SSL_OP_NO_TLSv1 ?? 0) |
      (constants.SSL_OP_NO_TLSv1_1 ?? 0);

    return {
      ciphers: selectedCiphers.join(':'),
      secureOptions: legacyProtocolMask,
      minVersion: 'TLSv1.2',
      maxVersion: 'TLSv1.3',
      ecdhCurve: selectedCurves.join(':'),
      honorCipherOrder: Math.random() < 0.7, // 70% chance of being true
      sessionTimeout: pickRandom(sessionTimeouts),
      rejectUnauthorized: true,
      sigalgs: [
        'ecdsa_secp256r1_sha256',
        'rsa_pss_rsae_sha256',
        'rsa_pkcs1_sha256',
        'ecdsa_secp384r1_sha384',
        'rsa_pss_rsae_sha384',
        'rsa_pkcs1_sha384',
      ].join(':'),
    };
  }

  /**
   * Increments the completed counter and logs progress. When the total is not
   * known yet (e.g. alerts scraped before the product list is loaded), only
   * the raw count is logged instead of a meaningless percentage.
   */
  updateProgress() {
    this.completedProducts++;
    if (this.totalProducts <= 0) {
      console.log(`Completed: ${this.completedProducts}`);
      return;
    }
    const remaining = this.totalProducts - this.completedProducts;
    const percentComplete = ((this.completedProducts / this.totalProducts) * 100).toFixed(2);
    console.log(`Completed: ${this.completedProducts}/${this.totalProducts} (${percentComplete}%) - Remaining: ${remaining}`);
  }

  /**
   * Builds the HTTP headers sent with each product request.
   * @returns {object}
   */
  buildRequestHeaders() {
    const fp = this.browserFingerprint;
    return {
      "upgrade-insecure-requests": "1",
      "user-agent": this.currentUserAgent,
      "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
      "accept-encoding": "gzip, deflate, br",
      "accept-language": this.currentLanguage,
      "sec-ch-ua": `"Chrome";v="${fp.chromeVersion}", "Not_A Brand";v="8"`,
      "sec-ch-ua-platform-version": fp.platformVersion,
      "sec-ch-ua-full-version-list": `"Chrome";v="${fp.chromeVersion}.0.0.0"`,
      "sec-ch-device-memory": `${fp.deviceMemory}`,
      "sec-ch-viewport-width": fp.screen.width.toString(),
      "sec-ch-viewport-height": fp.screen.height.toString(),
      "sec-ch-ua-model": "",
      "sec-ch-prefers-color-scheme": "light",
      "device-pixel-ratio": fp.pixelRatio.toString(),
      "device-memory": `${fp.deviceMemory}`,
      "hardware-concurrency": fp.hardwareConcurrency.toString(),
      "color-depth": fp.colorDepth.toString(),
    };
  }

  /**
   * Reads the shipping cost of one offer element.
   * @param {Function} $ - Cheerio root function for the loaded document.
   * @param {*} element - The offer element.
   * @returns {number} Shipping cost, 0 when free, absent or unparseable.
   */
  parseShippingCost($, element) {
    const deliveryElement = $(element).find('.aod-delivery-promise-column span[data-csa-c-delivery-price]');
    if (deliveryElement.length === 0) {
      return 0;
    }
    const deliveryPrice = deliveryElement.attr('data-csa-c-delivery-price');
    if (!deliveryPrice || deliveryPrice.toLowerCase() === 'gratuite') {
      return 0;
    }
    // `\s*` already covers regular and non-breaking spaces before the euro sign.
    const match = deliveryPrice.match(/(?:à\s+)?(\d+(?:[.,]\d+)?)\s*€/);
    if (!match) {
      console.log(`Format de prix de livraison non reconnu: ${deliveryPrice}`);
      return 0;
    }
    return Number(match[1].replace(',', '.'));
  }

  /**
   * Parses one offer element into an offer record.
   * @param {Function} $ - Cheerio root function for the loaded document.
   * @param {*} element - The offer element.
   * @param {string} asin - The ASIN that was requested.
   * @returns {object|null} The offer, or null when it belongs to another ASIN
   *   or carries no usable data.
   */
  extractOffer($, element, asin) {
    // The add-to-cart payload carries both the offer's ASIN and its offer id;
    // parse it once.
    const atcSpan = $(element).find('span[data-action="aod-atc-action"]');
    let atcData = null;
    if (atcSpan.length > 0) {
      try {
        atcData = JSON.parse(atcSpan.attr('data-aod-atc-action'));
      } catch (e) {
        console.log(`Erreur lors de l'extraction de l'ASIN: ${e.message}`);
      }
    }
    const offerAsin = atcData?.asin || '';

    // Skip offers that belong to a different ASIN than the one requested.
    if (offerAsin && offerAsin !== asin) {
      console.log(`ASIN de l'offre (${offerAsin}) différent de l'ASIN recherché (${asin}), offre ignorée`);
      return null;
    }

    const priceElement = $(element).find('.a-price.aok-align-center.centralizedApexPricePriceToPayMargin, .a-price.aod-price-strength-price');
    const priceWhole = priceElement.find('.a-price-whole').text().trim();
    const priceFraction = priceElement.find('.a-price-fraction').text().trim();
    let price = priceWhole && priceFraction
      ? Number(parseFloat(`${priceWhole.replace(/[^\d]/g, '')}.${priceFraction}`).toFixed(2))
      : 0.00;

    // Apply the coupon price when one is advertised.
    const promotionElement = $(element).find('[data-csa-c-owner="PromotionsDiscovery"]');
    if (promotionElement.length > 0) {
      const couponLabel = promotionElement.find('label[id^="couponText"]').text().trim();
      const priceMatch = couponLabel.match(/Cliquer pour payer\s+(\d+[,.]\d+)/);
      if (priceMatch) {
        price = parseFloat(priceMatch[1].replace(',', '.'));
      }
    }

    let seller = $(element).find('#aod-offer-soldBy a').text().trim() || 'Vendeur non trouvé';
    const shipper = $(element).find('#aod-offer-shipsFrom .a-color-base, .aod-ship-from-text').text().trim() || 'Expéditeur non trouvé';
    if (seller === 'Vendeur non trouvé' && shipper.toLowerCase().includes('amazon')) {
      seller = 'Amazon';
    }

    // Skip offers with no usable data at all.
    if (price === 0 && seller === 'Vendeur non trouvé' && shipper === 'Expéditeur non trouvé') {
      return null;
    }

    const shippingCost = this.parseShippingCost($, element);

    const condition = $(element)
      .find('#aod-offer-heading')
      .text()
      .replace(/\s+/g, ' ')
      .trim() || 'Condition non trouvée';

    let offerId = atcData?.oid || '';
    if (offerId) {
      try {
        // The offer id may be URL-encoded; keep the raw value if decoding fails.
        offerId = decodeURIComponent(offerId);
      } catch (e) {
        console.log(`Erreur lors de l'extraction de l'offerId: ${e.message}`);
      }
    }
    if (!offerId) {
      offerId = 'XXXX';
    }

    const totalPrice = Number((price + shippingCost).toFixed(2));
    return { price, shippingCost, totalPrice, seller, shipper, condition, offerId, asin: offerAsin };
  }

  /**
   * Fetches and parses the offer listing of one ASIN, stores the outcome in
   * `this.results` and queues the best offer in `this.pendingPrices`.
   * @param {string} asin
   * @returns {Promise<boolean>} true when the ASIN was scraped (or was already
   *   handled / in flight), false on an invalid response or an error.
   */
  async scrapeProduct(asin) {
    if (this.processedAsins.has(asin)) {
      console.log(`ASIN ${asin} déjà traité, ignoré`);
      return true;
    }
    if (this.inProgressAsins.has(asin)) {
      console.log(`ASIN ${asin} en cours de traitement, ignoré`);
      return true;
    }

    this.inProgressAsins.add(asin);
    try {
      const response = await axios.get('https://www.amazon.fr/gp/product/ajax/ref=dp_aod_NEW_mbc', {
        params: {
          asin,
          experienceId: 'aodAjaxMain',
          pc: 'dp',
        },
        httpsAgent: this.httpsAgent, // use the custom HTTPS agent
        headers: this.buildRequestHeaders(),
        decompress: true,
        maxRedirects: 5,
        timeout: 30000,
        // Accept every HTTP status; validity is decided from the headers below.
        validateStatus: () => true,
      });

      const isValidResponse =
        response.headers['content-type']?.includes('text/html;charset=UTF-8') &&
        response.headers['set-cookie']?.some(cookie => cookie.includes('session-id'));

      if (!isValidResponse) {
        console.log(`Réponse invalide pour l'ASIN ${asin} - Arrêt programmé`);
        this.shouldTerminate = true;
        return false;
      }

      const $ = cheerio.load(response.data);
      const imageUrl = $('#pinned-image-id img#aod-asin-image-id').attr('src') || '';
      const pageTitle = $('#aod-asin-title-text').text().trim() || '';

      const offers = [];
      let bestOffer = null;
      $('div#aod-offer, div#aod-pinned-offer').each((index, element) => {
        const currentOffer = this.extractOffer($, element, asin);
        if (!currentOffer) {
          return;
        }
        offers.push(currentOffer);
        if (!bestOffer || currentOffer.totalPrice < bestOffer.totalPrice) {
          bestOffer = currentOffer;
        }
      });

      const ratings = $('h5 + div').text().trim();

      const truncatedTitle = pageTitle.length > 20 ? pageTitle.substring(0, 20) + '...' : pageTitle;
      console.log(`\nProduit [${asin}]: ${truncatedTitle}`);
      console.log(`Nombre d'offres trouvées: ${offers.length}`);
      console.log(`Meilleure offre: Prix: ${bestOffer?.price} | Vendeur: ${bestOffer?.seller} | Expéditeur: ${bestOffer?.shipper}\n`);

      // After the loop, make sure the best offer carries the requested ASIN.
      if (bestOffer && bestOffer.asin !== asin) {
        console.log(`La meilleure offre a un ASIN différent (${bestOffer.asin}) de celui recherché (${asin}), résultat ignoré`);
        this.results.set(asin, { title: pageTitle, ratings, offers: [], bestOffer: null, imageUrl, success: true });
      } else {
        this.results.set(asin, { title: pageTitle, ratings, offers, bestOffer, imageUrl, success: true });
        if (bestOffer) {
          this.pendingPrices.push({ asin, bestOffer, success: true });
        }
      }

      this.updateProgress();
      this.processedAsins.add(asin);
      return true;
    } catch (error) {
      // `code` is not guaranteed to be a string; check before calling includes().
      if (typeof error?.code === 'string' && error.code.includes('TLS')) {
        console.error(`Erreur TLS pour l'ASIN ${asin}:`, error.code);
        // Back off after a TLS error. The ASIN is not marked as processed, so
        // a caller may submit it again.
        await new Promise(resolve => setTimeout(resolve, 5000));
        return false;
      }
      console.log(`Erreur pour l'ASIN ${asin}: ${error?.message}`);
      this.processedAsins.add(asin);
      return false;
    } finally {
      // Always release the in-progress marker, including on the early-return
      // paths (invalid response, TLS error), so the ASIN is never stuck.
      this.inProgressAsins.delete(asin);
    }
  }

  /**
   * Starts scrapes for the queued ASINs, up to `maxConcurrent` at a time, and
   * refills free slots as requests finish. The array is consumed in place.
   * @param {string[]} asins - Queue of ASINs; emptied as requests start.
   * @returns {Promise<boolean[]>} Results of the scrapes started by this call;
   *   resolves only after any refill work they triggered has finished.
   */
  async processNextBatch(asins) {
    const batch = [];
    while (!this.shouldTerminate && this.activeRequests < this.maxConcurrent && asins.length > 0) {
      const asin = asins.shift();
      this.activeRequests++;
      const promise = this.scrapeProduct(asin).finally(() => {
        this.activeRequests--;
        // Returning the refill promise keeps it attached to this chain, so its
        // completion is awaited and its failures are not left unhandled.
        return asins.length > 0 ? this.processNextBatch(asins) : undefined;
      });
      batch.push(promise);
    }
    return Promise.all(batch);
  }

  /**
   * Loads the product list from the backend and extracts unique ASINs.
   * @returns {Promise<string[]>}
   * @throws Rethrows any request or payload error after logging it.
   */
  async fetchProductsFromAPI() {
    try {
      const response = await axios.get(this.PRODUCTS_API_URL);
      if (!Array.isArray(response.data)) {
        throw new Error('Réponse inattendue: liste de produits attendue');
      }
      const uniqueAsins = new Set(
        response.data
          .map(product => extractAsinFromUrl(product?.url))
          .filter(asin => asin !== null)
      );
      return Array.from(uniqueAsins);
    } catch (error) {
      console.error('Erreur lors de la récupération des produits:', error.message);
      throw error;
    }
  }

  /**
   * Posts the best prices of the given products to the backend, one entry per
   * product URL (later entries replace earlier ones for the same URL).
   * @param {Array<{asin: string, bestOffer: object}>} products
   * @param {boolean} [isFinal=false] - Only affects the log message.
   * @returns {Promise<boolean>} true when the batch was accepted (HTTP 200) or
   *   contained nothing to send; false when the request failed.
   */
  async sendPricesToAPI(products, isFinal = false) {
    try {
      const uniqueProducts = new Map();
      products
        .filter(product => product.bestOffer && product.bestOffer.totalPrice)
        .forEach(product => {
          const url = `https://www.amazon.fr/dp/${product.asin}`;
          uniqueProducts.set(url, {
            url,
            price: product.bestOffer.totalPrice.toFixed(2),
            seller: product.bestOffer.seller,
            shipper: product.bestOffer.shipper,
            condition: product.bestOffer.condition,
            offerId: product.bestOffer.offerId,
          });
        });

      const formattedProducts = Array.from(uniqueProducts.values());
      if (formattedProducts.length === 0) {
        console.log('Aucun produit avec prix valide à envoyer');
        return true;
      }

      console.log(`Envoi de ${formattedProducts.length} prix à l'API${isFinal ? ' (envoi final)' : ''}`);
      const response = await axios.post(this.API_URL, { products: formattedProducts });
      if (response.status === 200) {
        console.log(`Prix mis à jour avec succès (${formattedProducts.length} produits)`);
        return true;
      }
      console.error(`Échec de l'envoi des prix. Code: ${response.status}`);
      return false;
    } catch (error) {
      console.error('Erreur lors de l\'envoi des prix:', error.message);
      return false;
    }
  }

  /**
   * Sends queued prices when a full batch is available, or everything when
   * `isFinal` is set. Sent entries are removed from the queue only when the
   * send succeeded; entries queued while the request was in flight are kept.
   * @param {boolean} [isFinal=false]
   * @returns {Promise<void>}
   */
  async checkAndSendPendingPrices(isFinal = false) {
    if (!isFinal && this.pendingPrices.length < this.BATCH_SIZE) {
      return;
    }
    const batchToSend = isFinal
      ? [...this.pendingPrices]
      : this.pendingPrices.slice(0, this.BATCH_SIZE);

    const sent = await this.sendPricesToAPI(batchToSend, isFinal);
    if (sent) {
      // Drop exactly what was sent; new entries are appended at the tail.
      this.pendingPrices = this.pendingPrices.slice(batchToSend.length);
    }
  }

  /**
   * Loads pending alerts from the backend.
   * @returns {Promise<Array>} The alerts, or an empty array on failure or when
   *   the payload is not an array.
   */
  async fetchPendingAlerts() {
    try {
      const response = await axios.get(this.ALERTS_API_URL);
      return Array.isArray(response.data) ? response.data : [];
    } catch (error) {
      console.error('Erreur lors de la récupération des alertes:', error.message);
      return [];
    }
  }

  /**
   * Re-scrapes the product of one alert and sends a notification stating
   * whether the current best price is within 1% of the alert's new price.
   * Errors are logged, never thrown.
   * @param {object} alert - Reads `id`, `url`, `previous_price`, `new_price`.
   * @returns {Promise<void>}
   */
  async processAlert(alert) {
    try {
      const asin = extractAsinFromUrl(alert?.url);
      if (asin === null) {
        console.error(`Erreur lors du traitement de l'alerte ${alert?.id}:`, 'URL sans ASIN valide');
        return;
      }

      await this.scrapeProduct(asin);
      const result = this.results.get(asin);
      if (!result || !result.bestOffer) {
        return;
      }

      const currentPrice = result.bestOffer.totalPrice;
      const newPrice = parseFloat(alert.new_price);
      const priceThreshold = 0.01;
      const isPriceConfirmed = Math.abs(currentPrice - newPrice) <= (newPrice * priceThreshold);

      await this.sendNotification({
        alert_id: alert.id,
        url: alert.url,
        previous_price: alert.previous_price,
        newPrice: currentPrice.toFixed(2),
        confirmed: isPriceConfirmed,
        seller: result.bestOffer.seller,
        shipper: result.bestOffer.shipper,
        condition: result.bestOffer.condition,
        title: result.title || '',
        imageUrl: result.imageUrl || '',
        offerId: result.bestOffer.offerId,
      });
    } catch (error) {
      console.error(`Erreur lors du traitement de l'alerte ${alert?.id}:`, error.message);
    }
  }

  /**
   * Posts one notification payload to the backend.
   * @param {object} alertData - Payload; `alert_id` is used in log messages.
   * @returns {Promise<void>}
   * @throws Rethrows request errors after logging them.
   */
  async sendNotification(alertData) {
    try {
      console.log(`Envoi de la notification pour l'alerte ${alertData.alert_id}...`);
      const response = await axios.post(this.NOTIFICATIONS_API_URL, alertData);
      if (response.status === 200) {
        console.log(`Notification envoyée avec succès pour l'alerte ${alertData.alert_id}`);
      } else {
        console.error(`Échec de l'envoi de la notification. Code: ${response.status}`);
      }
    } catch (error) {
      console.error('Erreur lors de l\'envoi de la notification:', error.message);
      throw error;
    }
  }

  /**
   * Processes all pending alerts sequentially. Re-entrant calls are ignored
   * while a run is in progress; the run stops early once `shouldTerminate`
   * has been raised by a scrape.
   * @returns {Promise<void>}
   */
  async checkAlerts() {
    if (this.isProcessingAlerts) {
      return;
    }
    this.isProcessingAlerts = true;
    try {
      const alerts = await this.fetchPendingAlerts();
      console.log(`${alerts.length} alertes en attente trouvées`);
      for (const alert of alerts) {
        if (this.shouldTerminate) {
          break;
        }
        await this.processAlert(alert);
      }
    } catch (error) {
      console.error('Erreur lors du traitement des alertes:', error.message);
    } finally {
      this.isProcessingAlerts = false;
    }
  }

  /**
   * Sends every queued price in chunks of `BATCH_SIZE`. The queue itself is
   * left untouched so its length can still be reported afterwards.
   * @param {{logBatches?: boolean}} [options] - Log a line per chunk sent.
   * @returns {Promise<void>}
   */
  async flushPendingPrices({ logBatches = false } = {}) {
    const total = this.pendingPrices.length;
    for (let i = 0; i < total; i += this.BATCH_SIZE) {
      const batch = this.pendingPrices.slice(i, i + this.BATCH_SIZE);
      await this.sendPricesToAPI(batch, i + this.BATCH_SIZE >= total);
      if (logBatches) {
        console.log(`Lot ${Math.floor(i / this.BATCH_SIZE) + 1} envoyé (${batch.length} produits)`);
      }
    }
  }

  /**
   * Appends a timestamped line with the number of queued prices to
   * `price_logs.txt`. Write failures are logged, never thrown.
   * @param {string} label - Text placed between the timestamp and the count.
   * @returns {Promise<void>}
   */
  async appendPriceLog(label) {
    const now = new Date();
    const dateStr = now.toLocaleDateString('fr-FR', { day: '2-digit', month: '2-digit', year: 'numeric' });
    const timeStr = now.toLocaleTimeString('fr-FR', { hour: '2-digit', minute: '2-digit', second: '2-digit' });
    const logEntry = `${dateStr} ${timeStr} - ${label}: ${this.pendingPrices.length}\n`;
    try {
      await fs.appendFile('price_logs.txt', logEntry);
    } catch (err) {
      console.error('Erreur lors de l\'écriture du log:', err);
    }
  }

  /**
   * Runs the whole job: alerts first, then the product list in groups of
   * `maxConcurrent`, then the final price upload and a summary. On a fatal
   * error, prices collected so far are still uploaded (once).
   * @returns {Promise<void>}
   */
  async start() {
    console.log('Démarrage du processus de scraping, vérification des alertes et des produits surveillés...');
    let pricesSent = false;
    try {
      const startTime = Date.now();

      await this.checkAlerts();

      const asins = await this.fetchProductsFromAPI();
      this.totalProducts = asins.length;
      console.log(`${this.totalProducts} ASINs trouvés à traiter`);

      const remainingAsins = [...asins];
      while (remainingAsins.length > 0 && !this.shouldTerminate) {
        const batch = remainingAsins.splice(0, this.maxConcurrent);
        await this.processNextBatch(batch);
        // Wait until every active request has finished.
        while (this.activeRequests > 0) {
          await new Promise(resolve => setTimeout(resolve, 100));
        }
      }

      // Final upload of the prices, in chunks of BATCH_SIZE.
      console.log(`\nEnvoi final des prix accumulés (${this.pendingPrices.length} produits)...`);
      await this.flushPendingPrices({ logBatches: true });
      pricesSent = true;

      const duration = (Date.now() - startTime) / 1000;
      const outcomes = [...this.results.values()];
      console.log('\n=== Scraping terminé ===');
      console.log(`Durée: ${duration.toFixed(2)} secondes`);
      console.log(`Total des produits traités: ${this.results.size}`);
      console.log(`Scrapes réussis: ${outcomes.filter(r => r.success).length}`);
      console.log(`Scrapes échoués: ${outcomes.filter(r => !r.success).length}`);

      // Only the number of uploaded prices is journaled.
      await this.appendPriceLog('Prix envoyés à l\'API');
    } catch (error) {
      console.error('Erreur fatale:', error);
      // Upload what was collected, unless the final upload already happened.
      if (!pricesSent && this.pendingPrices.length > 0) {
        console.log(`\nEnvoi final des prix après erreur (${this.pendingPrices.length} produits)...`);
        await this.flushPendingPrices();
        await this.appendPriceLog('Prix envoyés à l\'API (après erreur)');
      }
    }
  }
}

// Usage
const scraper = new EnhancedProductScraper();
scraper.start().catch(console.error);