From c98ad299cc00c3c49ba3f90c6ce815594101e5e3 Mon Sep 17 00:00:00 2001 From: lamacchinadesiderante Date: Sun, 28 Apr 2024 13:17:14 +0200 Subject: [PATCH] add random user-agent and more request headers --- src/utils/scrape/gallery.ts | 9 +++------ src/utils/scrape/headers.ts | 38 +++++++++++++++++++++++++++++++++++++ src/utils/scrape/video.ts | 7 ++----- src/utils/string.ts | 11 ++++++++++- 4 files changed, 53 insertions(+), 12 deletions(-) create mode 100644 src/utils/scrape/headers.ts diff --git a/src/utils/scrape/gallery.ts b/src/utils/scrape/gallery.ts index a53aec7..06038b5 100644 --- a/src/utils/scrape/gallery.ts +++ b/src/utils/scrape/gallery.ts @@ -1,8 +1,9 @@ import { XVIDEOS_BASE_URL } from '@/constants/urls'; -import { GalleryData, VideoData } from '@/meta/data'; +import { GalleryData } from '@/meta/data'; import axios, { AxiosError } from 'axios'; import * as cheerio from "cheerio"; +import { getHeaders } from './headers'; interface FetchParams { baseUrl?: string @@ -13,11 +14,7 @@ export const fetchGalleryData = async (params?: FetchParams): Promise { + + const userAgents: string[] = [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5397.215 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.2420.81', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/109.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.4; rv:124.0) Gecko/20100101 Firefox/124.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/109.0.0.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux i686; rv:124.0) Gecko/20100101 Firefox/124.0' + ]; + + const rand = Math.floor(Math.random() * userAgents.length); + + return userAgents[rand] +} + +export const getHeaders = (host:string = XVIDEOS_BASE_URL) => { + return { + headers: { + "User-Agent": getRandomUserAgent(), + "Accept-Language": "en-gb, en, en-US, it", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Host": removeHttpS(host) + }, + } +}; \ No newline at end of file diff --git a/src/utils/scrape/video.ts b/src/utils/scrape/video.ts index e06383d..8313ce1 100644 --- a/src/utils/scrape/video.ts +++ b/src/utils/scrape/video.ts @@ -5,6 +5,7 @@ import axios, { AxiosError } from 'axios'; import * as cheerio from "cheerio"; import { findRelatedVideos, findVideoUrlInsideTagStringByFunctionNameAndExtension } from '../string'; +import { getHeaders } from './headers'; interface FetchParams { baseUrl?: string @@ -19,11 +20,7 @@ export const fetchVideoData = async (videoId: string, params?: FetchParams): Pro let related: GalleryData[] = []; - const reqHeaders = { - headers: { - "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5397.215 Safari/537.36' - }, - }; + const reqHeaders = getHeaders() const queryUrl = `${(params && params.baseUrl) ?? XVIDEOS_BASE_URL}${videoId}` diff --git a/src/utils/string.ts b/src/utils/string.ts index 5217a5b..4b7bac2 100644 --- a/src/utils/string.ts +++ b/src/utils/string.ts @@ -39,4 +39,13 @@ export const findRelatedVideos = (tagBlock: string): GalleryData[]|null => { })); return parsedArray; -} \ No newline at end of file +} + +export const removeHttpS = (url: string): string => { + if (url.startsWith("http://")) { + return url.slice(7); + } else if (url.startsWith("https://")) { + return url.slice(8); + } + return url; + }; \ No newline at end of file