add xhamster video/related scrape

This commit is contained in:
La macchina desiderante 2024-05-28 21:40:32 +02:00
parent 1905707b92
commit 9ede9a6b9b
2 changed files with 83 additions and 2 deletions

View File

@ -16,6 +16,6 @@ export const DEFAULT_REDTUBE_GALLERY_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_REDTUBE_VIDEO_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_REDTUBE_VIDEO_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_XHAMSTER_GALLERY_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_XHAMSTER_GALLERY_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_XHAMSTER_VIDEO_EXPIRY = { EX: EX_MIN }; export const DEFAULT_XHAMSTER_VIDEO_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/' export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/'

View File

@ -1,13 +1,94 @@
import { XHAMSTER_BASE_URL, XHAMSTER_BASE_URL_VIDEOS } from "@/constants/urls";
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data"; import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
import { getHeaders } from "../common/headers";
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
import { DEFAULT_RELATED_VIDEO_KEY_PATH, DEFAULT_XHAMSTER_GALLERY_EXPIRY, DEFAULT_XHAMSTER_VIDEO_EXPIRY } from "@/constants/redis";
import * as cheerio from "cheerio";
import axios, { AxiosError } from "axios";
import { findGetMediaUrlInTagblock } from "../common/mindgeek";
import { Platforms } from "@/meta/settings";
import { encodeUrl } from "@/utils/string";
import { DEFAULT_VIDEO_STREAM_ROUTE_PREFIX } from "@/constants/stream";
/** Resolutions probed in the page scripts; each is matched as `<res>p.h264.mp4`. */
const XHAMSTER_MP4_RESOLUTIONS = ['144', '240', '360', '480', '720', '1080'];

/** Collects the MP4 sources referenced by the inline `<script>` tags of a loaded video page. */
const extractXHamsterSources = ($: ReturnType<typeof cheerio.load>): NonNullable<VideoData["srcSet"]> => {
    const sources: NonNullable<VideoData["srcSet"]> = [];
    // Several script tags can carry the same media block; keep one entry per resolution.
    const seenSizes = new Set<string>();

    for (const elem of $("script").toArray()) {
        // Serialize each tag once and drop every backslash before matching
        // (presumably to undo JSON-style `\/` escaping in the embedded URLs — confirm).
        const scriptText = $(elem).toString().replace(/\\/g, '');

        // Only scripts that contain the HLS marker are searched for MP4 renditions.
        if (!findGetMediaUrlInTagblock(scriptText, 'media=hls4')) {
            continue;
        }

        for (const res of XHAMSTER_MP4_RESOLUTIONS) {
            if (seenSizes.has(res)) {
                continue;
            }
            const resUrl = findGetMediaUrlInTagblock(scriptText, `${res}p.h264.mp4`);
            if (resUrl) {
                seenSizes.add(res);
                sources.push({
                    src: `${DEFAULT_VIDEO_STREAM_ROUTE_PREFIX}/${Platforms.xhamster}/${encodeUrl(resUrl)}`,
                    type: 'video/mp4',
                    size: res
                });
            }
        }
    }

    return sources;
};

/** Collects the related-video thumbnails listed on a loaded video page. */
const extractXHamsterRelated = ($: ReturnType<typeof cheerio.load>): GalleryData[] => {
    const related: GalleryData[] = [];

    for (const thumb of $('.thumb-list .thumb-list__item').toArray()) {
        // The href has the videos base URL removed, leaving the remainder as the stored video reference.
        const videoUrl = $(thumb).find("a.video-thumb__image-container").attr("href")?.replace(XHAMSTER_BASE_URL_VIDEOS, '');
        const imgUrl = $(thumb).find("a.video-thumb__image-container img").attr("src");
        const text = $(thumb).find("a.video-thumb-info__name").attr("title");

        // Entries missing (or having an empty) link, image or title are skipped.
        if (videoUrl && imgUrl && text) {
            related.push({
                videoUrl,
                imgUrl,
                text,
                platform: Platforms.xhamster
            });
        }
    }

    return related;
};

/**
 * Fetches the playable sources and the related videos for a single xHamster video.
 *
 * Both results are looked up in Redis first, keyed by the video page URL (the related
 * list under the same key suffixed with `DEFAULT_RELATED_VIDEO_KEY_PATH`). On a cache miss
 * for the video data the page is downloaded and scraped, and any non-empty result is
 * written back to Redis.
 *
 * A failed request or parse is logged and results in empty data being returned; this
 * function does not reject because of a scraping failure.
 *
 * NOTE(review): the `media=hls4` match is only used to decide which script tag to scan;
 * `hlsUrl` is always returned as `''`. Confirm whether it should be populated.
 *
 * @param videoId - Video identifier/slug; any `/` characters are removed before it is appended to the videos base URL.
 * @param params - Optional fetch parameters (currently unused by this function).
 * @returns A tuple of the video data and the list of related gallery entries.
 */
export const fetchXHamsterVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
    const queryUrl = `${XHAMSTER_BASE_URL_VIDEOS}/${videoId.replace(/\//g, '')}`;
    const relatedKey = queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH;

    // The two cache entries are independent, so read them concurrently.
    const [cachedVideoData, cachedRelatedData] = await Promise.all([
        getDataFromRedis(queryUrl),
        getDataFromRedis(relatedKey)
    ]);
    if (cachedVideoData) {
        return [cachedVideoData as VideoData, (cachedRelatedData as GalleryData[]) ?? []];
    }

    let sources: NonNullable<VideoData["srcSet"]> = [];
    let relatedData: GalleryData[] = [];

    try {
        const response = await axios.get(queryUrl, getHeaders(XHAMSTER_BASE_URL));
        const $ = cheerio.load(response.data);
        sources = extractXHamsterSources($);
        relatedData = extractXHamsterRelated($);
    } catch (error: unknown) {
        // Best effort: report the failure and fall through with whatever was collected.
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`fetchXHamsterVideoData: failed to scrape ${queryUrl}: ${reason}`);
    }

    const data: VideoData = {
        hlsUrl: '',
        srcSet: sources
    };

    // Empty results are not cached, so a failed scrape is retried on the next call.
    if (sources.length > 0) {
        await storeDataIntoRedis(queryUrl, data, DEFAULT_XHAMSTER_VIDEO_EXPIRY);
    }
    if (relatedData.length > 0) {
        await storeDataIntoRedis(relatedKey, relatedData, DEFAULT_XHAMSTER_GALLERY_EXPIRY);
    }

    return [data, relatedData];
};