add RedTube video/related scraping

This commit is contained in:
La macchina desiderante 2024-05-26 11:12:52 +02:00
parent 401351289b
commit dd8e9838a3
2 changed files with 111 additions and 0 deletions

View File

@ -2,6 +2,10 @@ import { getCookie } from "@/utils/cookies/read"
import { Cookies, RedTubeOrientations } from "@/meta/settings" import { Cookies, RedTubeOrientations } from "@/meta/settings"
import { REDTUBE_BASE_SEARCH, REDTUBE_BASE_GAY_SEARCH, REDTUBE_BASE_URL_GAY, REDTUBE_BASE_URL, REDTUBE_BASE_URL_TRANS } from "@/constants/urls" import { REDTUBE_BASE_SEARCH, REDTUBE_BASE_GAY_SEARCH, REDTUBE_BASE_URL_GAY, REDTUBE_BASE_URL, REDTUBE_BASE_URL_TRANS } from "@/constants/urls"
import { getHeadersWithCookie } from "../common/headers"
import { MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
import axios from "axios"
export const getRedTubeQueryUrl = async (query?: string): Promise<string> => { export const getRedTubeQueryUrl = async (query?: string): Promise<string> => {
const orientation = await getCookie(Cookies.orientation) const orientation = await getCookie(Cookies.orientation)
@ -35,4 +39,33 @@ export const getRedTubeResultsWrapperId = async (query?: string): Promise<string
} }
return "#most_recent_videos li" return "#most_recent_videos li"
}
/**
 * Fetches the list of playable sources for a RedTube video.
 *
 * Requests `url` with the headers built by `getHeadersWithCookie` from
 * `REDTUBE_BASE_URL` and `sessionCookie`, then converts every entry of the
 * response payload into a `VideoSourceItem` (`videoUrl` -> `src`,
 * `quality` -> `size`, `type` fixed to `'video/mp4'`).
 *
 * @param url - Media endpoint to query.
 * @param sessionCookie - Cookie value handed to `getHeadersWithCookie`.
 * @returns One item per payload entry, in payload order. An empty array is
 * returned when the request fails or when the payload is not an array.
 */
export const getRedTubeMediaUrlList = async (url: string, sessionCookie: string): Promise<VideoSourceItem[]> => {
  const headersWithCookie = getHeadersWithCookie(REDTUBE_BASE_URL, sessionCookie)
  try {
    const response = await axios.get(url, headersWithCookie)
    const payload: unknown = response.data
    // The endpoint may answer with something other than the expected list
    // (e.g. an HTML page or an error object); guard explicitly instead of
    // letting `.map` throw and relying on the catch below.
    if (!Array.isArray(payload)) {
      return []
    }
    return payload.map((elem: MindGeekVideoSrcElem) => ({
      src: elem?.videoUrl,
      type: 'video/mp4',
      size: elem?.quality
    })) as VideoSourceItem[]
  } catch (error) {
    // Best-effort lookup: log the failure and report "no sources".
    console.log(error)
    return []
  }
}

View File

@ -1,4 +1,15 @@
import { REDTUBE_BASE_URL } from "@/constants/urls";
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data"; import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
import { getHeaders } from "../common/headers";
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
import { DEFAULT_REDTUBE_GALLERY_EXPIRY, DEFAULT_REDTUBE_VIDEO_EXPIRY, DEFAULT_RELATED_VIDEO_KEY_PATH } from "@/constants/redis";
import * as cheerio from "cheerio";
import axios, { AxiosError } from "axios";
import { createSessionCookie, findGetMediaUrlInTagblock } from "../common/mindgeek";
import { Platforms } from "@/meta/settings";
import { getRedTubeMediaUrlList } from "./url";
export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => { export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
@ -9,5 +20,72 @@ export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParam
let relatedData: GalleryData[] = []; let relatedData: GalleryData[] = [];
let mediaUrl, sessionCookie, convertedData: VideoSourceItem[]
let reqHeaders = getHeaders(REDTUBE_BASE_URL)
const queryUrl = `${REDTUBE_BASE_URL}/${videoId.replace(/\//g, '')}`
const cachedVideoData = await getDataFromRedis(queryUrl)
const cachedRelatedData = await getDataFromRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH)
if (cachedVideoData) {
return [cachedVideoData as VideoData, cachedRelatedData as GalleryData[] ?? []]
}
await axios.get(queryUrl, reqHeaders)
.then(async response => {
sessionCookie = response?.headers["set-cookie"] ? createSessionCookie(response?.headers["set-cookie"]) : '';
const html = response.data;
const $ = cheerio.load(html);
const scriptTags = $("script");
scriptTags.map((idx, elem) => {
const getMediaUrl = findGetMediaUrlInTagblock($(elem).toString().replace(/\\/g, ''), 'media/mp4') ?? null
if (getMediaUrl) {
mediaUrl = `${REDTUBE_BASE_URL}${getMediaUrl}`
}
})
const wrapperId = "#related_videos_center li.tm_video_block"
const thumbs = $(wrapperId);
thumbs.map((key, thumb) => {
const videoUrl = $(thumb).find("a.tm_video_link").attr("href")?.split('/')[1];
const imgUrl = $(thumb).find("img.js_thumbImageTag").attr("data-src")
const text = $(thumb).find("a.tm_video_title").attr("title");
videoUrl && imgUrl && text && relatedData.push({
videoUrl,
imgUrl,
text,
platform: Platforms.redtube
})
})
}).catch((error: AxiosError) => {
// error handling goes here
});
if (sessionCookie && mediaUrl) {
convertedData = await getRedTubeMediaUrlList(mediaUrl, sessionCookie)
data.srcSet = convertedData.reverse()
await storeDataIntoRedis(queryUrl, data, DEFAULT_REDTUBE_VIDEO_EXPIRY);
}
if (relatedData.length > 0) {
await storeDataIntoRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH, relatedData, DEFAULT_REDTUBE_GALLERY_EXPIRY);
}
return [ data, relatedData ] return [ data, relatedData ]
} }