add RedTube video/related scraping
This commit is contained in:
parent
401351289b
commit
dd8e9838a3
|
@ -2,6 +2,10 @@ import { getCookie } from "@/utils/cookies/read"
|
|||
|
||||
import { Cookies, RedTubeOrientations } from "@/meta/settings"
|
||||
import { REDTUBE_BASE_SEARCH, REDTUBE_BASE_GAY_SEARCH, REDTUBE_BASE_URL_GAY, REDTUBE_BASE_URL, REDTUBE_BASE_URL_TRANS } from "@/constants/urls"
|
||||
import { getHeadersWithCookie } from "../common/headers"
|
||||
import { MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
|
||||
|
||||
import axios from "axios"
|
||||
|
||||
export const getRedTubeQueryUrl = async (query?: string): Promise<string> => {
|
||||
const orientation = await getCookie(Cookies.orientation)
|
||||
|
@ -36,3 +40,32 @@ export const getRedTubeResultsWrapperId = async (query?: string): Promise<string
|
|||
|
||||
return "#most_recent_videos li"
|
||||
}
|
||||
|
||||
/**
 * Requests the given URL and converts the returned list of media entries
 * into `VideoSourceItem`s.
 *
 * The request is sent with the headers built by `getHeadersWithCookie` for
 * `REDTUBE_BASE_URL` and the supplied session cookie. Every entry of the
 * response array that carries a `videoUrl` becomes one source item with
 * `src` = `videoUrl`, `type` = `'video/mp4'` and `size` = `quality`.
 * Response order is preserved.
 *
 * This function never rejects: a failed request is logged and yields an
 * empty list, and so does an empty or non-array response body.
 *
 * @param url - URL to fetch the media entry list from.
 * @param sessionCookie - Cookie string forwarded to `getHeadersWithCookie`.
 * @returns The converted source items, or `[]` when nothing usable was returned.
 */
export const getRedTubeMediaUrlList = async (url: string, sessionCookie: string): Promise<VideoSourceItem[]> => {
  const headersWithCookie = getHeadersWithCookie(REDTUBE_BASE_URL, sessionCookie)

  try {
    const response = await axios.get(url, headersWithCookie)
    const payload: unknown = response.data

    // Anything other than an array (HTML error page, empty body, error object)
    // cannot be mapped; treat it as "no sources" instead of relying on a
    // thrown TypeError.
    if (!Array.isArray(payload)) {
      return []
    }

    return payload
      // An entry without a URL cannot be played, so it is dropped rather than
      // emitted with `src: undefined`.
      .filter((elem: MindGeekVideoSrcElem) => Boolean(elem?.videoUrl))
      .map((elem: MindGeekVideoSrcElem) => ({
        src: elem.videoUrl,
        type: 'video/mp4',
        size: elem.quality
      })) as VideoSourceItem[]
  } catch (error) {
    // Best-effort fetch: keep the original contract of logging and returning
    // an empty list instead of propagating the failure to the caller.
    console.log(error)
    return []
  }
}
|
|
@ -1,4 +1,15 @@
|
|||
import { REDTUBE_BASE_URL } from "@/constants/urls";
|
||||
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
|
||||
import { getHeaders } from "../common/headers";
|
||||
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
|
||||
import { DEFAULT_REDTUBE_GALLERY_EXPIRY, DEFAULT_REDTUBE_VIDEO_EXPIRY, DEFAULT_RELATED_VIDEO_KEY_PATH } from "@/constants/redis";
|
||||
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
import axios, { AxiosError } from "axios";
|
||||
import { createSessionCookie, findGetMediaUrlInTagblock } from "../common/mindgeek";
|
||||
import { Platforms } from "@/meta/settings";
|
||||
import { getRedTubeMediaUrlList } from "./url";
|
||||
|
||||
export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
|
||||
|
||||
|
@ -9,5 +20,72 @@ export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParam
|
|||
|
||||
let relatedData: GalleryData[] = [];
|
||||
|
||||
let mediaUrl, sessionCookie, convertedData: VideoSourceItem[]
|
||||
|
||||
let reqHeaders = getHeaders(REDTUBE_BASE_URL)
|
||||
|
||||
const queryUrl = `${REDTUBE_BASE_URL}/${videoId.replace(/\//g, '')}`
|
||||
|
||||
const cachedVideoData = await getDataFromRedis(queryUrl)
|
||||
const cachedRelatedData = await getDataFromRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH)
|
||||
|
||||
if (cachedVideoData) {
|
||||
return [cachedVideoData as VideoData, cachedRelatedData as GalleryData[] ?? []]
|
||||
}
|
||||
|
||||
await axios.get(queryUrl, reqHeaders)
|
||||
|
||||
.then(async response => {
|
||||
|
||||
sessionCookie = response?.headers["set-cookie"] ? createSessionCookie(response?.headers["set-cookie"]) : '';
|
||||
|
||||
const html = response.data;
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
const scriptTags = $("script");
|
||||
|
||||
scriptTags.map((idx, elem) => {
|
||||
const getMediaUrl = findGetMediaUrlInTagblock($(elem).toString().replace(/\\/g, ''), 'media/mp4') ?? null
|
||||
|
||||
if (getMediaUrl) {
|
||||
mediaUrl = `${REDTUBE_BASE_URL}${getMediaUrl}`
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
const wrapperId = "#related_videos_center li.tm_video_block"
|
||||
|
||||
const thumbs = $(wrapperId);
|
||||
|
||||
thumbs.map((key, thumb) => {
|
||||
|
||||
const videoUrl = $(thumb).find("a.tm_video_link").attr("href")?.split('/')[1];
|
||||
const imgUrl = $(thumb).find("img.js_thumbImageTag").attr("data-src")
|
||||
const text = $(thumb).find("a.tm_video_title").attr("title");
|
||||
|
||||
videoUrl && imgUrl && text && relatedData.push({
|
||||
videoUrl,
|
||||
imgUrl,
|
||||
text,
|
||||
platform: Platforms.redtube
|
||||
})
|
||||
})
|
||||
|
||||
}).catch((error: AxiosError) => {
|
||||
// error handling goes here
|
||||
});
|
||||
|
||||
if (sessionCookie && mediaUrl) {
|
||||
convertedData = await getRedTubeMediaUrlList(mediaUrl, sessionCookie)
|
||||
data.srcSet = convertedData.reverse()
|
||||
|
||||
await storeDataIntoRedis(queryUrl, data, DEFAULT_REDTUBE_VIDEO_EXPIRY);
|
||||
}
|
||||
|
||||
if (relatedData.length > 0) {
|
||||
await storeDataIntoRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH, relatedData, DEFAULT_REDTUBE_GALLERY_EXPIRY);
|
||||
}
|
||||
|
||||
return [ data, relatedData ]
|
||||
}
|
Loading…
Reference in New Issue