add YouPorn video scrape
This commit is contained in:
parent
b84ad5127a
commit
7c5ca07ab4
|
@ -4,8 +4,12 @@ const EX_DAILY = 60 * 60 * 24
|
||||||
|
|
||||||
export const DEFAULT_PORNHUB_GALLERY_EXPIRY = { EX: EX_HOURLY };
|
export const DEFAULT_PORNHUB_GALLERY_EXPIRY = { EX: EX_HOURLY };
|
||||||
export const DEFAULT_PORNHUB_VIDEO_EXPIRY = { EX: EX_MIN };
|
export const DEFAULT_PORNHUB_VIDEO_EXPIRY = { EX: EX_MIN };
|
||||||
|
|
||||||
export const DEFAULT_XVIDEOS_CONTENT_EXPIRY = { EX: EX_HOURLY };
|
export const DEFAULT_XVIDEOS_CONTENT_EXPIRY = { EX: EX_HOURLY };
|
||||||
|
|
||||||
export const DEFAULT_XNXX_CONTENT_EXPIRY = { EX: EX_HOURLY };
|
export const DEFAULT_XNXX_CONTENT_EXPIRY = { EX: EX_HOURLY };
|
||||||
|
|
||||||
export const DEFAULT_YOUPORN_GALLERY_EXPIRY = { EX: EX_HOURLY };
|
export const DEFAULT_YOUPORN_GALLERY_EXPIRY = { EX: EX_HOURLY };
|
||||||
|
// Expiry options handed to storeDataIntoRedis when caching scraped YouPorn video data.
// NOTE(review): EX is presumably a TTL in seconds (EX_HOURLY) — confirm against the redis client wrapper.
export const DEFAULT_YOUPORN_VIDEO_EXPIRY = { EX: EX_HOURLY };
|
||||||
|
|
||||||
export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/'
|
export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/'
|
|
@ -26,4 +26,6 @@ export const PORNHUB_BASE_URL_GAY_SEARCH: string = 'https://www.pornhub.com/gay'
|
||||||
// YOUPORN
|
// YOUPORN
|
||||||
|
|
||||||
export const YOUPORN_BASE_URL: string = 'https://www.youporn.com'
|
export const YOUPORN_BASE_URL: string = 'https://www.youporn.com'
|
||||||
|
// Base URL of YouPorn watch pages; the video fetcher appends `/<videoId>` to build the page URL.
export const YOUPORN_BASE_URL_VIDEO: string = 'https://www.youporn.com/watch'
|
||||||
|
|
||||||
export const YOUPORN_BASE_SEARCH: string = 'https://www.youporn.com/search/?search-btn=&query='
|
export const YOUPORN_BASE_SEARCH: string = 'https://www.youporn.com/search/?search-btn=&query='
|
|
@ -23,6 +23,11 @@ export interface VideoData {
|
||||||
srcSet?: VideoSourceItem[]
|
srcSet?: VideoSourceItem[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shape of one element of the media-list response that the PornHub and
 * YouPorn scrapers map into `VideoSourceItem` entries.
 */
export interface MindGeekVideoSrcElem {
    /** Location of the media source; the scrapers use it to build the item's `src`. */
    videoUrl: string
    /** Quality label of the source; the scrapers copy it into the item's `size`. */
    quality: string
}
|
||||||
|
|
||||||
export interface VideoAgent {
|
export interface VideoAgent {
|
||||||
getGallery(params?: FetchParams): Promise<GalleryData[]>
|
getGallery(params?: FetchParams): Promise<GalleryData[]>
|
||||||
getVideo(id: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]>
|
getVideo(id: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]>
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
export const findGetMediaUrlInTagblock = (
|
export const findGetMediaUrlInTagblock = (
|
||||||
tagBlock: string): string | null => {
|
tagBlock: string, key?: string): string | null => {
|
||||||
|
|
||||||
const getMediaIndex = tagBlock.indexOf('get_media');
|
const getMediaIndex = tagBlock.indexOf(key ?? 'get_media');
|
||||||
|
|
||||||
if (getMediaIndex === -1) {
|
if (getMediaIndex === -1) {
|
||||||
return null
|
return null
|
||||||
|
|
|
@ -1,17 +1,12 @@
|
||||||
import { PORNHUB_BASE_URL, PORNHUB_BASE_URL_GAY, PORNHUB_BASE_URL_GAY_SEARCH } from "@/constants/urls"
|
import { PORNHUB_BASE_URL, PORNHUB_BASE_URL_GAY, PORNHUB_BASE_URL_GAY_SEARCH } from "@/constants/urls"
|
||||||
import axios, { AxiosHeaders } from "axios"
|
import axios, { AxiosHeaders } from "axios"
|
||||||
import { getHeadersWithCookie } from "../common/headers"
|
import { getHeadersWithCookie } from "../common/headers"
|
||||||
import { GalleryData, VideoSourceItem } from "@/meta/data"
|
import { GalleryData, MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
|
||||||
import { Cookies, Platforms, PornHubOrientations } from "@/meta/settings"
|
import { Cookies, Platforms, PornHubOrientations } from "@/meta/settings"
|
||||||
import { getCookie } from "@/utils/cookies/read"
|
import { getCookie } from "@/utils/cookies/read"
|
||||||
import { encodeUrl } from "@/utils/string"
|
import { encodeUrl } from "@/utils/string"
|
||||||
import { DEFAULT_VIDEO_STREAM_ROUTE_PREFIX } from "@/constants/stream"
|
import { DEFAULT_VIDEO_STREAM_ROUTE_PREFIX } from "@/constants/stream"
|
||||||
|
|
||||||
interface PornHubVideoSrcElem {
|
|
||||||
videoUrl: string
|
|
||||||
quality: string
|
|
||||||
}
|
|
||||||
|
|
||||||
export const getPornHubQueryUrl = async (query?: string): Promise<string> => {
|
export const getPornHubQueryUrl = async (query?: string): Promise<string> => {
|
||||||
const orientation = await getCookie(Cookies.orientation)
|
const orientation = await getCookie(Cookies.orientation)
|
||||||
|
|
||||||
|
@ -49,7 +44,7 @@ export const getPornHubMediaUrlList = async (url: string, sessionCookie: string)
|
||||||
|
|
||||||
if (response.data) {
|
if (response.data) {
|
||||||
|
|
||||||
videos = await response.data.map((elem: PornHubVideoSrcElem) => ({
|
videos = await response.data.map((elem: MindGeekVideoSrcElem) => ({
|
||||||
src: `${DEFAULT_VIDEO_STREAM_ROUTE_PREFIX}/${Platforms.pornhub}/${encodeUrl(elem?.videoUrl)}`,
|
src: `${DEFAULT_VIDEO_STREAM_ROUTE_PREFIX}/${Platforms.pornhub}/${encodeUrl(elem?.videoUrl)}`,
|
||||||
type: 'video/mp4',
|
type: 'video/mp4',
|
||||||
size: elem?.quality
|
size: elem?.quality
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
import { YOUPORN_BASE_SEARCH, YOUPORN_BASE_URL } from "@/constants/urls"
|
import { YOUPORN_BASE_SEARCH, YOUPORN_BASE_URL } from "@/constants/urls"
|
||||||
|
import { getHeadersWithCookie } from "../common/headers"
|
||||||
|
import axios from "axios"
|
||||||
|
import { MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
|
||||||
|
|
||||||
export const getYouPornQueryUrl = async (query?: string): Promise<string> => {
|
export const getYouPornQueryUrl = async (query?: string): Promise<string> => {
|
||||||
|
|
||||||
|
@ -8,3 +11,32 @@ export const getYouPornQueryUrl = async (query?: string): Promise<string> => {
|
||||||
|
|
||||||
return YOUPORN_BASE_URL
|
return YOUPORN_BASE_URL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches the list of playable sources for a YouPorn video.
 *
 * Requests `url` with the configuration built by `getHeadersWithCookie` from
 * `YOUPORN_BASE_URL` and `sessionCookie`, then converts each element of the
 * response body into a `VideoSourceItem` (`src` from `videoUrl`, `size` from
 * `quality`, `type` fixed to `'video/mp4'`).
 *
 * @param url - Media endpoint to query.
 * @param sessionCookie - Cookie value forwarded to `getHeadersWithCookie`.
 * @returns The mapped sources, or an empty array when the request fails or
 *          the response body is not an array.
 */
export const getYouPornMediaUrlList = async (url: string, sessionCookie: string): Promise<VideoSourceItem[]> => {
    const headersWithCookie = getHeadersWithCookie(YOUPORN_BASE_URL, sessionCookie)

    try {
        const response = await axios.get(url, headersWithCookie)
        const payload: unknown = response.data

        // Only an array can be mapped; an empty body, an HTML error page or an
        // object payload all yield "no sources" instead of a thrown TypeError.
        if (!Array.isArray(payload)) {
            return []
        }

        return payload.map((elem: MindGeekVideoSrcElem) => ({
            src: elem?.videoUrl,
            type: 'video/mp4',
            size: elem?.quality
        })) as VideoSourceItem[]
    } catch (error: unknown) {
        // Best-effort fetch: report the failure and fall back to an empty list.
        console.log(error)
        return []
    }
}
|
|
@ -1,4 +1,14 @@
|
||||||
import { FetchParams, GalleryData, VideoData } from "@/meta/data";
|
import { YOUPORN_BASE_URL, YOUPORN_BASE_URL_VIDEO } from "@/constants/urls";
|
||||||
|
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
|
||||||
|
import { getHeaders } from "../common/headers";
|
||||||
|
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
|
||||||
|
import { DEFAULT_RELATED_VIDEO_KEY_PATH, DEFAULT_YOUPORN_VIDEO_EXPIRY } from "@/constants/redis";
|
||||||
|
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
|
||||||
|
import axios, { AxiosError } from "axios";
|
||||||
|
import { createSessionCookie, findGetMediaUrlInTagblock, findGetRelatedUrlInTagblock } from "../common/mindgeek";
|
||||||
|
import { getYouPornMediaUrlList } from "./url";
|
||||||
|
|
||||||
export const fetchYouPornVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
|
export const fetchYouPornVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
|
||||||
|
|
||||||
|
@ -9,5 +19,59 @@ export const fetchYouPornVideoData = async (videoId: string, params?: FetchParam
|
||||||
|
|
||||||
let relatedData: GalleryData[] = [];
|
let relatedData: GalleryData[] = [];
|
||||||
|
|
||||||
|
let mediaUrl, relatedUrl, sessionCookie, convertedData: VideoSourceItem[]
|
||||||
|
|
||||||
|
let reqHeaders = getHeaders(YOUPORN_BASE_URL)
|
||||||
|
|
||||||
|
const queryUrl = `${YOUPORN_BASE_URL_VIDEO}/${videoId.replace(/\//g, '')}`
|
||||||
|
|
||||||
|
const cachedVideoData = await getDataFromRedis(queryUrl)
|
||||||
|
const cachedRelatedData = await getDataFromRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH)
|
||||||
|
|
||||||
|
if (cachedVideoData) {
|
||||||
|
return [cachedVideoData as VideoData, cachedRelatedData as GalleryData[] ?? []]
|
||||||
|
}
|
||||||
|
|
||||||
|
await axios.get(queryUrl, reqHeaders)
|
||||||
|
|
||||||
|
.then(async response => {
|
||||||
|
|
||||||
|
sessionCookie = response?.headers["set-cookie"] ? createSessionCookie(response?.headers["set-cookie"]) : '';
|
||||||
|
|
||||||
|
const html = response.data;
|
||||||
|
|
||||||
|
const $ = cheerio.load(html);
|
||||||
|
|
||||||
|
const scriptTags = $("script");
|
||||||
|
|
||||||
|
scriptTags.map((idx, elem) => {
|
||||||
|
const getMediaUrl = findGetMediaUrlInTagblock($(elem).toString().replace(/\\/g, ''), 'media/mp4') ?? null
|
||||||
|
|
||||||
|
if (getMediaUrl) {
|
||||||
|
mediaUrl = getMediaUrl
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
scriptTags.map((idx, elem) => {
|
||||||
|
const getRelatedUrl = findGetRelatedUrlInTagblock($(elem).toString()) ?? null
|
||||||
|
|
||||||
|
if (getRelatedUrl) {
|
||||||
|
relatedUrl = getRelatedUrl
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
}).catch((error: AxiosError) => {
|
||||||
|
// error handling goes here
|
||||||
|
});
|
||||||
|
|
||||||
|
if (sessionCookie && mediaUrl) {
|
||||||
|
convertedData = await getYouPornMediaUrlList(mediaUrl, sessionCookie)
|
||||||
|
data.srcSet = convertedData.reverse()
|
||||||
|
|
||||||
|
await storeDataIntoRedis(queryUrl, data, DEFAULT_YOUPORN_VIDEO_EXPIRY);
|
||||||
|
}
|
||||||
|
|
||||||
return [ data, relatedData ]
|
return [ data, relatedData ]
|
||||||
}
|
}
|
Loading…
Reference in New Issue