Initial commit for gallery-dl support

anonymousx97 2023-05-26 16:14:55 +05:30
parent ba92dad18b
commit fdfd8ce2c1
2 changed files with 122 additions and 325 deletions

@@ -5,3 +5,4 @@ python-dotenv==0.21.0
tgCrypto==1.2.3
wget
yt-dlp
gallery-dl

@@ -71,7 +71,123 @@ USERS = json.loads(os.environ.get("USERS"))
TRIGGER = os.environ.get("TRIGGER")
E_JSON = base64.b64decode("Lz9fX2E9MSZfX2Q9MQ==").decode("utf-8")
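# E_JSON decodes to "/?__a=1&__d=1", the query string Instagram's web API used
# to return a post as JSON (consumed by the api_2 helper that this commit removes).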
# Download Section
# Gallery-dl for all types of media
async def gallery_dl(url: str, caption: str, doc: bool):
    download_dir = f"downloads/{time.time()}"
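    # gallery-dl flags: -q activates quiet mode; -D drops every file straight
    # into download_dir (no site-specific subfolders).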
    await run_shell_cmd(f"gallery-dl -q -D {download_dir} '{url}'")
    files = glob.glob(f"{download_dir}/*")
    if not files:
        return "failed"
    ret_dict = {"path": download_dir, "caption": caption}
    if doc:
        ret_dict["media"] = files
    # If more than one file, return the media grouped
    elif len(files) > 1:
        grouped_images, grouped_videos, animations = [], [], []
        for file in files:
            if file.endswith((".png", ".jpg", ".jpeg", ".webp")):
                if file.endswith(".webp"):
                    os.rename(file, file + ".png")
                    file = file + ".png"
                grouped_images.append(InputMediaPhoto(file, caption=caption))
            elif file.endswith((".mp4", ".mkv", ".webm")):
                has_audio = await check_audio(file)
                if not has_audio:
                    animations.append(file)
                else:
                    grouped_videos.append(InputMediaVideo(file, caption=caption))
        # Limit each list to 5 items to avoid tripping Telegram's flood control.
        group_list = (
            [grouped_images[imgs : imgs + 5] for imgs in range(0, len(grouped_images), 5)]
            + [grouped_videos[vids : vids + 5] for vids in range(0, len(grouped_videos), 5)]
            + animations
        )
        # Final shape of group_list: [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]];
        # animations are appended as bare file paths rather than grouped.
        ret_dict.update({"is_grouped": True, "media": group_list})
    # Otherwise return a single file, renaming webp to jpg because Telegram
    # converts webp images into stickers.
    else:
        file = files[0]
        ret_dict["media"] = file
        if file.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
            ret_dict["is_image"] = True
            if file.lower().endswith(".webp"):
                os.rename(file, file + ".jpg")
                ret_dict["media"] = file + ".jpg"
        elif file.lower().endswith((".mkv", ".mp4", ".webm")):
            ret_dict.update({"is_video": True, "thumb": await take_ss(video=file, path=download_dir)})
        elif file.lower().endswith(".gif"):
            ret_dict["is_animation"] = True
    return ret_dict
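
# Example return value for a single video (shape only; paths are illustrative):
# {"path": "downloads/1685098495.01", "caption": "...",
#  "media": "downloads/1685098495.01/some_post.mp4", "is_video": True,
#  "thumb": "downloads/1685098495.01/i.png"}
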
# YT-DLP for videos from multiple sites
async def yt_dl(url: str, caption: str, doc: bool = False):
    path = "downloads/" + str(time.time())
    video = f"{path}/v.mp4"
    _opts = {
        "outtmpl": video,
        "ignoreerrors": True,
        "ignore_no_formats_error": True,
        "quiet": True,
        "logger": FakeLogger(),
    }
if "shorts" in url:
_opts.update({"format": "bv[ext=mp4][res=480]+ba[ext=m4a]/b[ext=mp4]"})
else:
_opts.update({"format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]"})
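    # Selector semantics: best mp4 video + best m4a audio, falling back to the
    # best pre-merged mp4; Shorts additionally pin the video to 480p.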
    data = "failed"
    try:
        yt_dlp.YoutubeDL(_opts).download(url)
        if os.path.isfile(video):
            data = {
                "path": path,
                "is_video": True,
                "media": video,
                "thumb": await take_ss(video=video, path=path),
                "caption": caption,
            }
    except BaseException:
        pass
    return data

# To disable YT-DLP logging
class FakeLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass

# Thumbnail
async def take_ss(video: str, path: str):
    await run_shell_cmd(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{path}/i.png"''')
    if os.path.isfile(path + "/i.png"):
        return path + "/i.png"
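
# take_ss: -ss 0.1 before -i makes ffmpeg seek ~0.1 s in, and -vframes 1 grabs a
# single frame; if ffmpeg fails, the function implicitly returns None.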

# Check whether a clip has an audio stream (video) or only video (animation/GIF)
async def check_audio(file):
    result = await run_shell_cmd(f'ffprobe -v error -show_entries format=nb_streams -of default=noprint_wrappers=1:nokey=1 "{file}"')
    return int(result.get("stdout") or 0) - 1
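# nb_streams counts every stream in the container, so a soundless clip has only
# its video stream and the expression above becomes 0 (falsy) -> animation.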

async def run_shell_cmd(cmd):
    proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
    stdout, stderr = await proc.communicate()
    return {"stdout": stdout.decode("utf-8"), "stderr": stderr.decode("utf-8")}
# BOT Section
@@ -109,10 +225,10 @@ class MESSAGE_PARSER:
    def match_links(self):
        url_map = {
            "tiktok.com": yt_dl,
-           "www.instagram.com": instagram_dl,
+           "www.instagram.com": gallery_dl,
            "youtube.com/shorts": yt_dl,
-           "twitter.com": yt_dl,
-           "www.reddit.com": reddit_dl,
+           "twitter.com": gallery_dl,
+           "www.reddit.com": gallery_dl,
        }
        for link in self.text_list:
            if (match := url_map.get(url_p(link).netloc)):
@@ -310,15 +426,7 @@ async def add_h():
        print("\nThe message id is wrong. \nOr \nChat id message contains letters\nonly numerical ids are allowed.\n")
        return 1
    social_handler = bot.add_handler(
-       MessageHandler(
-           dl,
-           (
-               (filters.regex(r"^http*"))
-               & filters.chat(chats_list)
-           ),
-       ),
-       group=1,
-   )
+       MessageHandler(dl, (filters.regex(r"^http*") & filters.chat(chats_list))), group=1)
    globals().update({"HANDLER_": social_handler})
@@ -335,319 +443,7 @@ async def boot():
    await idle()
# API Section
# Instagram
async def instagram_dl(url: str, caption: str, doc: bool = False):
    args = locals()
    # status = await instafix(message=message, link=i, caption=caption)
    for i in [yt_dl, api_2]:
        data = await i(**args)
        if isinstance(data, dict):
            break
    return data
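
# Fallback chain: yt_dl is tried first, then the api_2 scraper; the first
# helper to return a dict wins.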

async def api_2(url: str, caption: str, doc: bool):
    link = url.split("/?")[0] + E_JSON
    response = await get_json(url=link)
    if not response or "graphql" not in response:
        return "failed"
    return await parse_ghraphql(
        response["graphql"]["shortcode_media"], caption=caption + "\n.."
    )

async def parse_ghraphql(json_: dict, caption: str, doc: bool = False):
    try:
        path = f"downloads/{time.time()}"
        os.makedirs(path)
        ret_dict = {"path": path, "thumb": None, "caption": caption}
        type_check = json_.get("__typename")
        if not type_check:
            return "failed"
        elif type_check == "GraphSidecar":
            media = []
            for i in json_["edge_sidecar_to_children"]["edges"]:
                if i["node"]["__typename"] == "GraphImage":
                    media.append(i["node"]["display_url"])
                if i["node"]["__typename"] == "GraphVideo":
                    media.append(i["node"]["video_url"])
            ret_dict.update({"is_grouped": not doc, "media": await async_download(urls=media, path=path, doc=doc, caption=caption)})
        else:
            media = json_.get("video_url") or json_.get("display_url")
            ret_dict.update(**await get_media(url=media, path=path))
    except Exception:
        await bot.send_message(chat_id=LOG_CHAT, text=str(traceback.format_exc()))
    return ret_dict
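
# Instagram GraphQL typenames: "GraphSidecar" is a multi-media post whose
# children are GraphImage / GraphVideo nodes; single posts expose display_url
# or video_url directly.
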
# YT-DLP for videos from multiple sites
async def yt_dl(url: str, caption: str, doc: bool = False):
    if "instagram.com/p/" in url:
        return
    path = str(time.time())
    video = f"{path}/v.mp4"
    _opts = {
        "outtmpl": video,
        "ignoreerrors": True,
        "ignore_no_formats_error": True,
        "quiet": True,
        "logger": FakeLogger(),
    }
    if "shorts" in url:
        _opts.update({"format": "bv[ext=mp4][res=480]+ba[ext=m4a]/b[ext=mp4]"})
    else:
        _opts.update({"format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]"})
    data = "failed"
    try:
        yt_dlp.YoutubeDL(_opts).download(url)
        if os.path.isfile(video):
            data = {
                "path": path,
                "is_video": True,
                "media": video,
                "thumb": await take_ss(video=video, path=path),
                "caption": caption,
            }
    except BaseException:
        pass
    return data
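
# (The early return for instagram.com/p/ links presumably exists because /p/
# posts can be image posts, which yt-dlp cannot download; instagram_dl then
# falls through to api_2.)
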
# To disable YT-DLP logging
class FakeLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass

# Reddit
async def reddit_dl(url: str, caption: str, doc: bool = False):
    link = url.split("/?")[0] + ".json?limit=1"
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_7 rv:5.0; en-US) AppleWebKit/533.31.5 (KHTML, like Gecko) Version/4.0 Safari/533.31.5"
    }
    try:
        response = await get_json(url=link, headers=headers, json_=True)
        if not response:
            return "failed"
        json_ = response[0]["data"]["children"][0]["data"]
        caption = f'__{json_["subreddit_name_prefixed"]}:__\n**{json_["title"]}**\n\n' + caption
        path = str(time.time())
        os.mkdir(path)
        is_vid, is_gallery = json_.get("is_video"), json_.get("is_gallery")
        data = {"path": path, "caption": caption}
        if is_vid:
            video = f"{path}/v.mp4"
            vid_url = json_["secure_media"]["reddit_video"]["hls_url"]
            await run_shell_cmd(f'ffmpeg -hide_banner -loglevel error -i "{vid_url.strip()}" -c copy {video}')
            data.update({"is_video": True, "media": video, "thumb": await take_ss(video=video, path=path)})
        elif is_gallery:
            grouped_media_urls = [json_["media_metadata"][val]["s"]["u"].replace("preview", "i") for val in json_["media_metadata"]]
            downloads = await async_download(urls=grouped_media_urls, path=path, doc=doc, caption=caption)
            data.update({"is_grouped": True, "media": downloads})
        else:
            url_ = json_.get("preview", {}).get("reddit_video_preview", {}).get("fallback_url", "") or json_.get("url_overridden_by_dest", "").strip()
            if not url_:
                return "failed"
            data.update(await get_media(url=url_, path=path))
    except Exception:
        await bot.send_message(chat_id=LOG_CHAT, text=str(traceback.format_exc()))
    return data
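
# Appending ".json?limit=1" to a post URL hits Reddit's public JSON endpoint;
# videos are remuxed from the HLS playlist with "-c copy" since Reddit serves
# video and audio as separate streams, and the playlist remux keeps both.
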
# Get Json response from APIs
async def get_json(url: str, headers: dict = None, params: dict = None, retry: bool = False, json_: bool = False, timeout: int = 10):
    if retry:
        client = RETRY_CLIENT
    else:
        client = SESSION
    try:
        async with client.get(url=url, headers=headers, params=params, timeout=timeout) as ses:
            if json_:
                ret_json = await ses.json()
            else:
                ret_json = json.loads(await ses.text())
    except (json.decoder.JSONDecodeError, aiohttp.ContentTypeError, asyncio.TimeoutError):
        return
    except Exception:
        await bot.send_message(chat_id=LOG_CHAT, text=str(traceback.format_exc()))
        return
    return ret_json
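
# json_=True parses via aiohttp's .json() (which enforces the content type);
# otherwise the raw body is fed to json.loads, which tolerates APIs that reply
# with a wrong Content-Type header.
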
# Download media and return it with media type
async def get_media(url: str, path: str):
    down_load = download(url, path)
    ret_dict = {"media": down_load}
    if down_load.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
        ret_dict["is_image"] = True
        if down_load.lower().endswith(".webp"):
            os.rename(down_load, down_load + ".jpg")
            ret_dict.update({"media": down_load + ".jpg"})
    elif down_load.lower().endswith((".mkv", ".mp4", ".webm")):
        ret_dict.update({"is_video": True, "thumb": await take_ss(video=down_load, path=path)})
    elif down_load.lower().endswith(".gif"):
        ret_dict.update({"is_animation": True})
    else:
        return {}
    return ret_dict

# Download multiple media files concurrently to save time; returns either a flat
# list or a list of smaller lists, each containing up to 5 media.
async def async_download(urls: list, path: str, doc: bool = False, caption: str = ""):
    down_loads = await asyncio.gather(*[asyncio.to_thread(download, url, path) for url in urls])
    if doc:
        return down_loads
    for file in glob.glob(f"{path}/*.webp"):
        os.rename(file, file + ".png")
    files = [i + ".png" if i.endswith(".webp") else i for i in down_loads]
    grouped_images, grouped_videos, animations = [], [], []
    for file in files:
        if file.endswith((".png", ".jpg", ".jpeg")):
            grouped_images.append(InputMediaPhoto(file, caption=caption))
        if file.endswith((".mp4", ".mkv", ".webm")):
            has_audio = await check_audio(file)
            if not has_audio:
                animations.append(file)
            else:
                grouped_videos.append(InputMediaVideo(file, caption=caption))
    return_list = (
        [grouped_images[imgs : imgs + 5] for imgs in range(0, len(grouped_images), 5)]
        + [grouped_videos[vids : vids + 5] for vids in range(0, len(grouped_videos), 5)]
        + animations
    )
    return return_list
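
# asyncio.to_thread pushes each blocking download() call (presumably wget's)
# onto a worker thread so gather can run them all at once instead of one by one.
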
# Thumbnail
async def take_ss(video: str, path: str):
    await run_shell_cmd(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{path}/i.png"''')
    if os.path.isfile(path + "/i.png"):
        return path + "/i.png"

async def check_audio(file):
    result = await run_shell_cmd(f"ffprobe -v error -show_entries format=nb_streams -of default=noprint_wrappers=1:nokey=1 {file}")
    return int(result.get("stdout", 0)) - 1

async def run_shell_cmd(cmd):
    proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
    stdout, stderr = await proc.communicate()
    return {"stdout": stdout.decode("utf-8"), "stderr": stderr.decode("utf-8")}

# Start only the bot when this file is run directly.
if __name__ == "__main__":
    bot.start()
    bot.run(boot())

# NOT FOR PUBLIC
# API_KEYS = {
#     "abc": {
#         "keys": [],
#         "counter": 0,
#         "exhausted": {},
#     },
# }
# SWITCH = [0]

# Rotating-key helper to avoid hitting the rate limit on a single key
# async def get_key(func_tion):
#     func = API_KEYS.get(func_tion, {})
#     key = func.get("keys")
#     count = func.get("counter")
#     count += 1
#     if count == len(key):
#         count = 0
#     ret_key = key[count]
#     API_KEYS[func_tion]["counter"] = count
#     return ret_key

# Tiktok
# async def tik_dl(url: str, doc: bool, caption: str):
#     status = "failed"
#     headers = {
#         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
#     }
#     url_ = f""
#     response = await get_json(url_, headers=headers, json_=True)
#     if not response or "status" in response and response["status"] == "failed":
#         return "failed"
#     if "video_data" in response:
#         data = response["video_data"]["nwm_video_url_HQ"]
#         status = {"path": "", "is_video": True, "media": data, "thumb": None, "caption": ""}
#     if "image_data" in response:
#         data = response["image_data"]["no_watermark_image_list"]
#         path = f"downloads/{time.time()}"
#         os.makedirs(path)
#         downloads = await async_download(urls=data, path=path, doc=doc, caption=caption)
#         status = {"path": path, "media": downloads, "caption": "", "is_grouped": True}
#     return status

# async def multi_api(url: str, caption: str, doc: bool = False):
#     apis = [
#         {
#             "url": "",
#             "headers": {},
#             "querystring": {},
#         },
#     ]
#     switch_ = SWITCH[0] + 1
#     if switch_ == len(apis):
#         switch_ = 0
#     SWITCH[0] = switch_
#
#     api = apis[switch_]
#     api["headers"]["API-Key"] = await get_key(f"multi_api{switch_}")
#     response = await get_json(url=api.get("url"), headers=api.get("headers"), params=api.get("querystring"), json_=True)
#     if not response or "message" in response:
#         return "failed"
#     data = response.get("data", {}).get("shortcode_media", {}) or response
#     return await parse_ghraphql(json_=data, caption=caption + "\n" + "•" * switch_, doc=doc)

# async def api_1(url: str, caption: str, doc: bool = False):
#     url = ""
#     querystring = {"url": url}
#     data = "failed"
#     headers = {
#         "API-Key": await get_key("api_1"),
#         "API-Host": "",
#     }
#     response = await get_json(url=url, headers=headers, params=querystring)
#     print(response)
#     if not response or "message" in response or "messages" in response:
#         return "failed"
#     media = response["media"]
#     path = f"downloads/{time.time()}"
#     os.makedirs(path)
#     if isinstance(media, list):
#         downloads = await async_download(urls=media, path=path, doc=doc, caption=caption + "\n.")
#         data = {"path": path, "media": downloads, "is_grouped": True}
#     else:
#         data = {
#             "path": path,
#             "caption": caption + "\n.",
#             **await get_media(url=media.split("&filename")[0], path=path),
#         }
#     return data