RedditDL improved: download multiple media asynchronously, plus some bug fixes.

anonymousx97 2023-02-20 22:21:53 +05:30
parent 09262818a2
commit e355eab67f
2 changed files with 136 additions and 265 deletions

View File

@@ -1,10 +1,32 @@
# Light weight Instagram DL bot.
### Lightweight Social Media downloader bot.
* Supported Platforms:
* Videos: Instagram, TikTok, Twitter, YouTube Shorts
* Images: Instagram, Reddit
* GIFs: Reddit
# Deploy:
### Usage and Commands:
* Send supported links in any authorised chat/channel, and the bot will try to download and send the media.
* Owner-only commands:
* `.dl link` to download and send media in any chat.
* `.bot update` to refresh chat list without restarting bot.
* `.bot restart` to restart bot.
* `.bot ids` to get chat / channel / user IDs.
* `.bot join` or `.bot leave` to join/leave a chat using its ID.
* `.del` reply to a message to delete it.
* `.term` to run shell commands in bot. Example: `.term ls`
* These commands can be used anywhere and are not limited to authorised chats.
### Deploy:
* For Android local deploy:
* Download Latest [Termux](https://github.com/termux/termux-app/releases).
```bash
# Update local packages after installing Termux.
yes|apt update && yes|apt upgrade
```
* Config:
* Get API_ID and API_HASH from [my.telegram.org/auth](https://my.telegram.org/auth).
* Generate String Session by running this in termux:
* Generate String Session by running this in Termux:
```bash
bash -c "$(curl -fsSL https://raw.githubusercontent.com/ux-termux/string/main/Termux.sh)"
```
@@ -26,13 +48,6 @@
* User: Your user ID, used to control the bot.
* Trigger: The trigger prefix used to access the bot's commands (see the example config below).
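* For reference, a minimal example `config.env` is sketched below. The values are illustrative, and the exact names of the user and trigger variables are assumptions, so verify every name against the config file shipped with the repo:
```bash
# Example config.env — illustrative values only.
API_ID=1234567                      # from my.telegram.org
API_HASH=0123456789abcdef           # from my.telegram.org
STRING_SESSION=your-string-session  # generated in Termux as shown above
LOG=-1001234567890                  # log channel id
MESSAGE=12                          # id of the message holding the authorised chat list
USER=123456789                      # your user id (name assumed)
TRIGGER=.                           # command prefix (name assumed)
```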
* Download Latest [Termux](https://github.com/termux/termux-app/releases).
```bash
# Update local packages after installing Termux.
yes|apt update && yes|apt upgrade
```
* Run the following commands:
```bash
# Install required packages.
@@ -57,25 +72,14 @@
* If everything is correct, you will see <b><i>Started</i></b> printed in the terminal and in your log channel.
# Usage and Commands:
* Send Instagram link in any authorised chat/channel, bot will try to download and send the media.
* Owner only commands:
* `.dl link` to download and send media in any chat.
* `.rdl` to download media from reddit.
* `.bot update` to refresh chat list without restarting bot.
* `.bot restart` to restart bot.
* `.bot ids` to get chat / channel / user IDs.
* `.bot join or leave` to join / leave chat using ID.
* `.del` to delete message.
* These commands can be used anywhere and are not limited to authorised chats.
# Known limitations:
### Known limitations:
* If deployed on a VPS or any other server, Instagram might block access to some content.
After hitting Instagram's rate limit, image downloads might stop working, because servers and VPSes usually have static IPs that Instagram can block.
* Deploying it locally would solve all of those issues since most of us have dynamic IP and Instagram will not be able to block access.
* Deploying it locally avoids these issues, because most of us are likely to have a dynamic IP, so Instagram will not be able to block access.
The bot is made lightweight with local deploys in mind, but battery life will still take some hit.
* Logging in with your Instagram account, which would solve the rate-limit issues, is not supported and won't be added, because two of my accounts were suspended pending manual verification for using scraping bots like this one while logged in.
# Contact
### Contact
* For any questions related to deployment or other issues, contact me on [Telegram](https://t.me/anonymousx97).

View File

@@ -1,5 +1,6 @@
import asyncio
import base64
import glob
import json
import os
import shutil
@@ -13,7 +14,7 @@ import yt_dlp
from dotenv import load_dotenv
from pyrogram import Client, filters, idle
from pyrogram.enums import ChatType
from pyrogram.errors import MediaEmpty, PhotoSaveFileInvalid, WebpageCurlFailed, PeerIdInvalid
from pyrogram.errors import MediaEmpty, PeerIdInvalid, PhotoSaveFileInvalid, WebpageCurlFailed
from pyrogram.handlers import MessageHandler
from pyrogram.types import InputMediaPhoto, InputMediaVideo, Message
from wget import download
@@ -21,14 +22,9 @@ from wget import download
if os.path.isfile("config.env"):
load_dotenv("config.env")
bot = Client(
name="bot",
session_string=os.environ.get("STRING_SESSION"),
api_id=os.environ.get("API_ID"),
api_hash=os.environ.get("API_HASH"),
)
bot = Client(name="bot", session_string=os.environ.get("STRING_SESSION"), api_id=os.environ.get("API_ID"), api_hash=os.environ.get("API_HASH"))
log_chat = os.environ.get("LOG")
if log_chat == None:
if log_chat is None:
print("Enter log channel id in config")
exit()
chat_list = []
@@ -94,16 +90,12 @@ async def multi_func(bot, message: Message):
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
@bot.on_message(
filters.command(commands="term", prefixes=trigger) & filters.user(users)
)
@bot.on_message(filters.command(commands="term", prefixes=trigger) & filters.user(users))
async def run_cmd(bot, message: Message):
"""Function to run shell commands"""
cmd = message.text.replace("+term", "")
status_ = await message.reply("executing...")
process = await asyncio.create_subprocess_shell(
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
process = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
stdout, stderr = await process.communicate()
if process.returncode is not None:
@@ -126,15 +118,14 @@ async def delete_message(bot, message: Message):
@bot.on_message(filters.command(commands="dl", prefixes=trigger) & filters.user(users))
async def dl(bot, message: Message):
""" The main Logic Function to download media """
response = await bot.send_message(message.chat.id, "`trying to download...`")
"""The main Logic Function to download media"""
rw_message = message.text.split()
reply = message.reply_to_message
reply_id = reply.id if reply else None
sender_ = message.author_signature or message.from_user.first_name or ""
response = await bot.send_message(message.chat.id, "`trying to download...`")
curse_ = ""
caption = "Shared by : "
if message.sender_chat:
caption += message.author_signature
else:
caption += message.from_user.first_name
caption = f"Shared by : {sender_}"
check_dl = "failed"
if "-d" in rw_message:
doc = True
@@ -146,69 +137,35 @@ async def dl(bot, message: Message):
curse_ = "#FuckInstagram"
if check_dl == "failed":
check_dl = await json_dl(iurl=i, caption=caption, doc=doc)
if "twitter.com" in i or "https://youtube.com/shorts" in i or "tiktok.com" in i:
elif "twitter.com" in i or "https://youtube.com/shorts" in i or "tiktok.com" in i:
check_dl = await iyt_dl(url=i)
elif "www.reddit.com" in i:
check_dl = await reddit_dl(url_=i, doc=doc, sender_=sender_)
curse_ = "Link doesn't contain any media or is restricted\nTip: Make sure you are sending original post url and not an embedded post."
else:
pass
if isinstance(check_dl, dict):
"""Send Media if response from check dl contains data dict"""
if isinstance(check_dl["media"], list):
for data_ in check_dl["media"]:
for vv in check_dl["media"]:
if isinstance(vv, list):
"""Send Grouped Media if data contains a list made of smaller lists of 5 medias"""
await bot.send_media_group(
message.chat.id,
media=data_,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
await bot.send_media_group(message.chat.id, media=vv, reply_to_message_id=reply_id)
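# brief pause between album sends, presumably to stay under Telegram's flood limits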
await asyncio.sleep(3)
else:
"""Send Document if data is list of media files"""
await bot.send_document(
message.chat.id,
document=data_,
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
""" If media isn't a list then it's a single file to be sent """
if isinstance(check_dl["media"], str):
if doc:
await bot.send_document(
message.chat.id,
document=check_dl["media"],
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
await bot.send_document(message.chat.id, document=vv, caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, force_document=True)
else:
if doc:
await bot.send_document(message.chat.id, document=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, force_document=True)
else:
if check_dl["type"] == "img":
await bot.send_photo(
message.chat.id,
photo=check_dl["media"],
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
if check_dl["type"] == "vid":
try:
await bot.send_video(
message.chat.id,
video=check_dl["media"],
caption=caption,
thumb=check_dl["thumb"]
if os.path.isfile(check_dl["thumb"])
else None,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
except (MediaEmpty, WebpageCurlFailed):
if check_dl["type"] == "img":
await bot.send_photo(message.chat.id, photo=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id)
elif check_dl["type"] == "vid":
await bot.send_video(message.chat.id, video=check_dl["media"], caption=check_dl["caption"] + caption, thumb=check_dl["thumb"], reply_to_message_id=reply_id)
else:
await bot.send_animation(message.chat.id, animation=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, unsave=True)
except PhotoSaveFileInvalid:
await bot.send_document(message.chat.id, document=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id)
except (MediaEmpty, WebpageCurlFailed, ValueError):
pass
if os.path.exists(str(check_dl["path"])):
shutil.rmtree(str(check_dl["path"]))
@@ -222,33 +179,18 @@ async def dl(bot, message: Message):
async def iyt_dl(url: str):
"""Stop handling post url because this only downloads Videos and post might contain images"""
if url.startswith("https://www.instagram.com/p/"):
if not url.startswith("https://www.instagram.com/reel/"):
return "failed"
path_ = time.time()
video = f"{path_}/v.mp4"
thumb = f"{path_}/i.png"
_opts = {
"outtmpl": video,
"ignoreerrors": True,
"ignore_no_formats_error": True,
"format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]",
"quiet": True,
"logger": FakeLogger(),
}
_opts = {"outtmpl": video, "ignoreerrors": True, "ignore_no_formats_error": True, "format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]", "quiet": True, "logger": FakeLogger()}
return_val = "failed"
try:
yt_dlp.YoutubeDL(_opts).download(url)
if os.path.isfile(video):
call(
f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''',
shell=True,
)
return_val = {
"path": str(path_),
"type": "vid",
"media": video,
"thumb": thumb,
}
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''', shell=True)
return_val = {"path": str(path_), "type": "vid", "media": video, "thumb": thumb if os.path.isfile(thumb) else None, "caption": ""}
except BaseException:
pass
return return_val
@@ -274,149 +216,85 @@ async def json_dl(iurl: str, doc: bool, caption: str):
if url["__typename"] == "GraphVideo":
url_ = url["video_url"]
wget_x = download(url_, d_dir)
call(
f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{wget_x}" -vframes 1 "{d_dir}/i.png"''',
shell=True,
)
return_val = { "path": d_dir, "type": "vid", "media": wget_x, "thumb": d_dir + "/i.png" }
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{wget_x}" -vframes 1 "{d_dir}/i.png"''', shell=True)
return_val = {"path": d_dir, "type": "vid", "media": wget_x, "thumb": d_dir + "/i.png", "caption": ""}
if url["__typename"] == "GraphImage":
url_ = url["display_url"]
wget_x = download(url_, d_dir + "/i.jpg")
return_val = { "path": d_dir, "type": "img", "media": wget_x, "thumb": "" }
return_val = {"path": d_dir, "type": "img", "media": wget_x, "thumb": None, "caption": ""}
if url["__typename"] == "GraphSidecar":
doc_list = []
vlist = []
vlist2 = []
plist = []
plist2 = []
url_list = []
for i in url["edge_sidecar_to_children"]["edges"]:
if i["node"]["__typename"] == "GraphImage":
url_ = i["node"]["display_url"]
wget_x = download(url_, d_dir)
if wget_x.endswith(".webp"):
os.rename(wget_x, wget_x + ".jpg")
wget_x = wget_x + ".jpg"
if doc:
doc_list.append(wget_x)
else:
if len(plist) >= 5:
plist2.append(InputMediaPhoto(media=wget_x, caption=caption))
else:
plist.append(InputMediaPhoto(media=wget_x, caption=caption))
url_list.append(i["node"]["display_url"])
if i["node"]["__typename"] == "GraphVideo":
url_ = i["node"]["video_url"]
wget_x = download(url_, d_dir)
if doc:
doc_list.append(wget_x)
else:
if len(vlist) >= 5:
vlist2.append(InputMediaVideo(media=wget_x, caption=caption))
else:
vlist.append(InputMediaVideo(media=wget_x, caption=caption))
if doc:
return_val = {"path": d_dir, "media": doc_list}
else:
return_val = {
"path": d_dir,
"media": [
zz for zz in [plist, plist2, vlist, vlist2] if len(zz) > 0
],
}
url_list.append(i["node"]["video_url"])
downloads = await async_download(urls=url_list, path=d_dir, doc=doc, caption=caption + "\n..")
return_val = {"path": d_dir, "media": downloads}
except Exception:
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
return return_val
@bot.on_message(filters.command(commands="rdl", prefixes=trigger) & filters.user(users))
async def reddit_dl(bot, message: Message):
ext = None
del_link = True
rw_message = message.text.split()
response = await bot.send_message(
chat_id=message.chat.id, text="Trying to download..."
)
if message.sender_chat:
sender_ = message.author_signature
else:
sender_ = message.from_user.first_name
for link_ in rw_message:
if link_.startswith("https://www.reddit.com"):
link = link_.split("/?")[0] + ".json?limit=1"
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_7 rv:5.0; en-US) AppleWebKit/533.31.5 (KHTML, like Gecko) Version/4.0 Safari/533.31.5",
}
link = url_.split("/?")[0] + ".json?limit=1"
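# appending .json to a Reddit post URL returns the post data as JSON; limit=1 keeps the response small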
headers = {"user-agent": "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_7 rv:5.0; en-US) AppleWebKit/533.31.5 (KHTML, like Gecko) Version/4.0 Safari/533.31.5"}
return_val = "failed"
try:
async with aiohttp.ClientSession() as session:
async with session.get(link, headers=headers) as ss:
session_response = await ss.json()
json_ = session_response[0]["data"]["children"][0]["data"]
check_ = json_["secure_media"]
title_ = json_["title"]
subr = json_["subreddit_name_prefixed"]
caption = f"__{subr}:__\n**{title_}**\n\nShared by : {sender_}"
async with (aiohttp.ClientSession() as session, session.get(link, headers=headers) as ss):
response = await ss.json()
json_ = response[0]["data"]["children"][0]["data"]
caption = f'__{json_["subreddit_name_prefixed"]}:__\n**{json_["title"]}**\n\n'
d_dir = str(time.time())
os.mkdir(d_dir)
if isinstance(check_, dict):
v = f"{d_dir}/v.mp4"
t = f"{d_dir}/i.png"
if "oembed" in check_:
vid_url = json_["preview"]["reddit_video_preview"]["fallback_url"]
await bot.send_animation(
chat_id=message.chat.id,
animation=vid_url,
unsave=True,
caption=caption,
)
is_vid, is_gallery = json_.get("is_video"), json_.get("is_gallery")
if is_vid:
video = f"{d_dir}/v.mp4"
thumb = f"{d_dir}/i.png"
vid_url = json_["secure_media"]["reddit_video"]["hls_url"]
call(f'ffmpeg -hide_banner -loglevel error -i "{vid_url.strip()}" -c copy {video}', shell=True)
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''', shell=True)
return_val = {"path": d_dir, "type": "vid", "media": video, "thumb": thumb, "caption": caption}
elif is_gallery:
grouped_media_urls = [f'https://i.redd.it/{i["media_id"]}.jpg' for i in json_["gallery_data"]["items"]]
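# gallery items are fetched directly from i.redd.it; the .jpg extension is assumed for every media_id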
downloads = await async_download(urls=grouped_media_urls, path=d_dir, doc=doc, caption=caption + f"Shared by : {sender_}")
return_val = {"path": d_dir, "media": downloads}
else:
vid_url = check_["reddit_video"]["hls_url"]
call(
f'ffmpeg -hide_banner -loglevel error -i "{vid_url.strip()}" -c copy {v}',
shell=True,
)
call(
f'''ffmpeg -ss 0.1 -i "{v}" -vframes 1 "{t}"''', shell=True
)
await message.reply_video(v, caption=caption, thumb=t)
media_ = json_["url_overridden_by_dest"].strip()
if media_.endswith((".jpg", ".jpeg", ".png", ".webp")):
img = download(media_, d_dir)
return_val = {"path": d_dir, "type": "img", "media": img, "thumb": None, "caption": caption}
elif media_.endswith(".gif"):
gif = download(media_, d_dir)
return_val = {"path": d_dir, "type": "animation", "media": gif, "thumb": None, "caption": caption}
else:
media_ = json_["url_overridden_by_dest"]
try:
if media_.strip().endswith(".gif"):
ext = ".gif"
await bot.send_animation(
chat_id=message.chat.id,
animation=media_,
unsave=True,
caption=caption,
)
if media_.strip().endswith((".jpg", ".jpeg", ".png", ".webp")):
ext = ".png"
await message.reply_photo(media_, caption=caption)
except (MediaEmpty, WebpageCurlFailed):
download(media_, f"{d_dir}/i{ext}")
if ext == ".gif":
await bot.send_animation(
chat_id=message.chat.id,
animation=f"{d_dir}/i.gif",
unsave=True,
caption=caption,
)
else:
try:
await message.reply_photo(f"{d_dir}/i.png", caption=caption)
except PhotoSaveFileInvalid:
await message.reply_document(document=f"{d_dir}/i.png", caption=caption)
if os.path.exists(str(d_dir)):
shutil.rmtree(str(d_dir))
gif_url = json_.get("preview", {}).get("reddit_video_preview", {}).get("fallback_url")
if gif_url:
gif = download(gif_url, d_dir)
return_val = {"path": d_dir, "type": "animation", "media": gif, "thumb": None, "caption": caption}
except Exception:
del_link = False
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
await response.edit("Link doesn't contain any media or is restricted\nTip: Make sure you are sending original post url and not an embedded post.")
continue
if del_link:
await message.delete()
await response.delete()
return return_val
async def async_download(urls: list, path: str, doc: bool = False, caption: str = ""):
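# wget's download() is blocking, so each call is offloaded to a worker thread and all downloads are awaited concurrently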
down_loads = await asyncio.gather(*[asyncio.to_thread(download, url, path) for url in urls])
if doc:
return down_loads
for file in glob.glob(f"{path}/*.webp"):
    os.rename(file, file + ".png")
files = glob.glob(f"{path}/*")
grouped_images = [InputMediaPhoto(img, caption=caption) for img in files if img.endswith((".png", ".jpg", ".jpeg"))]
grouped_videos = [InputMediaVideo(vid, caption=caption) for vid in files if vid.endswith((".mp4", ".mkv", ".webm"))]
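# chunk the media into albums of 5 (Telegram caps a media group at 10 items)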
return_list = [grouped_images[imgs : imgs + 5] for imgs in range(0, len(grouped_images), 5)] + [
grouped_videos[vids : vids + 5] for vids in range(0, len(grouped_videos), 5)
]
return return_list
class FakeLogger(object):
@@ -432,7 +310,7 @@ class FakeLogger(object):
async def add_h():
message_id = os.environ.get("MESSAGE")
if message_id == None:
if message_id is None:
print("Enter Message id in config.\n")
return 1
try:
@@ -441,7 +319,7 @@ async def add_h():
print("Log channel not found.\nCheck the variable for mistakes")
return 1
chat_list.clear()
if msg == None:
if msg is None:
print("Message not found\nCheck variable for mistakes\n")
return 1
try:
@@ -452,27 +330,16 @@ async def add_h():
chat_list.extend(chats_list)
social_handler = bot.add_handler(
MessageHandler(
dl,
(
(
dl,((
filters.regex(r"^https://www.instagram.com/*")
| filters.regex(r"^https://youtube.com/shorts/*")
| filters.regex(r"^https://twitter.com/*")
| filters.regex(r"^https://vm.tiktok.com/*")
)
& filters.chat(chat_list)
),
),
| filters.regex(r"^https://www.reddit.com/*")
) & filters.chat(chat_list))),
group=1,
)
reddit_handler = bot.add_handler(
MessageHandler(
reddit_dl,
(filters.regex(r"^https://www.reddit.com/*") & filters.chat(chat_list)),
),
group=2,
)
handler_.extend([social_handler, reddit_handler])
handler_.append(social_handler)
async def boot():