RedditDL improved: download multiple media asynchronously, plus some bug fixes.

anonymousx97 2023-02-20 22:21:53 +05:30
parent 09262818a2
commit e355eab67f
2 changed files with 136 additions and 265 deletions

View File

@@ -1,10 +1,32 @@
# Light weight Instagram DL bot.
### Lightweight Social Media downloader bot.
* Supported Platforms:
* Videos: Instagram, TikTok, Twitter, YouTube Shorts
* Images: Instagram, Reddit
* GIFs: Reddit
# Deploy:
### Usage and Commands:
* Send supported links in any authorised chat/channel, and the bot will try to download and send the media.
* Owner-only commands:
* `.dl link` to download and send media in any chat.
* `.bot update` to refresh chat list without restarting bot.
* `.bot restart` to restart bot.
* `.bot ids` to get chat / channel / user IDs.
* `.bot join` or `.bot leave` to join/leave a chat using its ID.
* `.del` reply to a message to delete it.
* `.term` to run shell commands in bot. Example: `.term ls`
* These commands can be used anywhere and are not limited to authorised chats.
### Deploy:
* For Android local deploy:
* Download Latest [Termux](https://github.com/termux/termux-app/releases).
```bash
# Update local packages after installing Termux.
yes|apt update && yes|apt upgrade
```
* Config:
* Get API_ID and API_HASH from [my.telegram.org/auth](https://my.telegram.org/auth).
* Generate String Session by running this in termux:
* Generate String Session by running this in Termux:
```bash
bash -c "$(curl -fsSL https://raw.githubusercontent.com/ux-termux/string/main/Termux.sh)"
```
@@ -26,13 +48,6 @@
* User: Your user ID, used to control the bot.
* Trigger: The trigger prefix used to access the bot's commands (see the example config below).
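* For reference, a minimal example `config.env` is sketched below. The values are illustrative, and the exact names of the user and trigger variables are assumptions, so verify every name against the config file shipped with the repo:
```bash
# Example config.env — illustrative values only.
API_ID=1234567                      # from my.telegram.org
API_HASH=0123456789abcdef           # from my.telegram.org
STRING_SESSION=your-string-session  # generated in Termux as shown above
LOG=-1001234567890                  # log channel id
MESSAGE=12                          # id of the message holding the authorised chat list
USER=123456789                      # your user id (name assumed)
TRIGGER=.                           # command prefix (name assumed)
```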
* Download Latest [Termux](https://github.com/termux/termux-app/releases).
```bash
# Update local packages after installing Termux.
yes|apt update && yes|apt upgrade
```
* Run the following commands:
```bash
# Install required packages.
@@ -57,25 +72,14 @@
* If everything is correct, you will see <b><i>Started</i></b> printed in the terminal and in your log channel.
# Usage and Commands:
* Send Instagram link in any authorised chat/channel, bot will try to download and send the media.
* Owner only commands:
* `.dl link` to download and send media in any chat.
* `.rdl` to download media from reddit.
* `.bot update` to refresh chat list without restarting bot.
* `.bot restart` to restart bot.
* `.bot ids` to get chat / channel / user IDs.
* `.bot join or leave` to join / leave chat using ID.
* `.del` to delete message.
* These commands can be used anywhere and are not limited to authorised chats.
# Known limitations:
### Known limitations:
* If deployed on a VPS or any other server, Instagram might block access to some content.
After hitting Instagram's rate limit, image downloads might stop working, because servers and VPSes usually have static IPs that Instagram can block.
* Deploying it locally would solve all of those issues since most of us have dynamic IP and Instagram will not be able to block access.
* Deploying it locally avoids these issues, because most of us are likely to have a dynamic IP, so Instagram will not be able to block access.
The bot is made lightweight with local deploys in mind, but battery life will still take some hit.
* Logging in with your Instagram account, which would solve the rate-limit issues, is not supported and won't be added, because two of my accounts were suspended pending manual verification for using scraping bots like this one while logged in.
# Contact
### Contact
* For any questions related to deployment or other issues, contact me on [Telegram](https://t.me/anonymousx97).

View File

@@ -1,5 +1,6 @@
import asyncio
import base64
import glob
import json
import os
import shutil
@@ -13,7 +14,7 @@ import yt_dlp
from dotenv import load_dotenv
from pyrogram import Client, filters, idle
from pyrogram.enums import ChatType
from pyrogram.errors import MediaEmpty, PhotoSaveFileInvalid, WebpageCurlFailed, PeerIdInvalid
from pyrogram.errors import MediaEmpty, PeerIdInvalid, PhotoSaveFileInvalid, WebpageCurlFailed
from pyrogram.handlers import MessageHandler
from pyrogram.types import InputMediaPhoto, InputMediaVideo, Message
from wget import download
@@ -21,14 +22,9 @@ from wget import download
if os.path.isfile("config.env"):
load_dotenv("config.env")
bot = Client(
name="bot",
session_string=os.environ.get("STRING_SESSION"),
api_id=os.environ.get("API_ID"),
api_hash=os.environ.get("API_HASH"),
)
bot = Client(name="bot", session_string=os.environ.get("STRING_SESSION"), api_id=os.environ.get("API_ID"), api_hash=os.environ.get("API_HASH"))
log_chat = os.environ.get("LOG")
if log_chat == None:
if log_chat is None:
print("Enter log channel id in config")
exit()
chat_list = []
@@ -94,16 +90,12 @@ async def multi_func(bot, message: Message):
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
@bot.on_message(
filters.command(commands="term", prefixes=trigger) & filters.user(users)
)
@bot.on_message(filters.command(commands="term", prefixes=trigger) & filters.user(users))
async def run_cmd(bot, message: Message):
"""Function to run shell commands"""
cmd = message.text.replace("+term", "")
status_ = await message.reply("executing...")
process = await asyncio.create_subprocess_shell(
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
process = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
stdout, stderr = await process.communicate()
if process.returncode is not None:
@@ -126,15 +118,14 @@ async def delete_message(bot, message: Message):
@bot.on_message(filters.command(commands="dl", prefixes=trigger) & filters.user(users))
async def dl(bot, message: Message):
""" The main Logic Function to download media """
response = await bot.send_message(message.chat.id, "`trying to download...`")
"""The main Logic Function to download media"""
rw_message = message.text.split()
reply = message.reply_to_message
reply_id = reply.id if reply else None
sender_ = message.author_signature or message.from_user.first_name or ""
response = await bot.send_message(message.chat.id, "`trying to download...`")
curse_ = ""
caption = "Shared by : "
if message.sender_chat:
caption += message.author_signature
else:
caption += message.from_user.first_name
caption = f"Shared by : {sender_}"
check_dl = "failed"
if "-d" in rw_message:
doc = True
@@ -146,69 +137,35 @@ async def dl(bot, message: Message):
curse_ = "#FuckInstagram"
if check_dl == "failed":
check_dl = await json_dl(iurl=i, caption=caption, doc=doc)
if "twitter.com" in i or "https://youtube.com/shorts" in i or "tiktok.com" in i:
elif "twitter.com" in i or "https://youtube.com/shorts" in i or "tiktok.com" in i:
check_dl = await iyt_dl(url=i)
elif "www.reddit.com" in i:
check_dl = await reddit_dl(url_=i, doc=doc, sender_=sender_)
curse_ = "Link doesn't contain any media or is restricted\nTip: Make sure you are sending original post url and not an embedded post."
else:
pass
if isinstance(check_dl, dict):
"""Send Media if response from check dl contains data dict"""
if isinstance(check_dl["media"], list):
for data_ in check_dl["media"]:
for vv in check_dl["media"]:
if isinstance(vv, list):
"""Send Grouped Media if data contains a list made of smaller lists of 5 medias"""
await bot.send_media_group(
message.chat.id,
media=data_,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
await bot.send_media_group(message.chat.id, media=vv, reply_to_message_id=reply_id)
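# brief pause between album sends, presumably to stay under Telegram's flood limits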
await asyncio.sleep(3)
else:
"""Send Document if data is list of media files"""
await bot.send_document(
message.chat.id,
document=data_,
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
""" If media isn't a list then it's a single file to be sent """
if isinstance(check_dl["media"], str):
if doc:
await bot.send_document(
message.chat.id,
document=check_dl["media"],
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
await bot.send_document(message.chat.id, document=vv, caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, force_document=True)
else:
if doc:
await bot.send_document(message.chat.id, document=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, force_document=True)
else:
if check_dl["type"] == "img":
await bot.send_photo(
message.chat.id,
photo=check_dl["media"],
caption=caption,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
if check_dl["type"] == "vid":
try:
await bot.send_video(
message.chat.id,
video=check_dl["media"],
caption=caption,
thumb=check_dl["thumb"]
if os.path.isfile(check_dl["thumb"])
else None,
reply_to_message_id=message.reply_to_message.id
if message.reply_to_message
else None,
)
except (MediaEmpty, WebpageCurlFailed):
if check_dl["type"] == "img":
await bot.send_photo(message.chat.id, photo=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id)
elif check_dl["type"] == "vid":
await bot.send_video(message.chat.id, video=check_dl["media"], caption=check_dl["caption"] + caption, thumb=check_dl["thumb"], reply_to_message_id=reply_id)
else:
await bot.send_animation(message.chat.id, animation=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id, unsave=True)
except PhotoSaveFileInvalid:
await bot.send_document(message.chat.id, document=check_dl["media"], caption=check_dl["caption"] + caption, reply_to_message_id=reply_id)
except (MediaEmpty, WebpageCurlFailed, ValueError):
pass
if os.path.exists(str(check_dl["path"])):
shutil.rmtree(str(check_dl["path"]))
@@ -222,33 +179,18 @@ async def dl(bot, message: Message):
async def iyt_dl(url: str):
"""Stop handling post url because this only downloads Videos and post might contain images"""
if url.startswith("https://www.instagram.com/p/"):
if not url.startswith("https://www.instagram.com/reel/"):
return "failed"
path_ = time.time()
video = f"{path_}/v.mp4"
thumb = f"{path_}/i.png"
_opts = {
"outtmpl": video,
"ignoreerrors": True,
"ignore_no_formats_error": True,
"format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]",
"quiet": True,
"logger": FakeLogger(),
}
_opts = {"outtmpl": video, "ignoreerrors": True, "ignore_no_formats_error": True, "format": "bv[ext=mp4]+ba[ext=m4a]/b[ext=mp4]", "quiet": True, "logger": FakeLogger()}
return_val = "failed"
try:
yt_dlp.YoutubeDL(_opts).download(url)
if os.path.isfile(video):
call(
f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''',
shell=True,
)
return_val = {
"path": str(path_),
"type": "vid",
"media": video,
"thumb": thumb,
}
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''', shell=True)
return_val = {"path": str(path_), "type": "vid", "media": video, "thumb": thumb if os.path.isfile(thumb) else None, "caption": ""}
except BaseException:
pass
return return_val
@@ -274,149 +216,85 @@ async def json_dl(iurl: str, doc: bool, caption: str):
if url["__typename"] == "GraphVideo":
url_ = url["video_url"]
wget_x = download(url_, d_dir)
call(
f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{wget_x}" -vframes 1 "{d_dir}/i.png"''',
shell=True,
)
return_val = { "path": d_dir, "type": "vid", "media": wget_x, "thumb": d_dir + "/i.png" }
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{wget_x}" -vframes 1 "{d_dir}/i.png"''', shell=True)
return_val = {"path": d_dir, "type": "vid", "media": wget_x, "thumb": d_dir + "/i.png", "caption": ""}
if url["__typename"] == "GraphImage":
url_ = url["display_url"]
wget_x = download(url_, d_dir + "/i.jpg")
return_val = { "path": d_dir, "type": "img", "media": wget_x, "thumb": "" }
return_val = {"path": d_dir, "type": "img", "media": wget_x, "thumb": None, "caption": ""}
if url["__typename"] == "GraphSidecar":
doc_list = []
vlist = []
vlist2 = []
plist = []
plist2 = []
url_list = []
for i in url["edge_sidecar_to_children"]["edges"]:
if i["node"]["__typename"] == "GraphImage":
url_ = i["node"]["display_url"]
wget_x = download(url_, d_dir)
if wget_x.endswith(".webp"):
os.rename(wget_x, wget_x + ".jpg")
wget_x = wget_x + ".jpg"
if doc:
doc_list.append(wget_x)
else:
if len(plist) >= 5:
plist2.append(InputMediaPhoto(media=wget_x, caption=caption))
else:
plist.append(InputMediaPhoto(media=wget_x, caption=caption))
url_list.append(i["node"]["display_url"])
if i["node"]["__typename"] == "GraphVideo":
url_ = i["node"]["video_url"]
wget_x = download(url_, d_dir)
if doc:
doc_list.append(wget_x)
else:
if len(vlist) >= 5:
vlist2.append(InputMediaVideo(media=wget_x, caption=caption))
else:
vlist.append(InputMediaVideo(media=wget_x, caption=caption))
if doc:
return_val = {"path": d_dir, "media": doc_list}
else:
return_val = {
"path": d_dir,
"media": [
zz for zz in [plist, plist2, vlist, vlist2] if len(zz) > 0
],
}
url_list.append(i["node"]["video_url"])
downloads = await async_download(urls=url_list, path=d_dir, doc=doc, caption=caption + "\n..")
return_val = {"path": d_dir, "media": downloads}
except Exception:
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
return return_val
@bot.on_message(filters.command(commands="rdl", prefixes=trigger) & filters.user(users))
async def reddit_dl(bot, message: Message):
ext = None
del_link = True
rw_message = message.text.split()
response = await bot.send_message(
chat_id=message.chat.id, text="Trying to download..."
)
if message.sender_chat:
sender_ = message.author_signature
else:
sender_ = message.from_user.first_name
for link_ in rw_message:
if link_.startswith("https://www.reddit.com"):
link = link_.split("/?")[0] + ".json?limit=1"
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_7 rv:5.0; en-US) AppleWebKit/533.31.5 (KHTML, like Gecko) Version/4.0 Safari/533.31.5",
}
link = url_.split("/?")[0] + ".json?limit=1"
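# appending .json to a Reddit post URL returns the post data as JSON; limit=1 keeps the response small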
headers = {"user-agent": "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_7 rv:5.0; en-US) AppleWebKit/533.31.5 (KHTML, like Gecko) Version/4.0 Safari/533.31.5"}
return_val = "failed"
try:
async with aiohttp.ClientSession() as session:
async with session.get(link, headers=headers) as ss:
session_response = await ss.json()
json_ = session_response[0]["data"]["children"][0]["data"]
check_ = json_["secure_media"]
title_ = json_["title"]
subr = json_["subreddit_name_prefixed"]
caption = f"__{subr}:__\n**{title_}**\n\nShared by : {sender_}"
async with (aiohttp.ClientSession() as session, session.get(link, headers=headers) as ss):
response = await ss.json()
json_ = response[0]["data"]["children"][0]["data"]
caption = f'__{json_["subreddit_name_prefixed"]}:__\n**{json_["title"]}**\n\n'
d_dir = str(time.time())
os.mkdir(d_dir)
if isinstance(check_, dict):
v = f"{d_dir}/v.mp4"
t = f"{d_dir}/i.png"
if "oembed" in check_:
vid_url = json_["preview"]["reddit_video_preview"]["fallback_url"]
await bot.send_animation(
chat_id=message.chat.id,
animation=vid_url,
unsave=True,
caption=caption,
)
is_vid, is_gallery = json_.get("is_video"), json_.get("is_gallery")
if is_vid:
video = f"{d_dir}/v.mp4"
thumb = f"{d_dir}/i.png"
vid_url = json_["secure_media"]["reddit_video"]["hls_url"]
call(f'ffmpeg -hide_banner -loglevel error -i "{vid_url.strip()}" -c copy {video}', shell=True)
call(f'''ffmpeg -hide_banner -loglevel error -ss 0.1 -i "{video}" -vframes 1 "{thumb}"''', shell=True)
return_val = {"path": d_dir, "type": "vid", "media": video, "thumb": thumb, "caption": caption}
elif is_gallery:
grouped_media_urls = [f'https://i.redd.it/{i["media_id"]}.jpg' for i in json_["gallery_data"]["items"]]
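# gallery items are fetched directly from i.redd.it; the .jpg extension is assumed for every media_id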
downloads = await async_download(urls=grouped_media_urls, path=d_dir, doc=doc, caption=caption + f"Shared by : {sender_}")
return_val = {"path": d_dir, "media": downloads}
else:
vid_url = check_["reddit_video"]["hls_url"]
call(
f'ffmpeg -hide_banner -loglevel error -i "{vid_url.strip()}" -c copy {v}',
shell=True,
)
call(
f'''ffmpeg -ss 0.1 -i "{v}" -vframes 1 "{t}"''', shell=True
)
await message.reply_video(v, caption=caption, thumb=t)
media_ = json_["url_overridden_by_dest"].strip()
if media_.endswith((".jpg", ".jpeg", ".png", ".webp")):
img = download(media_, d_dir)
return_val = {"path": d_dir, "type": "img", "media": img, "thumb": None, "caption": caption}
elif media_.endswith(".gif"):
gif = download(media_, d_dir)
return_val = {"path": d_dir, "type": "animation", "media": gif, "thumb": None, "caption": caption}
else:
media_ = json_["url_overridden_by_dest"]
try:
if media_.strip().endswith(".gif"):
ext = ".gif"
await bot.send_animation(
chat_id=message.chat.id,
animation=media_,
unsave=True,
caption=caption,
)
if media_.strip().endswith((".jpg", ".jpeg", ".png", ".webp")):
ext = ".png"
await message.reply_photo(media_, caption=caption)
except (MediaEmpty, WebpageCurlFailed):
download(media_, f"{d_dir}/i{ext}")
if ext == ".gif":
await bot.send_animation(
chat_id=message.chat.id,
animation=f"{d_dir}/i.gif",
unsave=True,
caption=caption,
)
else:
try:
await message.reply_photo(f"{d_dir}/i.png", caption=caption)
except PhotoSaveFileInvalid:
await message.reply_document(document=f"{d_dir}/i.png", caption=caption)
if os.path.exists(str(d_dir)):
shutil.rmtree(str(d_dir))
gif_url = json_.get("preview", {}).get("reddit_video_preview", {}).get("fallback_url")
if gif_url:
gif = download(gif_url, d_dir)
return_val = {"path": d_dir, "type": "animation", "media": gif, "thumb": None, "caption": caption}
except Exception:
del_link = False
await bot.send_message(chat_id=log_chat, text=str(traceback.format_exc()))
await response.edit("Link doesn't contain any media or is restricted\nTip: Make sure you are sending original post url and not an embedded post.")
continue
if del_link:
await message.delete()
await response.delete()
return return_val
async def async_download(urls: list, path: str, doc: bool = False, caption: str = ""):
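# wget's download() is blocking, so each call is offloaded to a worker thread and all downloads are awaited concurrently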
down_loads = await asyncio.gather(*[asyncio.to_thread(download, url, path) for url in urls])
if doc:
return down_loads
for file in glob.glob(f"{path}/*.webp"):
    os.rename(file, file + ".png")
files = glob.glob(f"{path}/*")
grouped_images = [InputMediaPhoto(img, caption=caption) for img in files if img.endswith((".png", ".jpg", ".jpeg"))]
grouped_videos = [InputMediaVideo(vid, caption=caption) for vid in files if vid.endswith((".mp4", ".mkv", ".webm"))]
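# chunk the media into albums of 5 (Telegram caps a media group at 10 items)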
return_list = [grouped_images[imgs : imgs + 5] for imgs in range(0, len(grouped_images), 5)] + [
grouped_videos[vids : vids + 5] for vids in range(0, len(grouped_videos), 5)
]
return return_list
class FakeLogger(object):
@@ -432,7 +310,7 @@ class FakeLogger(object):
async def add_h():
message_id = os.environ.get("MESSAGE")
if message_id == None:
if message_id is None:
print("Enter Message id in config.\n")
return 1
try:
@@ -441,7 +319,7 @@ async def add_h():
print("Log channel not found.\nCheck the variable for mistakes")
return 1
chat_list.clear()
if msg == None:
if msg is None:
print("Message not found\nCheck variable for mistakes\n")
return 1
try:
@@ -452,27 +330,16 @@ async def add_h():
chat_list.extend(chats_list)
social_handler = bot.add_handler(
MessageHandler(
dl,
(
(
dl,((
filters.regex(r"^https://www.instagram.com/*")
| filters.regex(r"^https://youtube.com/shorts/*")
| filters.regex(r"^https://twitter.com/*")
| filters.regex(r"^https://vm.tiktok.com/*")
)
& filters.chat(chat_list)
),
),
| filters.regex(r"^https://www.reddit.com/*")
) & filters.chat(chat_list))),
group=1,
)
reddit_handler = bot.add_handler(
MessageHandler(
reddit_dl,
(filters.regex(r"^https://www.reddit.com/*") & filters.chat(chat_list)),
),
group=2,
)
handler_.extend([social_handler, reddit_handler])
handler_.append(social_handler)
async def boot():