apify-scrapers — Skillopedia

Apify Scrapers Overview Scrape content from major social platforms using Apify actors. Each platform has optimized settings for cost and quality. Quick Decision Tree Environment Setup Get your API key: https://console.apify.com/account/integrations Common Usage Patterns Scrape Twitter Trends Scrape Reddit Discussions Scrape LinkedIn Author Auto-detect and Scrape URL Scrape Instagram Profile Scrape Instagram Hashtag Scrape Instagram Reels Scrape Facebook Page Scrape Facebook Reviews Scrape Facebook Marketplace Scrape Google Maps Businesses Scrape Google Maps Reviews Extract Contact Info from W…

\n return bool(re.match(pattern, url))\n\n\ndef run_linkedin_scraper(\n mode: str,\n inputs: list,\n max_posts: int = 30,\n scrape_comments: bool = False,\n scrape_reactions: bool = False,\n max_reactions: int = 5\n) -> dict:\n \"\"\"\n Run the LinkedIn scraper Actor.\n\n Args:\n mode: 'author' or 'search'\n inputs: List of author URLs or search queries\n max_posts: Maximum posts to retrieve\n scrape_comments: Whether to scrape post comments\n scrape_reactions: Whether to scrape reaction details\n max_reactions: Max reactions to scrape per post\n\n Returns:\n dict: Scraper results with post data\n \"\"\"\n print(f\"🚀 Starting LinkedIn scraper in {mode} mode\")\n print(f\"📊 Max posts: {max_posts}\")\n print(f\"📝 Input: {inputs}\")\n\n # Initialize Apify client\n client = ApifyClient(APIFY_TOKEN)\n\n # Build Actor input based on mode\n run_input = {\n \"maxPosts\": max_posts,\n \"scrapeComments\": scrape_comments,\n \"scrapeReactions\": scrape_reactions,\n \"maxReactions\": max_reactions\n }\n\n if mode == \"author\":\n # Validate URLs\n for url in inputs:\n if not validate_linkedin_url(url):\n print(f\"⚠️ Warning: '{url}' may not be a valid LinkedIn profile URL\")\n run_input[\"authorUrls\"] = inputs\n elif mode == \"search\":\n run_input[\"searchQueries\"] = inputs\n else:\n raise ValueError(f\"Invalid mode: {mode}. 
Use 'author' or 'search'\")\n\n print(f\"⏳ Running Actor with input: {json.dumps(run_input, indent=2)}\")\n\n try:\n run = client.actor(ACTOR_ID).call(run_input=run_input)\n\n print(f\"✅ Actor run completed!\")\n print(f\"📋 Run ID: {run['id']}\")\n print(f\"⏱️ Duration: {run.get('stats', {}).get('runTimeSecs', 'N/A')}s\")\n\n # Fetch results from dataset\n print(\"📥 Fetching results...\")\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\n \"success\": True,\n \"run_id\": run['id'],\n \"dataset_id\": run[\"defaultDatasetId\"],\n \"items\": dataset_items,\n \"count\": len(dataset_items),\n \"mode\": mode,\n \"query\": inputs\n }\n\n except Exception as e:\n print(f\"❌ Actor run failed: {str(e)}\")\n return {\n \"success\": False,\n \"error\": str(e),\n \"items\": [],\n \"count\": 0,\n \"mode\": mode,\n \"query\": inputs\n }\n\n\ndef process_results(results: dict) -> dict:\n \"\"\"\n Process and structure the scraped results.\n\n Args:\n results: Raw results from Apify\n\n Returns:\n dict: Cleaned and structured data\n \"\"\"\n processed_posts = []\n\n for item in results[\"items\"]:\n # Extract engagement metrics from nested 'engagement' object\n engagement = item.get(\"engagement\", {})\n likes = engagement.get(\"likes\", 0)\n comments_count = engagement.get(\"comments\", 0)\n shares = engagement.get(\"shares\", 0)\n\n # Extract author info from nested 'author' object\n author = item.get(\"author\", {})\n\n # Extract posted time from nested 'postedAt' object\n posted_at = item.get(\"postedAt\", {})\n\n # Extract image URLs from 'postImages' array\n post_images = item.get(\"postImages\", [])\n media_urls = [img.get(\"url\") for img in post_images if img.get(\"url\")]\n\n post = {\n \"id\": item.get(\"id\", \"\"),\n \"text\": item.get(\"content\", \"\"),\n \"author_name\": author.get(\"name\", \"\"),\n \"author_url\": author.get(\"linkedinUrl\", \"\"),\n \"author_headline\": author.get(\"info\", \"\"),\n \"author_avatar\": 
author.get(\"avatar\", {}).get(\"url\", \"\"),\n \"posted_at\": posted_at.get(\"date\", \"\"),\n \"posted_ago\": posted_at.get(\"postedAgoText\", \"\"),\n \"likes\": likes,\n \"comments\": comments_count,\n \"reposts\": shares,\n \"post_url\": item.get(\"linkedinUrl\", \"\"),\n \"media_urls\": media_urls,\n \"hashtags\": item.get(\"hashtags\", []),\n \"engagement_score\": likes + (comments_count * 2) + (shares * 3),\n \"reactions_breakdown\": engagement.get(\"reactions\", [])\n }\n\n # Include comments if scraped\n if item.get(\"comments\"):\n post[\"comment_data\"] = item[\"comments\"]\n\n # Include reactions if scraped\n if item.get(\"reactions\"):\n post[\"reaction_data\"] = item[\"reactions\"]\n\n processed_posts.append(post)\n\n # Sort by engagement score descending\n processed_posts.sort(key=lambda x: x[\"engagement_score\"], reverse=True)\n\n return {\n \"posts\": processed_posts,\n \"scraped_at\": datetime.now().isoformat(),\n \"total_count\": len(processed_posts),\n \"mode\": results.get(\"mode\", \"unknown\"),\n \"query\": results.get(\"query\", []),\n \"run_id\": results.get(\"run_id\", \"\")\n }\n\n\ndef save_results(data: dict, filename: str = None) -> Path:\n \"\"\"\n Save results to .tmp directory.\n\n Args:\n data: Processed post data\n filename: Custom filename (optional)\n\n Returns:\n Path: Output file path\n \"\"\"\n OUTPUT_DIR.mkdir(exist_ok=True)\n\n if not filename:\n timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n filename = f\"linkedin_posts_{timestamp}.json\"\n\n output_path = OUTPUT_DIR / filename\n\n with open(output_path, 'w', encoding='utf-8') as f:\n json.dump(data, f, indent=2, ensure_ascii=False)\n\n print(f\"\\n💾 Results saved to: {output_path}\")\n print(f\"📊 Total posts: {data['total_count']}\")\n\n # Print top posts\n if data['posts']:\n print(\"\\n🔥 Top Posts by Engagement:\")\n for i, post in enumerate(data['posts'][:5], 1):\n text_preview = post['text'][:100] + \"...\" if len(post['text']) > 100 else post['text']\n 
text_preview = text_preview.replace('\\n', ' ')\n print(f\"\\n{i}. {post['author_name']}\")\n print(f\" {text_preview}\")\n print(f\" 👍 {post['likes']} | 💬 {post['comments']} | 🔄 {post['reposts']}\")\n if post['post_url']:\n print(f\" 🔗 {post['post_url']}\")\n\n return output_path\n\n\ndef main():\n \"\"\"Main execution function.\"\"\"\n parser = argparse.ArgumentParser(\n description=\"Scrape LinkedIn posts by author or search query\",\n formatter_class=argparse.RawDescriptionHelpFormatter,\n epilog=\"\"\"\nExamples:\n # Scrape from a profile\n python execution/scrape_linkedin_posts.py author \"https://www.linkedin.com/in/example-user/\"\n\n # Search for posts\n python execution/scrape_linkedin_posts.py search \"AI automation\" \"LLM agents\"\n\n # With options\n python execution/scrape_linkedin_posts.py author \"https://www.linkedin.com/in/user/\" --max-posts 50 --scrape-comments\n \"\"\"\n )\n\n parser.add_argument(\n \"mode\",\n choices=[\"author\", \"search\"],\n help=\"Scraping mode: 'author' for profile URLs, 'search' for keywords\"\n )\n parser.add_argument(\n \"inputs\",\n nargs=\"+\",\n help=\"LinkedIn profile URLs (author mode) or search queries (search mode)\"\n )\n parser.add_argument(\n \"--max-posts\",\n type=int,\n default=30,\n help=\"Maximum posts to scrape (default: 30)\"\n )\n parser.add_argument(\n \"--scrape-comments\",\n action=\"store_true\",\n help=\"Include post comments (increases cost)\"\n )\n parser.add_argument(\n \"--scrape-reactions\",\n action=\"store_true\",\n help=\"Include reaction details (increases cost)\"\n )\n parser.add_argument(\n \"--max-reactions\",\n type=int,\n default=5,\n help=\"Max reactions to scrape per post (default: 5)\"\n )\n parser.add_argument(\n \"--output\",\n help=\"Custom output filename\"\n )\n\n args = parser.parse_args()\n\n try:\n # Validate environment\n validate_environment()\n\n # Run scraper\n results = run_linkedin_scraper(\n mode=args.mode,\n inputs=args.inputs,\n max_posts=args.max_posts,\n 
scrape_comments=args.scrape_comments,\n scrape_reactions=args.scrape_reactions,\n max_reactions=args.max_reactions\n )\n\n if not results[\"success\"]:\n print(f\"❌ Scraping failed: {results.get('error')}\")\n return 1\n\n if results[\"count\"] == 0:\n print(\"⚠️ No posts found for the given input\")\n return 0\n\n # Process results\n processed_data = process_results(results)\n\n # Save results\n save_results(processed_data, args.output)\n\n print(\"\\n✅ LinkedIn scraping completed successfully!\")\n return 0\n\n except Exception as e:\n print(f\"❌ Error: {str(e)}\")\n import traceback\n traceback.print_exc()\n return 1\n\n\nif __name__ == \"__main__\":\n exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10735,"content_sha256":"1dd1f5767e112b50095dbb1a79e62d44d636aabdbbf762680332029504d7a9e1"},{"filename":"scripts/scrape_multi_platform.py","content":"#!/usr/bin/env python3\n\"\"\"\nMulti-Platform Content Scraper\nUnified script to scrape TikTok, YouTube, and Website content using Apify actors.\n\nUsage:\n python execution/scrape_multi_platform.py tiktok --hashtags AI ChatGPT --max-results 20\n python execution/scrape_multi_platform.py youtube --search \"AI tutorial\" --max-results 30\n python execution/scrape_multi_platform.py website --urls https://docs.example.com --max-pages 50\n\"\"\"\n\nimport os\nimport json\nfrom datetime import datetime\nfrom pathlib import Path\nfrom apify_client import ApifyClient\nfrom dotenv import load_dotenv\nimport argparse\n\n# Load environment variables\nload_dotenv()\n\n# Configuration\nAPIFY_TOKEN = os.getenv(\"APIFY_TOKEN\")\nOUTPUT_DIR = Path(__file__).parent.parent / \".tmp\"\n\n# Actor IDs\nACTORS = {\n \"tiktok\": \"clockworks/tiktok-scraper\",\n \"youtube\": \"streamers/youtube-scraper\",\n \"website\": \"apify/website-content-crawler\"\n}\n\ndef validate_environment():\n \"\"\"Validate required environment variables.\"\"\"\n if not APIFY_TOKEN:\n raise ValueError(\n \"APIFY_TOKEN not 
found in environment. \"\n \"Please add it to your .env file.\"\n )\n\ndef scrape_tiktok(hashtags=None, max_results=50, download_videos=False):\n \"\"\"Scrape TikTok content.\"\"\"\n client = ApifyClient(APIFY_TOKEN)\n\n # Build start URLs from hashtags\n if hashtags:\n start_urls = [f\"https://www.tiktok.com/tag/{tag.replace('#', '')}\" for tag in hashtags]\n else:\n start_urls = [\n \"https://www.tiktok.com/tag/ai\",\n \"https://www.tiktok.com/tag/chatgpt\",\n \"https://www.tiktok.com/tag/machinelearning\"\n ]\n\n run_input = {\n \"startUrls\": start_urls,\n \"resultsLimit\": max_results,\n \"shouldDownloadVideos\": download_videos,\n \"shouldDownloadCovers\": True,\n \"shouldDownloadSubtitles\": True\n }\n\n print(f\"🎵 Starting TikTok scraper for hashtags: {', '.join([url.split('/')[-1] for url in start_urls])}\")\n print(f\"📊 Max results: {max_results}\")\n\n run = client.actor(ACTORS[\"tiktok\"]).call(run_input=run_input)\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\"videos\": dataset_items, \"scraped_at\": datetime.now().isoformat()}\n\ndef scrape_youtube(search_query=None, max_results=50, download_subtitles=True):\n \"\"\"Scrape YouTube content.\"\"\"\n client = ApifyClient(APIFY_TOKEN)\n\n run_input = {\n \"maxResults\": max_results,\n \"downloadSubtitles\": download_subtitles,\n \"subtitlesLanguage\": \"en\"\n }\n\n if search_query:\n run_input[\"searchKeywords\"] = search_query\n print(f\"📹 Starting YouTube scraper for query: '{search_query}'\")\n else:\n run_input[\"searchKeywords\"] = \"AI tutorial\"\n print(f\"📹 Starting YouTube scraper with default query: 'AI tutorial'\")\n\n print(f\"📊 Max results: {max_results}\")\n\n run = client.actor(ACTORS[\"youtube\"]).call(run_input=run_input)\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\"videos\": dataset_items, \"scraped_at\": datetime.now().isoformat()}\n\ndef scrape_website(urls, max_pages=100, 
output_format=\"markdown\"):\n \"\"\"Scrape website content for RAG/LLM.\"\"\"\n client = ApifyClient(APIFY_TOKEN)\n\n if isinstance(urls, str):\n urls = [urls]\n\n start_urls = [{\"url\": url} for url in urls]\n\n run_input = {\n \"startUrls\": start_urls,\n \"maxCrawlPages\": max_pages,\n \"crawlerType\": \"cheerio\", # Fast, no JS rendering\n }\n\n print(f\"🌐 Starting Website Content Crawler\")\n print(f\"📋 URLs: {', '.join(urls)}\")\n print(f\"📊 Max pages: {max_pages}\")\n\n run = client.actor(ACTORS[\"website\"]).call(run_input=run_input)\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\"pages\": dataset_items, \"scraped_at\": datetime.now().isoformat()}\n\ndef save_results(data, platform, filename=None):\n \"\"\"Save results to .tmp directory.\"\"\"\n OUTPUT_DIR.mkdir(exist_ok=True)\n\n if not filename:\n timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n filename = f\"{platform}_content_{timestamp}.json\"\n\n output_path = OUTPUT_DIR / filename\n\n with open(output_path, 'w', encoding='utf-8') as f:\n json.dump(data, f, indent=2, ensure_ascii=False)\n\n print(f\"\\n💾 Results saved to: {output_path}\")\n\n # Print summary stats\n if platform == \"tiktok\":\n print(f\"📊 Total videos: {len(data.get('videos', []))}\")\n for i, video in enumerate(data.get('videos', [])[:3], 1):\n print(f\"\\n{i}. {video.get('text', 'No caption')[:50]}...\")\n print(f\" 👤 @{video.get('authorMeta', {}).get('name', 'unknown')}\")\n print(f\" ❤️ {video.get('diggCount', 0)} | 💬 {video.get('commentCount', 0)}\")\n\n elif platform == \"youtube\":\n print(f\"📊 Total videos: {len(data.get('videos', []))}\")\n for i, video in enumerate(data.get('videos', [])[:3], 1):\n print(f\"\\n{i}. 
{video.get('title', 'No title')[:60]}...\")\n print(f\" 📺 {video.get('channelName', 'unknown')}\")\n print(f\" 👁️ {video.get('viewCount', 0)} views\")\n\n elif platform == \"website\":\n print(f\"📊 Total pages: {len(data.get('pages', []))}\")\n for i, page in enumerate(data.get('pages', [])[:3], 1):\n print(f\"\\n{i}. {page.get('title', 'No title')[:60]}...\")\n print(f\" 🔗 {page.get('url', 'unknown')}\")\n\n return output_path\n\ndef main():\n \"\"\"Main execution function.\"\"\"\n parser = argparse.ArgumentParser(\n description=\"Multi-platform content scraper (TikTok, YouTube, Website)\"\n )\n\n subparsers = parser.add_subparsers(dest=\"platform\", help=\"Platform to scrape\")\n\n # TikTok subcommand\n tiktok_parser = subparsers.add_parser(\"tiktok\", help=\"Scrape TikTok content\")\n tiktok_parser.add_argument(\"--hashtags\", nargs=\"+\", help=\"Hashtags to scrape\")\n tiktok_parser.add_argument(\"--max-results\", type=int, default=50)\n tiktok_parser.add_argument(\"--download-videos\", action=\"store_true\")\n tiktok_parser.add_argument(\"--output\", help=\"Custom output filename\")\n\n # YouTube subcommand\n youtube_parser = subparsers.add_parser(\"youtube\", help=\"Scrape YouTube content\")\n youtube_parser.add_argument(\"--search\", help=\"Search query\")\n youtube_parser.add_argument(\"--max-results\", type=int, default=50)\n youtube_parser.add_argument(\"--no-subtitles\", action=\"store_true\")\n youtube_parser.add_argument(\"--output\", help=\"Custom output filename\")\n\n # Website subcommand\n website_parser = subparsers.add_parser(\"website\", help=\"Crawl website content\")\n website_parser.add_argument(\"--urls\", nargs=\"+\", required=True, help=\"URLs to crawl\")\n website_parser.add_argument(\"--max-pages\", type=int, default=100)\n website_parser.add_argument(\"--output\", help=\"Custom output filename\")\n\n args = parser.parse_args()\n\n if not args.platform:\n parser.print_help()\n return 1\n\n try:\n # Validate environment\n 
validate_environment()\n\n # Run appropriate scraper\n if args.platform == \"tiktok\":\n results = scrape_tiktok(\n hashtags=args.hashtags,\n max_results=args.max_results,\n download_videos=args.download_videos\n )\n elif args.platform == \"youtube\":\n results = scrape_youtube(\n search_query=args.search,\n max_results=args.max_results,\n download_subtitles=not args.no_subtitles\n )\n elif args.platform == \"website\":\n results = scrape_website(\n urls=args.urls,\n max_pages=args.max_pages\n )\n\n # Save results\n save_results(results, args.platform, getattr(args, 'output', None))\n\n print(\"\\n✅ Scraping completed successfully!\")\n return 0\n\n except Exception as e:\n print(f\"❌ Error: {str(e)}\")\n import traceback\n traceback.print_exc()\n return 1\n\nif __name__ == \"__main__\":\n exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8105,"content_sha256":"5cf1d88c25cecc5cd3028df9caa6fa7c582991ca54cdf0370c2a12d12290ca56"},{"filename":"scripts/scrape_reddit_ai_tech.py","content":"#!/usr/bin/env python3\n\"\"\"\nReddit AI/Tech Trends Scraper\nScrapes trending posts from AI and tech-focused subreddits using Apify.\n\nUsage:\n python execution/scrape_reddit_ai_tech.py [--max-posts 50] [--sort hot]\n\"\"\"\n\nimport os\nimport json\nfrom datetime import datetime\nfrom pathlib import Path\nfrom apify_client import ApifyClient\nfrom dotenv import load_dotenv\nimport argparse\n\n# Load environment variables\nload_dotenv()\n\n# Configuration\nAPIFY_TOKEN = os.getenv(\"APIFY_TOKEN\")\nACTOR_ID = \"trudax/reddit-scraper-lite\" # Free tier actor\nOUTPUT_DIR = Path(__file__).parent.parent / \".tmp\"\n\n# AI/Tech-focused subreddits\nDEFAULT_SUBREDDITS = [\n \"r/artificial\",\n \"r/MachineLearning\",\n \"r/LocalLLaMA\",\n \"r/ChatGPT\",\n \"r/OpenAI\",\n \"r/ClaudeAI\",\n \"r/singularity\",\n \"r/technology\",\n \"r/Futurology\"\n]\n\ndef validate_environment():\n \"\"\"Validate required environment variables.\"\"\"\n if not 
APIFY_TOKEN:\n raise ValueError(\n \"APIFY_TOKEN not found in environment. \"\n \"Please add it to your .env file.\"\n )\n\ndef run_reddit_scraper(\n subreddits=None,\n search_terms=None,\n max_posts=50,\n max_comments=20,\n sort_by=\"hot\",\n time_filter=\"week\"\n):\n \"\"\"\n Run the Reddit scraper Actor.\n\n Args:\n subreddits (list): List of subreddit names\n search_terms (str): Search keywords\n max_posts (int): Maximum number of posts to scrape\n max_comments (int): Maximum comments per post\n sort_by (str): Sort order (hot, top, new, relevance)\n time_filter (str): Time filter (hour, day, week, month, year)\n\n Returns:\n dict: Scraper results with post data\n \"\"\"\n if subreddits is None:\n subreddits = DEFAULT_SUBREDDITS\n\n print(f\"🚀 Starting Reddit scraper for subreddits: {', '.join(subreddits)}\")\n print(f\"📊 Max posts: {max_posts}, Sort: {sort_by}, Time: {time_filter}\")\n\n # Initialize Apify client\n client = ApifyClient(APIFY_TOKEN)\n\n # Prepare Actor input - use search in communities for better results\n communities = [sub.replace(\"r/\", \"\") for sub in subreddits]\n\n run_input = {\n \"maxItems\": max_posts,\n \"maxComments\": max_comments,\n \"sort\": sort_by,\n \"time\": time_filter,\n }\n\n # Use search terms if provided, otherwise use start URLs\n if search_terms:\n # Build search queries for each community\n run_input[\"searches\"] = [f\"{search_terms} subreddit:{comm}\" for comm in communities]\n print(f\"🔍 Search: {search_terms} in communities: {', '.join(communities)}\")\n else:\n # Use community URLs as startUrls\n start_urls = [{\"url\": f\"https://www.reddit.com/{sub}/\"} for sub in subreddits]\n run_input[\"startUrls\"] = start_urls\n\n print(\"⏳ Running Actor...\")\n\n # Run the Actor and wait for completion\n try:\n run = client.actor(ACTOR_ID).call(run_input=run_input)\n\n # Get run status\n print(f\"✅ Actor run completed!\")\n print(f\"📋 Run ID: {run['id']}\")\n print(f\"⏱️ Duration: {run.get('duration', 'N/A')}s\")\n\n # 
Fetch results from dataset\n print(\"📥 Fetching results...\")\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\n \"success\": True,\n \"run_id\": run['id'],\n \"dataset_id\": run[\"defaultDatasetId\"],\n \"items\": dataset_items,\n \"count\": len(dataset_items)\n }\n\n except Exception as e:\n print(f\"❌ Actor run failed: {str(e)}\")\n return {\n \"success\": False,\n \"error\": str(e),\n \"items\": [],\n \"count\": 0\n }\n\ndef process_results(results, min_score=10):\n \"\"\"\n Process and structure the scraped results.\n\n Args:\n results (dict): Raw results from Apify\n min_score (int): Minimum upvote score filter\n\n Returns:\n dict: Cleaned and structured data\n \"\"\"\n processed_posts = []\n\n print(f\"🔍 Filtering for posts with {min_score}+ upvotes\")\n\n total_scraped = len(results[\"items\"])\n filtered_count = 0\n\n # Debug: Print first item structure\n if results[\"items\"]:\n print(f\"📋 Sample item keys: {list(results['items'][0].keys())[:10]}\")\n\n for item in results[\"items\"]:\n # The lite version returns different field names\n # Handle different possible score field names\n score = (item.get(\"score\", 0) or item.get(\"ups\", 0) or\n item.get(\"upvotes\", 0) or item.get(\"upCount\", 0) or 0)\n\n # If no score field, skip filtering (accept all)\n if score > 0 and score \u003c min_score:\n continue\n\n # Skip comments (they have empty titles)\n title = item.get(\"title\", \"\")\n if not title or len(title) \u003c 5:\n continue\n\n # Extract post data (mapping lite version fields)\n post = {\n \"id\": item.get(\"id\", \"\") or item.get(\"parsedId\", \"\"),\n \"title\": title,\n \"subreddit\": item.get(\"subreddit\", \"\") or item.get(\"communityName\", \"\") or item.get(\"parsedCommunityName\", \"\"),\n \"author\": item.get(\"author\", \"\") or item.get(\"username\", \"\"),\n \"score\": score,\n \"upvote_ratio\": item.get(\"upvote_ratio\", 0) or item.get(\"upvoteRatio\", 0),\n \"num_comments\": 
item.get(\"num_comments\", 0) or item.get(\"numberOfComments\", 0),\n \"url\": item.get(\"url\", \"\"),\n \"permalink\": item.get(\"permalink\", \"\") or item.get(\"url\", \"\"),\n \"created_utc\": item.get(\"created_utc\", \"\") or item.get(\"createdAt\", \"\"),\n \"selftext\": (item.get(\"selftext\", \"\") or item.get(\"body\", \"\"))[:500], # Limit text length\n \"link_flair_text\": item.get(\"link_flair_text\", \"\"),\n \"is_video\": item.get(\"is_video\", False),\n \"top_comments\": []\n }\n\n # Extract top comments\n comments = item.get(\"comments\", [])\n for comment in comments[:5]: # Top 5 comments\n if isinstance(comment, dict):\n post[\"top_comments\"].append({\n \"author\": comment.get(\"author\", \"\"),\n \"body\": comment.get(\"body\", \"\")[:300], # Limit length\n \"score\": comment.get(\"score\", 0)\n })\n\n processed_posts.append(post)\n filtered_count += 1\n\n # Sort by score descending\n processed_posts.sort(key=lambda x: x[\"score\"], reverse=True)\n\n print(f\"✨ Filtered {filtered_count} posts from {total_scraped} total\")\n\n return {\n \"posts\": processed_posts,\n \"scraped_at\": datetime.now().isoformat(),\n \"total_count\": len(processed_posts),\n \"total_scraped\": total_scraped,\n \"run_id\": results.get(\"run_id\", \"\"),\n }\n\ndef save_results(data, filename=None):\n \"\"\"\n Save results to .tmp directory.\n\n Args:\n data (dict): Processed post data\n filename (str, optional): Custom filename\n \"\"\"\n # Ensure output directory exists\n OUTPUT_DIR.mkdir(exist_ok=True)\n\n # Generate filename\n if not filename:\n timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n filename = f\"reddit_ai_tech_{timestamp}.json\"\n\n output_path = OUTPUT_DIR / filename\n\n # Save to file\n with open(output_path, 'w', encoding='utf-8') as f:\n json.dump(data, f, indent=2, ensure_ascii=False)\n\n print(f\"💾 Results saved to: {output_path}\")\n print(f\"📊 Total posts: {data['total_count']}\")\n\n # Print top 5 posts\n print(\"\\n🔥 Top 5 Posts by 
Score:\")\n for i, post in enumerate(data['posts'][:5], 1):\n print(f\"\\n{i}. r/{post['subreddit']} - {post['title'][:60]}...\")\n print(f\" ⬆️ {post['score']} | 💬 {post['num_comments']} comments\")\n print(f\" 🔗 {post['permalink']}\")\n\n return output_path\n\ndef main():\n \"\"\"Main execution function.\"\"\"\n parser = argparse.ArgumentParser(\n description=\"Scrape trending AI/Tech posts from Reddit\"\n )\n parser.add_argument(\n \"--subreddits\",\n nargs=\"+\",\n help=\"List of subreddits (e.g., r/artificial r/MachineLearning)\"\n )\n parser.add_argument(\n \"--search\",\n help=\"Search terms to filter posts\"\n )\n parser.add_argument(\n \"--max-posts\",\n type=int,\n default=50,\n help=\"Maximum number of posts to scrape (default: 50)\"\n )\n parser.add_argument(\n \"--max-comments\",\n type=int,\n default=20,\n help=\"Maximum comments per post (default: 20)\"\n )\n parser.add_argument(\n \"--sort\",\n choices=[\"hot\", \"top\", \"new\", \"relevance\"],\n default=\"hot\",\n help=\"Sort order (default: hot)\"\n )\n parser.add_argument(\n \"--time\",\n choices=[\"hour\", \"day\", \"week\", \"month\", \"year\"],\n default=\"week\",\n help=\"Time filter (default: week)\"\n )\n parser.add_argument(\n \"--min-score\",\n type=int,\n default=10,\n help=\"Minimum upvote score filter (default: 10)\"\n )\n parser.add_argument(\n \"--output\",\n help=\"Custom output filename\"\n )\n\n args = parser.parse_args()\n\n try:\n # Validate environment\n validate_environment()\n\n # Run scraper\n results = run_reddit_scraper(\n subreddits=args.subreddits,\n search_terms=args.search,\n max_posts=args.max_posts,\n max_comments=args.max_comments,\n sort_by=args.sort,\n time_filter=args.time\n )\n\n if not results[\"success\"]:\n print(f\"❌ Scraping failed: {results.get('error')}\")\n return 1\n\n # Process results\n processed_data = process_results(results, min_score=args.min_score)\n\n # Save results\n save_results(processed_data, args.output)\n\n print(\"\\n✅ Scraping completed 
successfully!\")\n return 0\n\n except Exception as e:\n print(f\"❌ Error: {str(e)}\")\n return 1\n\nif __name__ == \"__main__\":\n exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":9978,"content_sha256":"583874da8e2bad0cf8920b663c5a164fbd11bc3b0827b9e5595df32bd67ae937"},{"filename":"scripts/scrape_twitter_ai_trends.py","content":"#!/usr/bin/env python3\n\"\"\"\nTwitter AI Trends Scraper\nScrapes trending AI-related posts from Twitter/X using Apify.\n\nUsage:\n python execution/scrape_twitter_ai_trends.py [--max-tweets 100] [--query \"AI\"]\n\"\"\"\n\nimport os\nimport json\nimport time\nfrom datetime import datetime\nfrom pathlib import Path\nfrom apify_client import ApifyClient\nfrom dotenv import load_dotenv\nimport argparse\n\n# Load environment variables\nload_dotenv()\n\n# Configuration\nAPIFY_TOKEN = os.getenv(\"APIFY_TOKEN\")\nACTOR_ID = \"kaitoeasyapi/twitter-x-data-tweet-scraper-pay-per-result-cheapest\"\nOUTPUT_DIR = Path(__file__).parent.parent / \".tmp\"\n\ndef validate_environment():\n \"\"\"Validate required environment variables.\"\"\"\n if not APIFY_TOKEN:\n raise ValueError(\n \"APIFY_TOKEN not found in environment. 
\"\n \"Please add it to your .env file.\"\n )\n\ndef run_twitter_scraper(query=\"AI\", max_tweets=50):\n \"\"\"\n Run the Twitter scraper Actor.\n\n Args:\n query (str): Search query for tweets\n max_tweets (int): Maximum number of tweets to scrape\n\n Returns:\n dict: Scraper results with tweet data\n \"\"\"\n print(f\"🚀 Starting Twitter scraper for query: '{query}'\")\n print(f\"📊 Max tweets: {max_tweets}\")\n\n # Initialize Apify client\n client = ApifyClient(APIFY_TOKEN)\n\n # Prepare Actor input using correct API parameters\n run_input = {\n \"twitterContent\": query, # Search for tweets containing this term\n \"maxItems\": max_tweets, # Maximum number of tweets to return\n \"lang\": \"en\", # English tweets only\n \"queryType\": \"Latest\", # Get latest tweets\n \"include:nativeretweets\": False, # Exclude retweets\n \"filter:replies\": False, # Exclude replies\n }\n\n print(\"⏳ Running Actor...\")\n\n # Run the Actor and wait for completion\n try:\n run = client.actor(ACTOR_ID).call(run_input=run_input)\n\n # Get run status\n print(f\"✅ Actor run completed!\")\n print(f\"📋 Run ID: {run['id']}\")\n print(f\"⏱️ Duration: {run.get('duration', 'N/A')}s\")\n\n # Fetch results from dataset\n print(\"📥 Fetching results...\")\n dataset_items = list(client.dataset(run[\"defaultDatasetId\"]).iterate_items())\n\n return {\n \"success\": True,\n \"run_id\": run['id'],\n \"dataset_id\": run[\"defaultDatasetId\"],\n \"items\": dataset_items,\n \"count\": len(dataset_items)\n }\n\n except Exception as e:\n print(f\"❌ Actor run failed: {str(e)}\")\n return {\n \"success\": False,\n \"error\": str(e),\n \"items\": [],\n \"count\": 0\n }\n\ndef process_results(results, min_likes=10, min_retweets=5):\n \"\"\"\n Process and structure the scraped results, filtering for trending tweets.\n\n Args:\n results (dict): Raw results from Apify\n min_likes (int): Minimum likes for trending filter\n min_retweets (int): Minimum retweets for trending filter\n\n Returns:\n dict: Cleaned and 
structured data with only trending tweets\n \"\"\"\n from dateutil import parser\n\n processed_tweets = []\n now = datetime.now()\n cutoff_time = now.replace(hour=0, minute=0, second=0, microsecond=0) # Today at midnight\n\n print(f\"🔍 Filtering for tweets since: {cutoff_time.isoformat()}\")\n print(f\"📊 Engagement threshold: {min_likes}+ likes OR {min_retweets}+ retweets\")\n\n total_scraped = len(results[\"items\"])\n filtered_count = 0\n\n for item in results[\"items\"]:\n # Skip retweets - we want original content only\n if item.get(\"isRetweet\", False):\n continue\n\n likes = item.get(\"likeCount\", 0)\n retweets = item.get(\"retweetCount\", 0)\n\n # Filter by engagement threshold\n if likes \u003c min_likes and retweets \u003c min_retweets:\n continue\n\n # Parse and check date (but don't filter strictly - data quality issues)\n created_at_str = item.get(\"createdAt\", \"\")\n\n tweet = {\n \"id\": item.get(\"id\", \"\"),\n \"text\": item.get(\"text\", \"\"),\n \"author\": item.get(\"author\", {}).get(\"userName\", \"\"),\n \"author_name\": item.get(\"author\", {}).get(\"name\", \"\"),\n \"created_at\": created_at_str,\n \"likes\": likes,\n \"retweets\": retweets,\n \"replies\": item.get(\"replyCount\", 0),\n \"views\": item.get(\"viewCount\", 0),\n \"url\": item.get(\"url\", \"\"),\n \"engagement_score\": likes + retweets,\n }\n processed_tweets.append(tweet)\n filtered_count += 1\n\n # Sort by engagement score (likes + retweets) descending\n processed_tweets.sort(\n key=lambda x: x[\"engagement_score\"],\n reverse=True\n )\n\n print(f\"✨ Filtered {filtered_count} trending tweets from {total_scraped} total\")\n\n return {\n \"tweets\": processed_tweets,\n \"scraped_at\": datetime.now().isoformat(),\n \"total_count\": len(processed_tweets),\n \"total_scraped\": total_scraped,\n \"query_used\": results.get(\"query\", \"AI\"),\n \"run_id\": results.get(\"run_id\", \"\"),\n \"filter_applied\": {\n \"min_likes\": min_likes,\n \"min_retweets\": min_retweets,\n 
\"date_cutoff\": cutoff_time.isoformat(),\n \"retweets_excluded\": True\n }\n }\n\ndef save_results(data, filename=None):\n \"\"\"\n Save results to .tmp directory.\n\n Args:\n data (dict): Processed tweet data\n filename (str, optional): Custom filename\n \"\"\"\n # Ensure output directory exists\n OUTPUT_DIR.mkdir(exist_ok=True)\n\n # Generate filename\n if not filename:\n timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n filename = f\"twitter_ai_trends_{timestamp}.json\"\n\n output_path = OUTPUT_DIR / filename\n\n # Save to file\n with open(output_path, 'w', encoding='utf-8') as f:\n json.dump(data, f, indent=2, ensure_ascii=False)\n\n print(f\"💾 Results saved to: {output_path}\")\n print(f\"📊 Total tweets: {data['total_count']}\")\n\n # Print top 5 tweets\n print(\"\\n🔥 Top 5 Most Engaged Tweets:\")\n for i, tweet in enumerate(data['tweets'][:5], 1):\n print(f\"\\n{i}. @{tweet['author']}\")\n print(f\" {tweet['text'][:100]}...\")\n print(f\" ❤️ {tweet['likes']} | 🔄 {tweet['retweets']} | 👁️ {tweet['views']}\")\n print(f\" 🔗 {tweet['url']}\")\n\n return output_path\n\ndef main():\n \"\"\"Main execution function.\"\"\"\n parser = argparse.ArgumentParser(\n description=\"Scrape trending AI tweets from Twitter/X\"\n )\n parser.add_argument(\n \"--query\",\n default=\"AI OR ChatGPT OR LLM OR GPT OR Claude OR 'artificial intelligence' OR OpenAI OR Anthropic\",\n help=\"Search query for tweets\"\n )\n parser.add_argument(\n \"--max-tweets\",\n type=int,\n default=50,\n help=\"Maximum number of tweets to scrape\"\n )\n parser.add_argument(\n \"--output\",\n help=\"Custom output filename\"\n )\n parser.add_argument(\n \"--min-likes\",\n type=int,\n default=10,\n help=\"Minimum likes for trending filter (default: 10)\"\n )\n parser.add_argument(\n \"--min-retweets\",\n type=int,\n default=5,\n help=\"Minimum retweets for trending filter (default: 5)\"\n )\n\n args = parser.parse_args()\n\n try:\n # Validate environment\n validate_environment()\n\n # Run scraper\n 
results = run_twitter_scraper(\n query=args.query,\n max_tweets=args.max_tweets\n )\n\n if not results[\"success\"]:\n print(f\"❌ Scraping failed: {results.get('error')}\")\n return 1\n\n # Process results with trending filters\n results[\"query\"] = args.query\n processed_data = process_results(\n results,\n min_likes=args.min_likes,\n min_retweets=args.min_retweets\n )\n\n # Save results\n save_results(processed_data, args.output)\n\n print(\"\\n✅ Scraping completed successfully!\")\n return 0\n\n except Exception as e:\n print(f\"❌ Error: {str(e)}\")\n return 1\n\nif __name__ == \"__main__\":\n exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8229,"content_sha256":"ece813a0aed29dd6040c1636aa5a23dbde22e0c8202a5bed2c4847f1954cc261"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"Apify Scrapers","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Overview","type":"text"}]},{"type":"paragraph","content":[{"text":"Scrape content from major social platforms using Apify actors. 
Each platform has optimized settings for cost and quality.","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Quick Decision Tree","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"What do you want to scrape?\n│\n├── Social Media Posts\n│ ├── Twitter/X → references/twitter.md\n│ │ └── Script: scripts/scrape_twitter_ai_trends.py\n│ │\n│ ├── Reddit → references/reddit.md\n│ │ └── Script: scripts/scrape_reddit_ai_tech.py\n│ │\n│ ├── LinkedIn → references/linkedin.md\n│ │ └── Script: scripts/scrape_linkedin_posts.py\n│ │\n│ ├── Instagram → references/instagram.md\n│ │ └── Script: scripts/scrape_instagram.py\n│ │ └── Modes: profile, posts, hashtag, reels, comments\n│ │\n│ ├── Facebook → references/facebook.md\n│ │ └── Script: scripts/scrape_facebook.py\n│ │ └── Modes: page, posts, reviews, groups, marketplace\n│ │\n│ ├── TikTok → references/multi-platform.md\n│ │ └── Script: scripts/scrape_multi_platform.py\n│ │\n│ └── YouTube → references/multi-platform.md\n│ └── Script: scripts/scrape_multi_platform.py\n│\n├── Business/Places\n│ ├── Google Maps businesses → references/google-maps.md\n│ │ └── Script: scripts/scrape_google_maps.py\n│ │ └── Modes: search, place, reviews\n│ │\n│ └── Contact info from websites → references/contact-enrichment.md\n│ └── Script: scripts/scrape_contact_info.py\n│ └── Extract: emails, phone numbers, social profiles\n│\n├── Auto-detect URL type → references/url-detect.md\n│ └── Script: scripts/scrape_content_by_url.py\n│\n├── Trend Analysis (NEW)\n│ └── Enriched trend analysis → workflows/trend-analysis.md\n│ └── Script: scripts/analyze_trends.py\n│ └── Features: velocity scoring, lifecycle staging, opportunity scoring\n│\n└── Workflows (multi-step)\n ├── Lead generation → workflows/lead-generation.md\n ├── Influencer discovery → workflows/influencer-discovery.md\n ├── Competitor analysis → workflows/competitor-intel.md\n ├── Trend analysis → workflows/trend-analysis.md\n └── 
Competitor Ads Intelligence (NEW) → workflows/competitor-ads.md\n └── Script: scripts/scrape_competitor_ads.py\n └── Platforms: Facebook Ads Library, Google Ads Transparency\n └── Features: Spend estimates, creative analysis, benchmarking","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Environment Setup","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# Required in .env\nAPIFY_TOKEN=apify_api_xxxxx","type":"text"}]},{"type":"paragraph","content":[{"text":"Get your API key: https://console.apify.com/account/integrations","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Common Usage Patterns","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Twitter Trends","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_twitter_ai_trends.py --query \"AI agents\" --max-tweets 50","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Reddit Discussions","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_reddit_ai_tech.py --subreddits \"MachineLearning,LocalLLaMA\" --max-posts 100","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape LinkedIn Author","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_linkedin_posts.py author \"https://linkedin.com/in/username\" --max-posts 30","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Auto-detect and Scrape URL","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_content_by_url.py \"https://x.com/user/status/123456\"","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Instagram 
Profile","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_instagram.py profile \"https://instagram.com/username\" --max-posts 20","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Instagram Hashtag","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_instagram.py hashtag \"#artificialintelligence\" --max-posts 50","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Instagram Reels","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_instagram.py reels \"https://instagram.com/username\" --max-reels 30","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Facebook Page","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_facebook.py page \"https://facebook.com/pagename\" --max-posts 50","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Facebook Reviews","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_facebook.py reviews \"https://facebook.com/pagename\" --max-reviews 100","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Facebook Marketplace","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_facebook.py marketplace \"laptops in san francisco\" --max-items 30","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Google Maps Businesses","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_google_maps.py search \"AI consulting firms in New York\" --max-results 
50","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Google Maps Reviews","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_google_maps.py reviews \"ChIJN1t_tDeuEmsRUsoyG83frY4\" --max-reviews 100","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Extract Contact Info from Websites","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_contact_info.py \"https://example.com\" --depth 2","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Bulk Contact Enrichment","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_contact_info.py --urls-file companies.txt --output contacts.json","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape Competitor Ads (Single Competitor)","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_competitor_ads.py \"Nike\" --platforms facebook google --country US --days 30","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Compare Multiple Competitors' Ads","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_competitor_ads.py \"Nike\" \"Adidas\" \"Puma\" --compare --output comparison.json","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Discover Advertisers by Keyword","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_competitor_ads.py --search \"running shoes\" --country US --max-ads 200","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Filter Competitor Ads by Media 
Type","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/scrape_competitor_ads.py \"Netflix\" \"Disney+\" --platforms facebook --media-types video --days 7","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Analyze Trends (NEW)","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# Analyze specific topic with enrichments\npython scripts/analyze_trends.py \"artificial intelligence\" --sources google instagram tiktok --days 90\n\n# Discover trending topics in category\npython scripts/analyze_trends.py --category technology --discover --top 50\n\n# Compare multiple trends\npython scripts/analyze_trends.py \"AI\" \"blockchain\" \"metaverse\" --compare\n\n# Export HTML trend report\npython scripts/analyze_trends.py \"sustainable fashion\" --format html --output trend_report.html","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Cost Estimates","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Platform","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Actor","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Cost per 
Item","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Twitter","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"kaitoeasyapi/twitter-x-data-tweet-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.00025","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Reddit","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"trudax/reddit-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.001-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LinkedIn","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"harvestapi/linkedin-post-search","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01-0.05","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"YouTube","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"streamers/youtube-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan
":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01-0.05","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"TikTok","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"clockworks/tiktok-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Instagram (profile)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/instagram-profile-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Instagram (posts)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/instagram-post-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.002-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Instagram 
(hashtag)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/instagram-hashtag-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.002-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Instagram (reels)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/instagram-reel-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005-0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Instagram (comments)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/instagram-comment-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.001-0.003","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook 
(page)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-pages-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005-0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook (posts)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-posts-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.003-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook (reviews)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-reviews-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.002-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook 
(groups)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-groups-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005-0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook (marketplace)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-marketplace-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.005-0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google Maps (search)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"compass/crawler-google-places","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01-0.02","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google Maps 
(place)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"compass/google-maps-business-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google Maps (reviews)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"compass/google-maps-reviews-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.003-0.005","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Contact Enrichment","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"lukaskrivka/contact-info-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01-0.03","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google 
Trends","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/google-trends-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.01","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Trend Analysis (multi)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Multiple actors","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.50-1.50/run","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook Ads Library","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"apify/facebook-ads-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.75/1K ads","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Facebook Ads (alt)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"curious_coder/facebook-ads-library-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.50/1K 
ads","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google Ads Transparency","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"lexis-solutions/google-ads-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$1.00/1K ads","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Google Ads (alt)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"xtech/google-ad-transparency-scraper","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"~$0.80/1K ads","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Output Location","type":"text"}]},{"type":"paragraph","content":[{"text":"All scraped data saves to ","type":"text"},{"text":".tmp/","type":"text","marks":[{"type":"code_inline"}]},{"text":" with timestamped filenames:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":".tmp/twitter_ai_trends_YYYYMMDD_HHMMSS.json","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":".tmp/reddit_ai_tech_YYYYMMDD.json","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":".tmp/linkedin_posts_YYYYMMDD_HHMMSS.json","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Security 
Notes","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Credential Handling","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Store ","type":"text"},{"text":"APIFY_TOKEN","type":"text","marks":[{"type":"code_inline"}]},{"text":" in ","type":"text"},{"text":".env","type":"text","marks":[{"type":"code_inline"}]},{"text":" file (never commit to git)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Rotate API tokens periodically via Apify Console","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Never log or print API tokens in script output","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use environment variables, not hardcoded values","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Data Privacy","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Scraped data contains only publicly available content","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Social media posts may include PII (names, handles, profile info)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Data is stored locally in ","type":"text"},{"text":".tmp/","type":"text","marks":[{"type":"code_inline"}]},{"text":" directory","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"No data is retained by Apify after actor run completes","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Consider data minimization - only scrape what you need","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Access 
Scopes","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Apify tokens have full account access (no granular scopes)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use separate Apify accounts for different projects if needed","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Monitor usage via Apify Console dashboard","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Compliance Considerations","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Terms of Service","type":"text","marks":[{"type":"strong"}]},{"text":": Respect each platform's ToS (Twitter, Reddit, LinkedIn)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Rate Limiting","type":"text","marks":[{"type":"strong"}]},{"text":": Actors have built-in rate limiting to avoid bans","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Robots.txt","type":"text","marks":[{"type":"strong"}]},{"text":": Some actors may bypass robots.txt - use responsibly","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"GDPR","type":"text","marks":[{"type":"strong"}]},{"text":": Scraped PII may be subject to GDPR if it relates to EU residents","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Ethical Use","type":"text","marks":[{"type":"strong"}]},{"text":": Only scrape public data; never bypass authentication","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Proxy Ethics","type":"text","marks":[{"type":"strong"}]},{"text":": Residential proxies should be used 
ethically","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Troubleshooting","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Common Issues","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Issue: Actor run failed","type":"text"}]},{"type":"paragraph","content":[{"text":"Symptoms:","type":"text","marks":[{"type":"strong"}]},{"text":" Script terminates with \"Actor run failed\" or timeout error ","type":"text"},{"text":"Cause:","type":"text","marks":[{"type":"strong"}]},{"text":" Invalid actor ID, insufficient proxy credits, or actor configuration issue ","type":"text"},{"text":"Solution:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Verify the actor ID is correct in the script","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Check Apify Console for actor run logs","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Ensure proxy settings match actor requirements","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Try running with default proxy settings first","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Issue: Empty results returned","type":"text"}]},{"type":"paragraph","content":[{"text":"Symptoms:","type":"text","marks":[{"type":"strong"}]},{"text":" Script completes but returns 0 items ","type":"text"},{"text":"Cause:","type":"text","marks":[{"type":"strong"}]},{"text":" Content blocked by platform, invalid query, or proxy being detected ","type":"text"},{"text":"Solution:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Try a different proxy type (residential vs 
datacenter)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Simplify the search query","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Reduce the number of results requested","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Check if the platform is blocking scraping attempts","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Issue: Rate limited by platform","type":"text"}]},{"type":"paragraph","content":[{"text":"Symptoms:","type":"text","marks":[{"type":"strong"}]},{"text":" Script fails with 429 errors or \"rate limited\" messages ","type":"text"},{"text":"Cause:","type":"text","marks":[{"type":"strong"}]},{"text":" Too many requests in a short time period ","type":"text"},{"text":"Solution:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Add delays between requests (actor settings)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Reduce concurrent requests","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use proxy rotation","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Wait and retry after a cooldown period","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Issue: Invalid API token","type":"text"}]},{"type":"paragraph","content":[{"text":"Symptoms:","type":"text","marks":[{"type":"strong"}]},{"text":" Authentication error or \"invalid token\" message ","type":"text"},{"text":"Cause:","type":"text","marks":[{"type":"strong"}]},{"text":" Token expired, revoked, or incorrectly set 
","type":"text"},{"text":"Solution:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Regenerate API token in Apify Console","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Verify token is correctly set in ","type":"text"},{"text":".env","type":"text","marks":[{"type":"code_inline"}]},{"text":" file","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Check for leading/trailing whitespace in token","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Ensure ","type":"text"},{"text":"APIFY_TOKEN","type":"text","marks":[{"type":"code_inline"}]},{"text":" environment variable is loaded","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Issue: Proxy connection errors","type":"text"}]},{"type":"paragraph","content":[{"text":"Symptoms:","type":"text","marks":[{"type":"strong"}]},{"text":" Connection timeout or proxy errors ","type":"text"},{"text":"Cause:","type":"text","marks":[{"type":"strong"}]},{"text":" Proxy pool exhausted or geo-restriction issues ","type":"text"},{"text":"Solution:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Switch proxy type (basic, residential, or datacenter)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Verify proxy credit balance in Apify Console","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Try a different proxy country/region","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Disable proxy to test if that's the root 
cause","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Resources","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Platform References","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/twitter.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Twitter/X scraping details","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/reddit.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Reddit scraping with subreddit targeting","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/linkedin.md","type":"text","marks":[{"type":"strong"}]},{"text":" - LinkedIn post scraping (author or search mode)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/instagram.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Instagram profile, posts, hashtag, reels, and comments scraping","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/facebook.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Facebook page, posts, reviews, groups, and marketplace scraping","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/multi-platform.md","type":"text","marks":[{"type":"strong"}]},{"text":" - TikTok and YouTube scraping","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/url-detect.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Auto-detect URL type and scrape","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Business/Places 
References","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/google-maps.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Google Maps business search, place details, and reviews","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/contact-enrichment.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Extract emails, phone numbers, and social profiles from websites","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Workflow References","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/workflows/lead-generation.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Multi-step lead generation workflow","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/workflows/influencer-discovery.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Find and analyze influencers across platforms","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/workflows/competitor-intel.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Competitive intelligence gathering workflow","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"references/workflows/trend-analysis.md","type":"text","marks":[{"type":"strong"}]},{"text":" - Enriched multi-platform trend analysis with scoring","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Integration Patterns","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape and Enrich","type":"text"}]},{"type":"paragraph","content":[{"text":"Skills:","type":"text","marks":[{"type":"strong"}]},{"text":" apify-scrapers → parallel-research ","type":"text"},{"text":"Use case:","type":"text","marks":[{"type":"strong"}]},{"text":" Scrape social media 
posts, then enrich with deep research ","type":"text"},{"text":"Flow:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Scrape Twitter/Reddit for mentions of a topic","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Extract company names or URLs from posts","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use parallel-research to get detailed info on each company","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape and Summarize","type":"text"}]},{"type":"paragraph","content":[{"text":"Skills:","type":"text","marks":[{"type":"strong"}]},{"text":" apify-scrapers → content-generation ","type":"text"},{"text":"Use case:","type":"text","marks":[{"type":"strong"}]},{"text":" Create newsletter content from social media trends ","type":"text"},{"text":"Flow:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Scrape trending AI posts from Twitter","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Pass scraped data to content-generation summarize","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Generate a formatted newsletter section","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Scrape and Archive","type":"text"}]},{"type":"paragraph","content":[{"text":"Skills:","type":"text","marks":[{"type":"strong"}]},{"text":" apify-scrapers → google-workspace ","type":"text"},{"text":"Use case:","type":"text","marks":[{"type":"strong"}]},{"text":" Save scraped data to Google Drive for team access 
","type":"text"},{"text":"Flow:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Scrape LinkedIn posts from target accounts","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Format data as CSV or JSON","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Upload to Google Drive client folder via google-workspace","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Trend Analysis + Content Strategy","type":"text"}]},{"type":"paragraph","content":[{"text":"Skills:","type":"text","marks":[{"type":"strong"}]},{"text":" apify-scrapers (trend-analysis) → content-generation ","type":"text"},{"text":"Use case:","type":"text","marks":[{"type":"strong"}]},{"text":" Identify trending topics and create content strategy ","type":"text"},{"text":"Flow:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Run trend analysis: ","type":"text"},{"text":"python scripts/analyze_trends.py \"AI productivity\" --sources all","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Review lifecycle stage and opportunity score","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use content-generation to create content for high-opportunity trends","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Focus on emerging trends with high velocity scores","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Competitive Trend 
Monitoring","type":"text"}]},{"type":"paragraph","content":[{"text":"Skills:","type":"text","marks":[{"type":"strong"}]},{"text":" apify-scrapers (trend-analysis) → parallel-research ","type":"text"},{"text":"Use case:","type":"text","marks":[{"type":"strong"}]},{"text":" Monitor competitor visibility in trending topics ","type":"text"},{"text":"Flow:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Analyze industry trends: ","type":"text"},{"text":"python scripts/analyze_trends.py --category \"your-industry\" --discover","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Compare your brand vs competitors in those trends","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use parallel-research for deep dive on gaps","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Generate competitive intelligence 
report","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}}]},"metadata":{"date":"2026-06-05","name":"apify-scrapers","author":"@skillopedia","source":{"stars":11,"repo_name":"casper-marketplace","origin_url":"https://github.com/casper-studios/casper-marketplace/blob/HEAD/casper/skills/apify-scrapers/SKILL.md","repo_owner":"casper-studios","body_sha256":"37fae058444ec92d87ca3a223eddbcc0c8065306264d2150f273bdac2b42093c","cluster_key":"9cbcbac9e61d9f3144d8e529162d5a4daf0d3df344c40e7f242ecee10a275560","clean_bundle":{"format":"clean-skill-bundle-v1","source":"casper-studios/casper-marketplace/casper/skills/apify-scrapers/SKILL.md","attachments":[{"id":"b308c284-3303-5647-acfa-6758f3cee924","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b308c284-3303-5647-acfa-6758f3cee924/attachment.md","path":"references/contact-enrichment.md","size":2300,"sha256":"70fbf32e6d6b7d81423d0f6a8907fd0570d1f74d66d35b3d81cda32240c867f5","contentType":"text/markdown; charset=utf-8"},{"id":"f535f15f-f7b0-5331-ac4f-46fadccf41a8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f535f15f-f7b0-5331-ac4f-46fadccf41a8/attachment.md","path":"references/facebook.md","size":12353,"sha256":"44646a1f0e5c2339f89e9f06374be8e5626a84c4df105fd48df6558ecff1e684","contentType":"text/markdown; charset=utf-8"},{"id":"579e6642-6d51-5585-ad73-c1f677991613","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/579e6642-6d51-5585-ad73-c1f677991613/attachment.md","path":"references/google-maps.md","size":8859,"sha256":"db3497d4a4c84616180c421f3aa124c147ff7d392141678407a05eb332fa33aa","contentType":"text/markdown; charset=utf-8"},{"id":"1d88d3d7-317a-5800-81df-bb86ee0183a0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1d88d3d7-317a-5800-81df-bb86ee0183a0/attachment.md","path":"references/instagram.md","size":15543,"sha256":"7f40ff199ba4a21ef15595f52a773b7497df85eb9bce30392ded0aa5db70db45","contentType":"text/markdown; 
charset=utf-8"},{"id":"2a7e583c-2c62-5b8c-a013-96fa8d0b311c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2a7e583c-2c62-5b8c-a013-96fa8d0b311c/attachment.md","path":"references/linkedin.md","size":4628,"sha256":"b4dab8f71a78727c8ddf9f50f246a6c715a6e7dc5c1fc8bc858df449393ae616","contentType":"text/markdown; charset=utf-8"},{"id":"9cb2ff83-c333-574f-ab81-006c2b9b0359","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9cb2ff83-c333-574f-ab81-006c2b9b0359/attachment.md","path":"references/multi-platform.md","size":4633,"sha256":"0dfa19b01c3a4f5c9ad208fff3ab3d6e721b0d9285b67038e82257aaab7b6f23","contentType":"text/markdown; charset=utf-8"},{"id":"deb75cda-2710-5aad-b041-57adaae54959","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/deb75cda-2710-5aad-b041-57adaae54959/attachment.md","path":"references/reddit.md","size":4061,"sha256":"8685849195f66ad0302ed067bc87ee88ebea746574959a2710becf3909d23b3e","contentType":"text/markdown; charset=utf-8"},{"id":"3a64ecec-a826-59cd-a3fd-7b5bcdee6d7f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3a64ecec-a826-59cd-a3fd-7b5bcdee6d7f/attachment.md","path":"references/twitter.md","size":3758,"sha256":"ab8d7cc447e1e95aedac88e429978090b72cf3bbccadf25dfb7599e4485540c6","contentType":"text/markdown; charset=utf-8"},{"id":"f6074950-026d-541b-8fbe-aa36c89dd531","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f6074950-026d-541b-8fbe-aa36c89dd531/attachment.md","path":"references/url-detect.md","size":4952,"sha256":"15f0c22c2887fedfbe1ae1b02aaf189242522a3fb230bdb7203800d1a1d40d10","contentType":"text/markdown; charset=utf-8"},{"id":"f2ba396e-4b3d-5563-94e7-2e4ccadeeda4","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f2ba396e-4b3d-5563-94e7-2e4ccadeeda4/attachment.md","path":"references/workflows/audience-analysis.md","size":11207,"sha256":"3f1e3d94d2625d250e43282da153de1347d09c2f5e2d64c8848448015d129650","contentType":"text/markdown; 
charset=utf-8"},{"id":"095ecf31-ce99-566f-9112-d22997a4f42d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/095ecf31-ce99-566f-9112-d22997a4f42d/attachment.md","path":"references/workflows/competitor-ads.md","size":18956,"sha256":"76c8552e9f284514f24969a9ef6d71ed8aa483f1c15d8fd6d7dbbf8c06574b88","contentType":"text/markdown; charset=utf-8"},{"id":"b1ad853a-ea72-544b-86fa-ce2e78dcbed8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b1ad853a-ea72-544b-86fa-ce2e78dcbed8/attachment.md","path":"references/workflows/competitor-intel.md","size":2867,"sha256":"26e9b3330ee1863a16c83ff3aead25ef86ca0734bacfbe7a470e76a29dfe8cf5","contentType":"text/markdown; charset=utf-8"},{"id":"bd4ae641-ea2e-5577-b799-b2a23f8b64e8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bd4ae641-ea2e-5577-b799-b2a23f8b64e8/attachment.md","path":"references/workflows/influencer-discovery.md","size":18617,"sha256":"2dd5173b6b0480e238f2c177bc95d5b8fcd3208f52e715631a27109ef8b8434e","contentType":"text/markdown; charset=utf-8"},{"id":"56029bae-9c8f-5fd7-bc0e-6c0ae988a78a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/56029bae-9c8f-5fd7-bc0e-6c0ae988a78a/attachment.md","path":"references/workflows/lead-generation.md","size":1786,"sha256":"567aea6049d96df98bba920a1e3c84b4ae77a8cd28082e8939af37a94cb51ab5","contentType":"text/markdown; charset=utf-8"},{"id":"71344abd-e905-573b-91f0-fcb17a52108a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/71344abd-e905-573b-91f0-fcb17a52108a/attachment.md","path":"references/workflows/trend-analysis.md","size":16623,"sha256":"644e823af7a0782fa815a9c937778b2deb294f19cfcafd9214c9efd361fce2f0","contentType":"text/markdown; charset=utf-8"},{"id":"d73d064b-3bf4-5710-9dc2-65b636ea7d45","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d73d064b-3bf4-5710-9dc2-65b636ea7d45/attachment.py","path":"scripts/analyze_audience.py","size":63408,"sha256":"4d11646bffde099b7060a32fe7d815a7f97470182cdc355fc5cc095ea23a7fdf","contentType":"text/x-python; 
charset=utf-8"},{"id":"525ebe9e-dde2-5552-9045-1cd512e59e0a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/525ebe9e-dde2-5552-9045-1cd512e59e0a/attachment.py","path":"scripts/analyze_trends.py","size":74182,"sha256":"b3eaa2cc3c1a4c484129c1b0c983e6dc3f7fdb7d99d7d0053909cce802faf116","contentType":"text/x-python; charset=utf-8"},{"id":"21367fb8-8da2-5b3c-ba1f-cc74f3f1addc","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/21367fb8-8da2-5b3c-ba1f-cc74f3f1addc/attachment.py","path":"scripts/discover_influencers.py","size":47372,"sha256":"d822acff828b2eeaa762addf4202443758108522bbdc3aafe28733c987b32d39","contentType":"text/x-python; charset=utf-8"},{"id":"15e2d6e9-9ea3-5bcc-9e62-3d9ac120b3c6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/15e2d6e9-9ea3-5bcc-9e62-3d9ac120b3c6/attachment.py","path":"scripts/enrich_contacts.py","size":11293,"sha256":"2d7022bd959568538d4f6845caac66f0a83116f4d21d8c6e604d3afdc07fb523","contentType":"text/x-python; charset=utf-8"},{"id":"a225f396-0b14-572d-988c-4ad8d0112d41","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a225f396-0b14-572d-988c-4ad8d0112d41/attachment.py","path":"scripts/scrape_competitor_ads.py","size":33254,"sha256":"cf4af4011300abb7c630b18afa6d9ad93538c20924fb631ab535d51b523d43c9","contentType":"text/x-python; charset=utf-8"},{"id":"e3aec214-75cb-5186-b267-d174e7a129dd","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e3aec214-75cb-5186-b267-d174e7a129dd/attachment.py","path":"scripts/scrape_content_by_url.py","size":17086,"sha256":"64f29d4cade217d3bfceddb77915afb4745f9f5262f68d43dad176f30d8d070c","contentType":"text/x-python; charset=utf-8"},{"id":"45f3c9e2-1b10-5681-86f8-8e16dcda8095","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/45f3c9e2-1b10-5681-86f8-8e16dcda8095/attachment.py","path":"scripts/scrape_facebook.py","size":20314,"sha256":"e88c42018f4d782e66dbc31273440744a40218e2d2767937884622360d8ff242","contentType":"text/x-python; 
charset=utf-8"},{"id":"1ad1d0bb-8d6d-5fb2-9c70-fb2a43965678","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1ad1d0bb-8d6d-5fb2-9c70-fb2a43965678/attachment.py","path":"scripts/scrape_google_maps.py","size":19430,"sha256":"c7d4d7ac8929f625931edd470dc83809b8a39a92e81a5e6e2ffba5c21e6b6522","contentType":"text/x-python; charset=utf-8"},{"id":"a9abced5-6377-59f1-a418-d89c37c156fc","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a9abced5-6377-59f1-a418-d89c37c156fc/attachment.py","path":"scripts/scrape_instagram.py","size":27729,"sha256":"01e47d787cef2a2a21b02e96ae9a15522eeed5b4a0b573ba921484bea9cb8f04","contentType":"text/x-python; charset=utf-8"},{"id":"34eb0532-ae8b-5be5-b0f3-fc306999fbc2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/34eb0532-ae8b-5be5-b0f3-fc306999fbc2/attachment.py","path":"scripts/scrape_linkedin_posts.py","size":10735,"sha256":"1dd1f5767e112b50095dbb1a79e62d44d636aabdbbf762680332029504d7a9e1","contentType":"text/x-python; charset=utf-8"},{"id":"6e88ce11-e014-5ca4-a43e-a45e2501db6c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6e88ce11-e014-5ca4-a43e-a45e2501db6c/attachment.py","path":"scripts/scrape_multi_platform.py","size":8105,"sha256":"5cf1d88c25cecc5cd3028df9caa6fa7c582991ca54cdf0370c2a12d12290ca56","contentType":"text/x-python; charset=utf-8"},{"id":"0ab63960-83c2-5843-85e1-6474d2e2bdda","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0ab63960-83c2-5843-85e1-6474d2e2bdda/attachment.py","path":"scripts/scrape_reddit_ai_tech.py","size":9978,"sha256":"583874da8e2bad0cf8920b663c5a164fbd11bc3b0827b9e5595df32bd67ae937","contentType":"text/x-python; charset=utf-8"},{"id":"0758e715-aa2e-5996-bfa6-424d932d3e61","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0758e715-aa2e-5996-bfa6-424d932d3e61/attachment.py","path":"scripts/scrape_twitter_ai_trends.py","size":8229,"sha256":"ece813a0aed29dd6040c1636aa5a23dbde22e0c8202a5bed2c4847f1954cc261","contentType":"text/x-python; 
charset=utf-8"}],"bundle_sha256":"7629bde12a84ca0bf406ceea3f7c655eebd0ed1e2871bba466166ca3188aa9f0","attachment_count":28,"text_attachments":28,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"casper/skills/apify-scrapers/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"browser-automation-scraping","category_label":"Browser"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"browser-automation-scraping","import_tag":"clean-skills-v1","description":"Social media and web scraping using Apify actors. Use this skill when scraping Twitter/X tweets, Reddit posts, LinkedIn posts, Instagram profiles/posts/reels, Facebook pages/posts/groups, TikTok videos, YouTube content, Google Maps businesses/reviews, contact enrichment (emails/phones from websites), or when auto-detecting URL type to scrape. Triggers on requests to scrape social media, get trending posts, extract business info, find contact details, or extract content from social URLs."}},"renderedAt":1782980862805}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.