deobfuscating-powershell-obfuscated-malware

Deobfuscating PowerShell Obfuscated Malware — Overview: PowerShell is heavily abused by malware authors due to its deep Windows integration and powerful scripting capabilities. Obfuscation techniques include string concatenation, Base64 encoding, character substitution, Invoke-Expression layering, SecureString abuse, environment variable manipulation, and tick-mark insertion. Modern malware uses multiple obfuscation layers requiring iterative deobfuscation. Tools like PSDecode, PowerDecode, and PowerPeeler automate much of this process, while manual AST (Abstract Syntax Tree) analysis handles custom obfuscation…

,\n r'&\\s*\\(\\s*\\

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

,\n r'\\|\\s*IEX',\n r'\\|\\s*Invoke-Expression',\n ]\n for pattern in iex_patterns:\n if re.search(pattern, script_content, re.IGNORECASE):\n techniques.append(f\"Invoke-Expression variant: {pattern}\")\n\n # Check for tick-mark obfuscation\n tick_count = script_content.count('`')\n if tick_count > 5:\n techniques.append(f\"Tick-mark Insertion ({tick_count} backticks)\")\n\n # Check for environment variable abuse\n if re.search(r'\\$env:', script_content, re.IGNORECASE):\n env_refs = re.findall(r'\\$env:\\w+', script_content, re.IGNORECASE)\n if len(env_refs) > 2:\n techniques.append(f\"Environment Variable Abuse ({len(env_refs)} refs)\")\n\n # Check for SecureString\n if re.search(r'ConvertTo-SecureString', script_content, re.IGNORECASE):\n techniques.append(\"SecureString Encryption\")\n\n # Check for compression\n if re.search(r'IO\\.Compression|DeflateStream|GZipStream',\n script_content, re.IGNORECASE):\n techniques.append(\"Compression (Deflate/GZip)\")\n\n # Check for XOR encoding\n if re.search(r'-bxor\\s+\\d+', script_content, re.IGNORECASE):\n techniques.append(\"XOR Encoding\")\n\n # Check for Replace chain\n replace_count = len(re.findall(r'\\.Replace\\(', script_content))\n if replace_count > 2:\n techniques.append(f\"Replace Chain ({replace_count} replacements)\")\n\n return techniques\n\n\ndef decode_base64_command(script_content):\n \"\"\"Extract and decode Base64 encoded commands.\"\"\"\n b64_match = re.search(\n r'-[Ee](?:nc(?:odedcommand)?)\\s+([A-Za-z0-9+/=]{20,})',\n script_content, re.IGNORECASE\n )\n if b64_match:\n encoded = b64_match.group(1)\n try:\n decoded = base64.b64decode(encoded).decode('utf-16-le')\n return decoded\n except Exception:\n return None\n return None\n\n\ndef remove_tick_marks(script_content):\n \"\"\"Remove PowerShell tick-mark obfuscation.\"\"\"\n # Remove backticks that are not escape sequences\n escape_chars = {'`n', '`r', '`t', '`a', '`b', '`f', '`v', '`0', '``'}\n result = []\n i = 0\n while i \u003c 
len(script_content):\n if script_content[i] == '`' and i + 1 \u003c len(script_content):\n pair = script_content[i:i+2]\n if pair in escape_chars:\n result.append(pair)\n i += 2\n else:\n # Skip the backtick, keep the next char\n result.append(script_content[i+1])\n i += 2\n else:\n result.append(script_content[i])\n i += 1\n return ''.join(result)\n\n\ndef resolve_string_concat(script_content):\n \"\"\"Resolve simple string concatenation patterns.\"\"\"\n # Pattern: 'str1' + 'str2'\n pattern = re.compile(r\"'([^']*)'\\s*\\+\\s*'([^']*)'\")\n while pattern.search(script_content):\n script_content = pattern.sub(lambda m: f\"'{m.group(1)}{m.group(2)}'\",\n script_content)\n # Pattern: \"str1\" + \"str2\"\n pattern = re.compile(r'\"([^\"]*)\"\\s*\\+\\s*\"([^\"]*)\"')\n while pattern.search(script_content):\n script_content = pattern.sub(lambda m: f'\"{m.group(1)}{m.group(2)}\"',\n script_content)\n return script_content\n\n\nif __name__ == \"__main__\":\n if len(sys.argv) \u003c 2:\n print(f\"Usage: {sys.argv[0]} \u003cpowershell_script>\")\n sys.exit(1)\n\n with open(sys.argv[1], 'r', errors='replace') as f:\n content = f.read()\n\n print(\"[+] Obfuscation Analysis\")\n print(\"=\" * 60)\n techniques = analyze_obfuscation(content)\n for t in techniques:\n print(f\" - {t}\")\n\n # Attempt automatic deobfuscation\n print(\"\\n[+] Attempting Deobfuscation\")\n print(\"=\" * 60)\n\n # Layer 1: Remove tick marks\n deobfuscated = remove_tick_marks(content)\n\n # Layer 2: Resolve string concatenation\n deobfuscated = resolve_string_concat(deobfuscated)\n\n # Layer 3: Decode Base64\n b64_decoded = decode_base64_command(deobfuscated)\n if b64_decoded:\n print(\"[+] Base64 decoded content:\")\n print(b64_decoded[:2000])\n deobfuscated = b64_decoded\n\n print(f\"\\n[+] Deobfuscated script length: {len(deobfuscated)} chars\")\n output_file = sys.argv[1] + \".deobfuscated.ps1\"\n with open(output_file, 'w') as f:\n f.write(deobfuscated)\n print(f\"[+] Saved to 
{output_file}\")\n```\n\n### Step 2: Multi-Layer IEX Replacement\n\n```python\nimport subprocess\nimport tempfile\nimport os\n\ndef iex_replacement_deobfuscate(script_content, max_layers=10):\n \"\"\"Iteratively replace IEX with Write-Output to unwrap layers.\"\"\"\n # IEX replacement patterns\n replacements = [\n (r'\\bInvoke-Expression\\b', 'Write-Output'),\n (r'\\bIEX\\b', 'Write-Output'),\n (r'\\|\\s*IEX\\b', '| Write-Output'),\n ]\n\n current = script_content\n layers = []\n\n for layer_num in range(max_layers):\n # Apply IEX replacements\n modified = current\n for pattern, replacement in replacements:\n modified = re.sub(pattern, replacement, modified, flags=re.IGNORECASE)\n\n if modified == current and layer_num > 0:\n print(f\" [+] No more IEX layers found at layer {layer_num}\")\n break\n\n # Write to temp file and execute in constrained PowerShell\n with tempfile.NamedTemporaryFile(mode='w', suffix='.ps1',\n delete=False) as tmp:\n tmp.write(modified)\n tmp_path = tmp.name\n\n try:\n result = subprocess.run(\n ['powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass',\n '-File', tmp_path],\n capture_output=True, text=True, timeout=30\n )\n\n output = result.stdout.strip()\n if output and output != current:\n print(f\" [+] Layer {layer_num + 1}: Unwrapped \"\n f\"{len(output)} chars\")\n layers.append({\n \"layer\": layer_num + 1,\n \"technique\": \"IEX replacement\",\n \"content_length\": len(output),\n })\n current = output\n else:\n break\n\n except subprocess.TimeoutExpired:\n print(f\" [!] 
Layer {layer_num + 1}: Execution timeout\")\n break\n finally:\n os.unlink(tmp_path)\n\n return current, layers\n```\n\n### Step 3: Extract IOCs from Deobfuscated Script\n\n```python\ndef extract_iocs_from_script(deobfuscated_content):\n \"\"\"Extract indicators of compromise from deobfuscated PowerShell.\"\"\"\n iocs = {\n \"urls\": [],\n \"ips\": [],\n \"domains\": [],\n \"file_paths\": [],\n \"registry_keys\": [],\n \"commands\": [],\n \"base64_blobs\": [],\n }\n\n # URLs\n url_pattern = re.compile(\n r'https?://[^\\s\\'\"\u003c>)\\]]+', re.IGNORECASE\n )\n iocs[\"urls\"] = list(set(url_pattern.findall(deobfuscated_content)))\n\n # IP addresses\n ip_pattern = re.compile(\n r'\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b'\n )\n iocs[\"ips\"] = list(set(ip_pattern.findall(deobfuscated_content)))\n\n # File paths\n path_pattern = re.compile(\n r'[A-Za-z]:\\\\[^\\s\\'\"\u003c>|]+|'\n r'\\\\\\\\[^\\s\\'\"\u003c>|]+|'\n r'%(?:APPDATA|TEMP|USERPROFILE|PROGRAMFILES)%[^\\s\\'\"\u003c>|]*',\n re.IGNORECASE\n )\n iocs[\"file_paths\"] = list(set(path_pattern.findall(deobfuscated_content)))\n\n # Registry keys\n reg_pattern = re.compile(\n r'(?:HKLM|HKCU|HKCR|HKU|HKCC)(?:\\\\[^\\s\\'\"\u003c>|]+)+',\n re.IGNORECASE\n )\n iocs[\"registry_keys\"] = list(set(reg_pattern.findall(deobfuscated_content)))\n\n # Suspicious commands\n suspicious_cmds = [\n 'New-Object Net.WebClient',\n 'DownloadString', 'DownloadFile', 'DownloadData',\n 'Start-Process', 'Invoke-WebRequest',\n 'New-Object IO.MemoryStream',\n 'Reflection.Assembly',\n 'Add-MpPreference -ExclusionPath',\n 'Set-MpPreference -DisableRealtimeMonitoring',\n 'New-ScheduledTask', 'Register-ScheduledTask',\n ]\n for cmd in suspicious_cmds:\n if cmd.lower() in deobfuscated_content.lower():\n iocs[\"commands\"].append(cmd)\n\n return iocs\n```\n\n## Validation Criteria\n\n- All obfuscation layers identified and classified correctly\n- Base64 encoded commands decoded to readable PowerShell\n- Tick-mark and string concatenation obfuscation 
resolved\n- IEX replacement reveals next-stage payloads\n- URLs, IPs, and file paths extracted from final deobfuscated stage\n- Deobfuscated script matches observed malware behavior in sandbox\n\n## References\n\n- [PSDecode - PowerShell Deobfuscation](https://github.com/R3MRUM/PSDecode)\n- [PowerDecode - Multi-layer Deobfuscation](https://github.com/Malandrone/PowerDecode)\n- [PowerPeeler - Instruction-level Deobfuscation](https://arxiv.org/html/2406.04027v2)\n- [SentinelOne - Deconstructing PowerShell Obfuscation](https://www.sentinelone.com/blog/deconstructing-powershell-obfuscation-in-malspam-campaigns/)\n- [MITRE ATT&CK T1059.001 - PowerShell](https://attack.mitre.org/techniques/T1059/001/)\n---","attachment_filenames":["assets/template.md","references/api-reference.md","references/standards.md","references/workflows.md","scripts/agent.py","scripts/process.py"],"attachments":[{"filename":"assets/template.md","content":"# PowerShell Deobfuscation Analysis Report\n\n## Report Metadata\n| Field | Value |\n|-------|-------|\n| Report ID | PS-DEOB-YYYY-NNNN |\n| Date | YYYY-MM-DD |\n| Sample Hash (SHA-256) | |\n| Original Filename | |\n| Classification | TLP:AMBER |\n\n## Obfuscation Layers Identified\n\n| Layer | Technique | Description |\n|-------|-----------|-------------|\n| 1 | | |\n| 2 | | |\n| 3 | | |\n\n## Deobfuscation Results\n\n### Layer-by-Layer Breakdown\n| Layer | Input Size | Output Size | Technique Applied |\n|-------|-----------|-------------|-------------------|\n| 1 | bytes | bytes | |\n| 2 | bytes | bytes | |\n\n### Final Deobfuscated Script Summary\n- **Total layers removed**:\n- **Final script purpose**:\n- **Execution method**:\n\n## Extracted IOCs\n\n### URLs\n| URL | Purpose |\n|-----|---------|\n| | Payload download / C2 |\n\n### IP Addresses\n| IP | Context |\n|----|---------|\n| | |\n\n### File System Artifacts\n| Path | Action |\n|------|--------|\n| | Created / Modified / Deleted |\n\n### Registry Keys\n| Key | Action 
|\n|-----|--------|\n| | Created / Modified |\n\n## Behavioral Analysis\n- **Download behavior**:\n- **Persistence mechanism**:\n- **Evasion techniques**:\n- **Payload type**:\n\n## MITRE ATT&CK Mapping\n| Technique | ID | Evidence |\n|-----------|-----|---------|\n| PowerShell | T1059.001 | Script execution |\n| Obfuscated Files | T1027 | Multi-layer encoding |\n| | | |\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1353,"content_sha256":"d408e6a18919eb167381b2cc278332b21c88e74e9477ab1d60b16866889878d9"},{"filename":"references/api-reference.md","content":"# PowerShell Deobfuscation — API Reference\n\n## Libraries\n\n| Library | Install | Purpose |\n|---------|---------|---------|\n| re | stdlib | Regex pattern matching for obfuscation detection |\n| base64 | stdlib | Base64 decoding of encoded commands |\n| pySigma | `pip install pySigma` | Sigma rule generation for detections |\n\n## Common Obfuscation Techniques\n\n| Technique | Pattern | Example |\n|-----------|---------|---------|\n| Base64 Encoding | `-EncodedCommand \u003cb64>` | `powershell -enc SQBFAFgA...` |\n| String Concatenation | `'str1'+'str2'` | `'Inv'+'oke'+'-Exp'+'ression'` |\n| Character Codes | `[char]73+[char]69` | `[char]73` = I, `[char]69` = E |\n| Backtick Escape | `` `I`E`X `` | Backtick breaks keyword detection |\n| Variable Substitution | `$env:COMSPEC` | Use env vars as execution paths |\n| Compression | `IO.Compression.DeflateStream` | Compressed + Base64 payload |\n\n## Detection Event IDs\n\n| Source | Event ID | Description |\n|--------|----------|-------------|\n| PowerShell | 4104 | Script block logging (deobfuscated content) |\n| Sysmon | 1 | Process creation with command line |\n| Defender | 1116 | Malware detection |\n\n## External References\n\n- [Invoke-Obfuscation](https://github.com/danielbohannon/Invoke-Obfuscation)\n- [PSDecode](https://github.com/R3MRUM/PSDecode)\n- [PowerShell ScriptBlock 
Logging](https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_logging)\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1425,"content_sha256":"b520346e3826be26e1158752d15c902060acd54e2cd2fbdab4dd1a4aaa4a5733"},{"filename":"references/standards.md","content":"# Standards and Frameworks Reference\n\n## PowerShell Obfuscation Taxonomy\n\n### Layer Classification\n| Layer | Technique | Example |\n|-------|-----------|---------|\n| L1 | Base64 EncodedCommand | `powershell -enc SQBFAFgA...` |\n| L2 | String Concatenation | `$a='Inv'+'oke'+'-Ex'+'pression'` |\n| L3 | Character Code Array | `[char[]](73,69,88)-join''` |\n| L4 | Tick-Mark Insertion | `` I`nv`oke-Exp`ress`ion `` |\n| L5 | Environment Variable | `$env:COMSPEC[4,15,25]-join''` |\n| L6 | SecureString | `ConvertTo-SecureString ... -Key` |\n| L7 | Compression + Base64 | `IO.Compression.DeflateStream` |\n| L8 | XOR Encoding | `$bytes | %{ $_ -bxor 0x42 }` |\n| L9 | Replace Chain | `.Replace('abc','I').Replace(...)` |\n| L10 | Format String | `(\"{2}{0}{1}\" -f 'ke-','Ex','Invo')` |\n\n### MITRE ATT&CK Mappings\n| Technique | ID | Description |\n|-----------|-----|------------|\n| Command and Scripting Interpreter: PowerShell | T1059.001 | Malicious PowerShell execution |\n| Obfuscated Files or Information | T1027 | Encoding/encryption of scripts |\n| Deobfuscate/Decode Files | T1140 | Runtime deobfuscation |\n| Ingress Tool Transfer | T1105 | Downloading payloads via PS |\n| System Binary Proxy Execution | T1218 | Using trusted binaries |\n\n## PowerShell AST Node Types for Analysis\n\n### Key Expression Nodes\n- `CommandExpression`: Direct command invocations\n- `InvokeMemberExpression`: Method calls on objects\n- `BinaryExpression`: String concatenation operators\n- `ArrayExpression`: Character array construction\n- `SubExpression`: Nested expression evaluation\n- `ExpandableStringExpression`: String interpolation\n\n## References\n- [PowerShell 
Language Specification](https://docs.microsoft.com/en-us/powershell/scripting/lang-spec/chapter-01)\n- [Invoke-Obfuscation Framework](https://github.com/danielbohannon/Invoke-Obfuscation)\n- [AMSI Interface Documentation](https://docs.microsoft.com/en-us/windows/win32/amsi/)\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1907,"content_sha256":"0ad72afc6f2161a7e0b1ceee335109c62dfeabc67725a7a903862f59fc98eef1"},{"filename":"references/workflows.md","content":"# PowerShell Deobfuscation Workflows\n\n## Workflow 1: Automated Multi-Layer Deobfuscation\n\n```\n[Obfuscated Script] --> [Identify Techniques] --> [Remove Tick Marks]\n |\n v\n [Resolve Concatenation]\n |\n v\n [Decode Base64 Layers]\n |\n v\n [IEX -> Write-Output]\n |\n v\n [Extract Final Payload]\n```\n\n## Workflow 2: AST-Based Analysis\n\n```\n[Script Input] --> [Parse AST] --> [Walk Expression Nodes] --> [Evaluate Expressions]\n |\n v\n [Reconstruct Commands]\n |\n v\n [Extract IOCs]\n```\n\n## Workflow 3: Dynamic Sandbox Deobfuscation\n\n```\n[Obfuscated Script] --> [Execute in Sandbox] --> [Capture ScriptBlock Logs]\n |\n v\n [Event ID 4104 Analysis]\n |\n v\n [Reconstruct Execution Chain]\n```\n\n### Steps:\n1. **Enable Logging**: Enable PowerShell ScriptBlock logging (Event ID 4104)\n2. **Execute**: Run obfuscated script in isolated sandbox\n3. **Collect**: Gather all ScriptBlock log entries\n4. **Reconstruct**: Assemble deobfuscated script from logged blocks\n5. 
**Extract**: Pull IOCs from the reconstructed clear-text script\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2348,"content_sha256":"644ffded2a31cb79b9f0eaf23cf7ff27c718786f5fc9f175b7b97c90764b5834"},{"filename":"scripts/agent.py","content":"#!/usr/bin/env python3\n\"\"\"PowerShell obfuscated malware deobfuscation agent.\"\"\"\n\nimport json\nimport argparse\nimport re\nimport base64\nfrom datetime import datetime\n\n\ndef decode_base64_commands(script_content):\n \"\"\"Find and decode Base64 encoded PowerShell commands.\"\"\"\n decoded = []\n b64_pattern = re.compile(r'-[eE](?:nc(?:odedcommand)?)\\s+([A-Za-z0-9+/=]{20,})')\n for match in b64_pattern.finditer(script_content):\n encoded = match.group(1)\n try:\n raw = base64.b64decode(encoded)\n text = raw.decode(\"utf-16-le\", errors=\"replace\")\n decoded.append({\"encoded\": encoded[:60] + \"...\", \"decoded\": text[:500]})\n except Exception:\n pass\n standalone_b64 = re.compile(r'[\"\\']([A-Za-z0-9+/]{40,}={0,2})[\"\\']')\n for match in standalone_b64.finditer(script_content):\n try:\n raw = base64.b64decode(match.group(1))\n text = raw.decode(\"utf-8\", errors=\"replace\")\n if text.isprintable() or \"http\" in text.lower():\n decoded.append({\"encoded\": match.group(1)[:60] + \"...\", \"decoded\": text[:500]})\n except Exception:\n pass\n return decoded\n\n\ndef deobfuscate_string_concatenation(script_content):\n \"\"\"Resolve string concatenation obfuscation.\"\"\"\n concat_pattern = re.compile(r\"(?:'[^']*'\\s*\\+\\s*){2,}'[^']*'\")\n resolved = []\n for match in concat_pattern.finditer(script_content):\n original = match.group(0)\n parts = re.findall(r\"'([^']*)'\", original)\n result = \"\".join(parts)\n resolved.append({\"obfuscated\": original[:80], \"resolved\": result[:500]})\n return resolved\n\n\ndef detect_obfuscation_techniques(script_content):\n \"\"\"Identify obfuscation techniques used in the script.\"\"\"\n techniques = []\n checks = [\n 
(r'-[eE](?:nc(?:odedcommand)?)', \"Base64 encoded command\", \"HIGH\"),\n (r'\\[(?:char|int)\\]\\s*\\d+', \"Character code conversion\", \"MEDIUM\"),\n (r'(?:iex|invoke-expression)', \"Invoke-Expression (IEX) execution\", \"HIGH\"),\n (r'\\$\\{[^}]+\\}', \"Variable name obfuscation with braces\", \"LOW\"),\n (r'\\.(?:replace|split|reverse)\\(', \"String manipulation methods\", \"MEDIUM\"),\n (r'-(?:join|split)\\s', \"Array join/split obfuscation\", \"MEDIUM\"),\n (r'(?:Net\\.WebClient|DownloadString|DownloadFile)', \"Web download cradle\", \"CRITICAL\"),\n (r'(?:Start-Process|Invoke-Item|cmd\\s*/c)', \"Process execution\", \"HIGH\"),\n (r'\\[System\\.Convert\\]::FromBase64String', \".NET Base64 decode\", \"HIGH\"),\n (r'(?:gci|ls|dir)\\s+env:', \"Environment variable access\", \"LOW\"),\n ]\n for pattern, name, severity in checks:\n if re.search(pattern, script_content, re.IGNORECASE):\n techniques.append({\"technique\": name, \"severity\": severity})\n return techniques\n\n\ndef extract_iocs(script_content):\n \"\"\"Extract indicators of compromise from deobfuscated content.\"\"\"\n iocs = {\"urls\": [], \"ips\": [], \"domains\": [], \"file_paths\": []}\n url_pattern = re.compile(r'https?://[^\\s\"\\'\u003c>]+')\n ip_pattern = re.compile(r'\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b')\n path_pattern = re.compile(r'[A-Z]:\\\\[\\w\\\\]+\\.\\w{2,4}|/(?:tmp|var|etc)/[\\w/]+')\n iocs[\"urls\"] = list(set(url_pattern.findall(script_content)))\n iocs[\"ips\"] = list(set(ip_pattern.findall(script_content)))\n iocs[\"file_paths\"] = list(set(path_pattern.findall(script_content)))\n return iocs\n\n\ndef run_analysis(script_path):\n \"\"\"Execute PowerShell deobfuscation analysis.\"\"\"\n print(f\"\\n{'='*60}\")\n print(f\" POWERSHELL MALWARE DEOBFUSCATION\")\n print(f\" File: {script_path}\")\n print(f\" Generated: {datetime.utcnow().isoformat()} UTC\")\n print(f\"{'='*60}\\n\")\n\n with open(script_path, \"r\", errors=\"replace\") as f:\n content = f.read()\n\n techniques = 
detect_obfuscation_techniques(content)\n print(f\"--- OBFUSCATION TECHNIQUES ({len(techniques)}) ---\")\n for t in techniques:\n print(f\" [{t['severity']}] {t['technique']}\")\n\n b64 = decode_base64_commands(content)\n print(f\"\\n--- BASE64 DECODED ({len(b64)}) ---\")\n for d in b64[:5]:\n print(f\" {d['decoded'][:100]}\")\n\n concat = deobfuscate_string_concatenation(content)\n print(f\"\\n--- STRING CONCAT RESOLVED ({len(concat)}) ---\")\n for c in concat[:5]:\n print(f\" {c['resolved'][:100]}\")\n\n all_decoded = content\n for d in b64:\n all_decoded += \"\\n\" + d[\"decoded\"]\n iocs = extract_iocs(all_decoded)\n print(f\"\\n--- IOCs ---\")\n print(f\" URLs: {iocs['urls'][:5]}\")\n print(f\" IPs: {iocs['ips'][:5]}\")\n print(f\" Paths: {iocs['file_paths'][:5]}\")\n\n return {\"techniques\": techniques, \"decoded_b64\": b64, \"concat\": concat, \"iocs\": iocs}\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"PowerShell Deobfuscation Agent\")\n parser.add_argument(\"--script\", required=True, help=\"Path to obfuscated PowerShell script\")\n parser.add_argument(\"--output\", help=\"Save report to JSON file\")\n args = parser.parse_args()\n\n report = run_analysis(args.script)\n if args.output:\n with open(args.output, \"w\") as f:\n json.dump(report, f, indent=2, default=str)\n print(f\"\\n[+] Report saved to {args.output}\")\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":5229,"content_sha256":"6fffa54145859c75d9e2e2d89a26e0974f626c8ec3efff43438d57c66bf31d68"},{"filename":"scripts/process.py","content":"#!/usr/bin/env python3\n\"\"\"\nPowerShell Malware Deobfuscation Script\n\nIdentifies and removes multiple layers of PowerShell obfuscation\nto reveal the underlying malicious payload and extract IOCs.\n\nRequirements:\n pip install regex\n\nUsage:\n python process.py --file obfuscated.ps1 --output deobfuscated.ps1\n python process.py --file obfuscated.ps1 
--extract-iocs\n\"\"\"\n\nimport argparse\nimport base64\nimport json\nimport re\nimport sys\nfrom pathlib import Path\n\n\nclass PowerShellDeobfuscator:\n \"\"\"Multi-layer PowerShell deobfuscation engine.\"\"\"\n\n def __init__(self):\n self.layers = []\n self.iocs = {\n \"urls\": set(),\n \"ips\": set(),\n \"domains\": set(),\n \"file_paths\": set(),\n \"registry_keys\": set(),\n \"suspicious_commands\": set(),\n }\n\n def analyze(self, content):\n \"\"\"Identify obfuscation techniques present.\"\"\"\n techniques = []\n\n checks = [\n (r'-[Ee]nc(?:odedcommand)?\\s+[A-Za-z0-9+/=]{20,}',\n \"Base64 EncodedCommand\"),\n (r'\\[Convert\\]::FromBase64String', \"FromBase64String\"),\n (r\"'\\s*\\+\\s*'\", \"String Concatenation (single-quote)\"),\n (r'\"\\s*\\+\\s*\"', \"String Concatenation (double-quote)\"),\n (r'\\[char\\]\\s*\\d+', \"Character Code Casting\"),\n (r'\\[char\\[\\]\\]\\s*\$[\\d,\\s]+\$', \"Character Array\"),\n (r'`[a-zA-Z]', \"Tick-Mark Insertion\"),\n (r'Invoke-Expression', \"Invoke-Expression\"),\n (r'\\bIEX\\b', \"IEX Alias\"),\n (r'\\|\\s*IEX', \"Pipeline IEX\"),\n (r'IO\\.Compression', \"Compression Stream\"),\n (r'-bxor\\s+\\d+', \"XOR Encoding\"),\n (r'\\.Replace\\(', \"Replace Chain\"),\n (r'ConvertTo-SecureString', \"SecureString\"),\n (r'\\$env:', \"Environment Variable\"),\n (r'-f\\s+[\\'\"]', \"Format String Operator\"),\n (r'New-Object\\s+IO\\.MemoryStream', \"MemoryStream\"),\n ]\n\n for pattern, name in checks:\n matches = re.findall(pattern, content, re.IGNORECASE)\n if matches:\n techniques.append({\"technique\": name, \"count\": len(matches)})\n\n return techniques\n\n def deobfuscate(self, content):\n \"\"\"Apply all deobfuscation layers iteratively.\"\"\"\n current = content\n iteration = 0\n\n while iteration \u003c 20:\n previous = current\n\n # Layer: Remove tick marks\n current = self._remove_ticks(current)\n\n # Layer: Resolve string concatenation\n current = self._resolve_concat(current)\n\n # Layer: Decode Base64 
EncodedCommand\n current = self._decode_base64_command(current)\n\n # Layer: Decode FromBase64String calls\n current = self._decode_frombase64(current)\n\n # Layer: Resolve character arrays\n current = self._resolve_char_arrays(current)\n\n # Layer: Resolve format strings\n current = self._resolve_format_strings(current)\n\n # Layer: Decompress streams\n current = self._decompress_streams(current)\n\n if current == previous:\n break\n\n self.layers.append({\n \"iteration\": iteration + 1,\n \"length_before\": len(previous),\n \"length_after\": len(current),\n })\n iteration += 1\n\n # Extract IOCs from final result\n self._extract_iocs(current)\n\n return current\n\n def _remove_ticks(self, content):\n \"\"\"Remove backtick obfuscation.\"\"\"\n escape_sequences = {'`n', '`r', '`t', '`a', '`b', '`f', '`v', '`0', '``'}\n result = []\n i = 0\n while i \u003c len(content):\n if content[i] == '`' and i + 1 \u003c len(content):\n pair = content[i:i+2]\n if pair in escape_sequences:\n result.append(pair)\n i += 2\n else:\n result.append(content[i+1])\n i += 2\n else:\n result.append(content[i])\n i += 1\n return ''.join(result)\n\n def _resolve_concat(self, content):\n \"\"\"Resolve string concatenation.\"\"\"\n # Single-quoted concatenation\n pattern = re.compile(r\"'([^']*)'\\s*\\+\\s*'([^']*)'\")\n while pattern.search(content):\n content = pattern.sub(r\"'\\1\\2'\", content)\n\n # Double-quoted concatenation\n pattern = re.compile(r'\"([^\"]*)\"\\s*\\+\\s*\"([^\"]*)\"')\n while pattern.search(content):\n content = pattern.sub(r'\"\\1\\2\"', content)\n\n return content\n\n def _decode_base64_command(self, content):\n \"\"\"Decode -EncodedCommand Base64 arguments.\"\"\"\n pattern = re.compile(\n r'-[Ee]nc(?:odedcommand)?\\s+([A-Za-z0-9+/=]{20,})',\n re.IGNORECASE\n )\n match = pattern.search(content)\n if match:\n try:\n decoded = base64.b64decode(match.group(1)).decode('utf-16-le')\n content = pattern.sub(decoded, content)\n except Exception:\n pass\n return 
content\n\n def _decode_frombase64(self, content):\n \"\"\"Decode [Convert]::FromBase64String calls.\"\"\"\n pattern = re.compile(\n r\"\\[Convert\\]::FromBase64String\$\\s*['\\\"]([A-Za-z0-9+/=]+)['\\\"]\\s*\$\",\n re.IGNORECASE\n )\n for match in pattern.finditer(content):\n try:\n decoded = base64.b64decode(match.group(1))\n decoded_str = decoded.decode('utf-8', errors='replace')\n content = content.replace(match.group(0), f\"'{decoded_str}'\")\n except Exception:\n pass\n return content\n\n def _resolve_char_arrays(self, content):\n \"\"\"Resolve [char] and [char[]] expressions.\"\"\"\n # [char]NN patterns\n pattern = re.compile(r'\\[char\\]\\s*(\\d+)', re.IGNORECASE)\n for match in pattern.finditer(content):\n try:\n char_val = chr(int(match.group(1)))\n content = content.replace(match.group(0), f\"'{char_val}'\")\n except (ValueError, OverflowError):\n pass\n\n return content\n\n def _resolve_format_strings(self, content):\n \"\"\"Resolve PowerShell format string operator.\"\"\"\n pattern = re.compile(\n r\"\$?\\s*['\\\"](\\{[\\d\\}{\\s]+[^'\\\"]*)['\\\"]\"\n r\"\\s*-f\\s*([^)]+)\$?\",\n re.IGNORECASE\n )\n for match in pattern.finditer(content):\n try:\n fmt_str = match.group(1)\n args_str = match.group(2)\n args = [a.strip().strip(\"'\\\"\") for a in args_str.split(\",\")]\n resolved = fmt_str\n for i, arg in enumerate(args):\n resolved = resolved.replace(f\"{{{i}}}\", arg)\n content = content.replace(match.group(0), f\"'{resolved}'\")\n except Exception:\n pass\n return content\n\n def _decompress_streams(self, content):\n \"\"\"Attempt to decode compressed Base64 payloads.\"\"\"\n import zlib\n import io\n\n b64_pattern = re.compile(r'[A-Za-z0-9+/=]{100,}')\n for match in b64_pattern.finditer(content):\n try:\n raw = base64.b64decode(match.group(0))\n # Try deflate\n decompressed = zlib.decompress(raw, -zlib.MAX_WBITS)\n decoded = decompressed.decode('utf-8', errors='replace')\n if len(decoded) > 50:\n content = content.replace(match.group(0), decoded)\n 
except Exception:\n try:\n # Try gzip\n raw = base64.b64decode(match.group(0))\n decompressed = zlib.decompress(raw, zlib.MAX_WBITS | 16)\n decoded = decompressed.decode('utf-8', errors='replace')\n if len(decoded) > 50:\n content = content.replace(match.group(0), decoded)\n except Exception:\n pass\n return content\n\n def _extract_iocs(self, content):\n \"\"\"Extract IOCs from deobfuscated content.\"\"\"\n # URLs\n for url in re.findall(r'https?://[^\\s\\'\"\u003c>)\\]]+', content, re.I):\n self.iocs[\"urls\"].add(url)\n\n # IPs\n for ip in re.findall(r'\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b', content):\n self.iocs[\"ips\"].add(ip)\n\n # File paths\n for path in re.findall(\n r'[A-Za-z]:\\\\[^\\s\\'\"\u003c>|]+', content, re.I\n ):\n self.iocs[\"file_paths\"].add(path)\n\n # Registry keys\n for key in re.findall(\n r'(?:HKLM|HKCU|HKCR)(?:\\\\[^\\s\\'\"\u003c>|]+)+', content, re.I\n ):\n self.iocs[\"registry_keys\"].add(key)\n\n # Suspicious commands\n for cmd in ['DownloadString', 'DownloadFile', 'Invoke-WebRequest',\n 'Start-Process', 'New-ScheduledTask', 'Add-MpPreference',\n 'Reflection.Assembly']:\n if cmd.lower() in content.lower():\n self.iocs[\"suspicious_commands\"].add(cmd)\n\n def get_report(self):\n \"\"\"Generate analysis report.\"\"\"\n return {\n \"layers_processed\": len(self.layers),\n \"layer_details\": self.layers,\n \"iocs\": {k: sorted(v) for k, v in self.iocs.items()},\n }\n\n\ndef main():\n parser = argparse.ArgumentParser(\n description=\"PowerShell Malware Deobfuscator\"\n )\n parser.add_argument(\"--file\", required=True, help=\"Input PS1 file\")\n parser.add_argument(\"--output\", help=\"Output deobfuscated file\")\n parser.add_argument(\"--extract-iocs\", action=\"store_true\",\n help=\"Extract IOCs from result\")\n parser.add_argument(\"--report\", help=\"Save JSON report\")\n\n args = parser.parse_args()\n\n with open(args.file, 'r', errors='replace') as f:\n content = f.read()\n\n deob = PowerShellDeobfuscator()\n\n print(\"[+] Analyzing 
obfuscation techniques...\")\n techniques = deob.analyze(content)\n for t in techniques:\n print(f\" - {t['technique']} ({t['count']} occurrences)\")\n\n print(f\"\\n[+] Deobfuscating ({len(content)} chars)...\")\n result = deob.deobfuscate(content)\n print(f\"[+] Result: {len(result)} chars\")\n\n if args.output:\n with open(args.output, 'w') as f:\n f.write(result)\n print(f\"[+] Saved to {args.output}\")\n\n report = deob.get_report()\n if args.extract_iocs or args.report:\n print(f\"\\n[+] Extracted IOCs:\")\n for category, values in report[\"iocs\"].items():\n if values:\n print(f\" {category}:\")\n for v in values:\n print(f\" - {v}\")\n\n if args.report:\n with open(args.report, 'w') as f:\n json.dump(report, f, indent=2)\n print(f\"[+] Report saved to {args.report}\")\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10939,"content_sha256":"e6a736b9b4cd023f86300d8e10a1eba330f1e3ae0e05483d8843d35ceadb3ffc"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"Deobfuscating PowerShell Obfuscated Malware","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Overview","type":"text"}]},{"type":"paragraph","content":[{"text":"PowerShell is heavily abused by malware authors due to its deep Windows integration and powerful scripting capabilities. Obfuscation techniques include string concatenation, Base64 encoding, character substitution, Invoke-Expression layering, SecureString abuse, environment variable manipulation, and tick-mark insertion. Modern malware uses multiple obfuscation layers requiring iterative deobfuscation. Tools like PSDecode, PowerDecode, and PowerPeeler automate much of this process, while manual AST (Abstract Syntax Tree) analysis handles custom obfuscation. 
PowerPeeler achieves a 95% deobfuscation correctness rate using instruction-level dynamic analysis of expression-related AST nodes.","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"When to Use","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"When performing authorized security testing that involves deobfuscating PowerShell-obfuscated malware","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"When analyzing malware samples or attack artifacts in a controlled environment","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"When conducting red team exercises or penetration testing engagements","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"When building detection capabilities based on offensive technique understanding","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Prerequisites","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Python 3.9+ with ","type":"text"},{"text":"base64","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":"re","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":"subprocess","type":"text","marks":[{"type":"code_inline"}]},{"text":" modules","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"PowerShell 5.1+ or PowerShell 7+ (for AST access)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"PSDecode (","type":"text"},{"text":"Install-Module PSDecode","type":"text","marks":[{"type":"code_inline"}]},{"text":")","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"PowerDecode 
(https://github.com/Malandrone/PowerDecode)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Isolated VM or sandbox for safe script execution","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"CyberChef for manual encoding transformations","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Understanding of PowerShell AST and Invoke-Expression patterns","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Key Concepts","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Common Obfuscation Techniques","type":"text"}]},{"type":"paragraph","content":[{"text":"PowerShell malware employs layered obfuscation to evade static detection. String concatenation splits commands across variables (","type":"text"},{"text":"$a='In'+'voke'","type":"text","marks":[{"type":"code_inline"}]},{"text":"). Base64 encoding wraps entire scripts in ","type":"text"},{"text":"-EncodedCommand","type":"text","marks":[{"type":"code_inline"}]},{"text":" parameters. Character code arrays use ","type":"text"},{"text":"[char]","type":"text","marks":[{"type":"code_inline"}]},{"text":" casting (","type":"text"},{"text":"[char[]](73,69,88)|%{$r+=$_}","type":"text","marks":[{"type":"code_inline"}]},{"text":"). Environment variable abuse reads substrings from ","type":"text"},{"text":"$env:","type":"text","marks":[{"type":"code_inline"}]},{"text":" paths. Tick-mark insertion adds backticks between characters that PowerShell ignores (","type":"text"},{"text":"I`nv`oke-Exp`ression","type":"text","marks":[{"type":"code_inline"}]},{"text":"). 
SecureString conversion encrypts strings using ConvertTo-SecureString with embedded keys.","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"AST-Based Deobfuscation","type":"text"}]},{"type":"paragraph","content":[{"text":"PowerShell's Abstract Syntax Tree exposes the parsed structure of scripts regardless of surface-level obfuscation. By walking the AST and evaluating expression nodes, analysts can resolve concatenated strings, decode encoded values, and reconstruct the original commands. PowerPeeler uses this approach at the instruction level, monitoring the execution process to correlate AST nodes with their evaluated results.","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Dynamic Execution Tracing","type":"text"}]},{"type":"paragraph","content":[{"text":"By replacing ","type":"text"},{"text":"Invoke-Expression","type":"text","marks":[{"type":"code_inline"}]},{"text":" (IEX) with ","type":"text"},{"text":"Write-Output","type":"text","marks":[{"type":"code_inline"}]},{"text":", analysts can safely capture the deobfuscated script content that would normally be executed. 
This technique works across multiple layers by iteratively replacing IEX calls until the final payload is revealed.","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Workflow","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Step 1: Identify Obfuscation Layers","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"#!/usr/bin/env python3\n\"\"\"Identify and classify PowerShell obfuscation techniques.\"\"\"\nimport re\nimport base64\nimport sys\n\n\ndef analyze_obfuscation(script_content):\n \"\"\"Identify obfuscation techniques used in PowerShell script.\"\"\"\n techniques = []\n\n # Check for Base64 encoded command\n b64_pattern = re.compile(\n r'-[Ee](?:nc(?:odedcommand)?)\\s+([A-Za-z0-9+/=]{20,})',\n re.IGNORECASE\n )\n if b64_pattern.search(script_content):\n techniques.append(\"Base64 EncodedCommand\")\n\n # Check for FromBase64String\n if re.search(r'\\[Convert\\]::FromBase64String', script_content, re.IGNORECASE):\n techniques.append(\"Base64 FromBase64String\")\n\n # Check for string concatenation\n concat_count = script_content.count(\"'+'\") + script_content.count('\"+\"')\n if concat_count > 3:\n techniques.append(f\"String Concatenation ({concat_count} joins)\")\n\n # Check for char array construction\n if re.search(r'\\[char\\]\\s*\\d+', script_content, re.IGNORECASE):\n techniques.append(\"Character Code Array\")\n\n # Check for Invoke-Expression variants\n iex_patterns = [\n r'Invoke-Expression',\n r'\\bIEX\\b',\n r'\\.\\s*\\(\\s*\\

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

,\n r'&\\s*\\(\\s*\\

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.