Input Validation for Construction Data Overview Validate incoming construction data before processing to catch errors early. Domain-specific validation rules for estimates, schedules, BIM exports, and field data. Validation Framework Core Validator Class Cost Estimate Validation Schedule Validation BIM Data Validation Field Data Validation Usage Examples Integration with DDC Pipeline Resources - Data Quality Best Practices : Validate early, validate often - Construction Data Standards : CSI, IFC, COBie specifications - Error Handling : Always provide actionable suggestions ---

, item['csi_code']):\n result.add_warning(f\"line_items[{i}].csi_code\", f\"Invalid CSI code format: {item['csi_code']}\", suggestion=\"Use format: XX XX XX\")\n\n # Check for zero amounts\n amount = item.get('quantity', 0) * item.get('unit_cost', 0)\n if amount == 0:\n result.add_warning(f\"line_items[{i}]\", \"Line item has zero amount\")\n\n def _validate_totals(self, data: dict, result: ValidationResult):\n if 'line_items' not in data or 'total' not in data:\n return\n\n calculated = sum(\n item.get('quantity', 0) * item.get('unit_cost', 0)\n for item in data['line_items']\n )\n\n declared = data['total']\n variance = abs(calculated - declared)\n\n if variance > 0.01:\n result.add_error(\"total\", f\"Total mismatch: calculated {calculated:.2f}, declared {declared:.2f}\", variance)\n\n def _validate_cost_ranges(self, data: dict, result: ValidationResult):\n gross_area = data['gross_area']\n\n for item in data.get('line_items', []):\n csi_div = item.get('csi_code', '')[:2]\n if csi_div in self.TYPICAL_RANGES:\n amount = item.get('quantity', 0) * item.get('unit_cost', 0)\n cost_per_sf = amount / gross_area if gross_area > 0 else 0\n\n low, high = self.TYPICAL_RANGES[csi_div]\n if cost_per_sf \u003c low * 0.5 or cost_per_sf > high * 2:\n result.add_warning(\n f\"line_items[{item.get('description', 'Unknown')}]\",\n f\"Cost ${cost_per_sf:.2f}/SF outside typical range ${low}-${high}/SF for Division {csi_div}\",\n cost_per_sf,\n \"Review unit costs and quantities\"\n )\n```\n\n### Schedule Validation\n\n```python\nclass ScheduleValidator:\n \"\"\"Validate schedule/planning inputs.\"\"\"\n\n def validate(self, schedule_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Required fields\n self._validate_required_fields(schedule_data, result)\n\n # Task validation\n if 'tasks' in schedule_data:\n self._validate_tasks(schedule_data['tasks'], result)\n self._validate_dependencies(schedule_data['tasks'], result)\n self._validate_resources(schedule_data['tasks'], result)\n\n return result\n\n def _validate_required_fields(self, data: dict, result: ValidationResult):\n required = ['project_name', 'start_date', 'tasks']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Required field '{field}' is missing\")\n\n def _validate_tasks(self, tasks: list, result: ValidationResult):\n task_ids = set()\n\n for i, task in enumerate(tasks):\n # Check for duplicate IDs\n task_id = task.get('id')\n if task_id in task_ids:\n result.add_error(f\"tasks[{i}].id\", f\"Duplicate task ID: {task_id}\")\n task_ids.add(task_id)\n\n # Check dates\n start = task.get('start_date')\n end = task.get('end_date')\n\n if start and end:\n try:\n start_dt = datetime.fromisoformat(start) if isinstance(start, str) else start\n end_dt = datetime.fromisoformat(end) if isinstance(end, str) else end\n\n if end_dt \u003c start_dt:\n result.add_error(f\"tasks[{i}]\", f\"End date before start date\", f\"{start} -> {end}\")\n\n # Check for unrealistic durations\n duration = (end_dt - start_dt).days\n if duration > 365:\n result.add_warning(f\"tasks[{i}]\", f\"Task duration exceeds 1 year ({duration} days)\")\n if duration == 0 and task.get('type') != 'milestone':\n result.add_warning(f\"tasks[{i}]\", \"Task has zero duration but is not marked as milestone\")\n\n except ValueError as e:\n result.add_error(f\"tasks[{i}]\", f\"Invalid date format: {e}\")\n\n # Check for missing duration\n if not task.get('duration') and not (start and end):\n result.add_error(f\"tasks[{i}]\", \"Task missing duration or start/end dates\")\n\n def _validate_dependencies(self, tasks: list, result: ValidationResult):\n task_ids = {t.get('id') for t in tasks}\n task_dict = {t.get('id'): t for t in tasks}\n\n for task in tasks:\n predecessors = task.get('predecessors', [])\n for pred_id in predecessors:\n # Check predecessor exists\n if pred_id not in task_ids:\n result.add_error(f\"tasks[{task.get('id')}].predecessors\", f\"Predecessor '{pred_id}' does not exist\")\n continue\n\n # Check for logical sequence (if dates available)\n pred = task_dict.get(pred_id)\n if pred and pred.get('end_date') and task.get('start_date'):\n pred_end = datetime.fromisoformat(pred['end_date']) if isinstance(pred['end_date'], str) else pred['end_date']\n task_start = datetime.fromisoformat(task['start_date']) if isinstance(task['start_date'], str) else task['start_date']\n\n if task_start \u003c pred_end:\n result.add_error(\n f\"tasks[{task.get('id')}]\",\n f\"Task starts before predecessor '{pred_id}' ends\",\n f\"Pred ends: {pred_end}, Task starts: {task_start}\"\n )\n\n def _validate_resources(self, tasks: list, result: ValidationResult):\n # Check for resource over-allocation by date\n resource_usage = {}\n\n for task in tasks:\n resources = task.get('resources', [])\n start = task.get('start_date')\n end = task.get('end_date')\n\n if not (resources and start and end):\n continue\n\n # Simplified: just check if any resource assigned to multiple tasks\n for resource in resources:\n res_id = resource.get('id') or resource.get('name')\n if res_id not in resource_usage:\n resource_usage[res_id] = []\n resource_usage[res_id].append({\n 'task': task.get('id'),\n 'start': start,\n 'end': end,\n 'allocation': resource.get('allocation', 100)\n })\n\n # Check allocations\n for res_id, assignments in resource_usage.items():\n if len(assignments) > 1:\n # Simple overlap check\n total_allocation = sum(a['allocation'] for a in assignments)\n if total_allocation > 100:\n result.add_warning(\n f\"resource[{res_id}]\",\n f\"Resource may be over-allocated ({total_allocation}%)\",\n suggestion=\"Check for overlapping assignments\"\n )\n```\n\n### BIM Data Validation\n\n```python\nclass BIMDataValidator:\n \"\"\"Validate BIM export data (IFC, COBie, etc.).\"\"\"\n\n def validate(self, bim_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Check element data\n if 'elements' in bim_data:\n self._validate_elements(bim_data['elements'], result)\n\n # Check property sets\n if 'property_sets' in bim_data:\n self._validate_properties(bim_data['property_sets'], result)\n\n # Check spatial structure\n if 'spatial_structure' in bim_data:\n self._validate_spatial(bim_data['spatial_structure'], result)\n\n return result\n\n def _validate_elements(self, elements: list, result: ValidationResult):\n guids = set()\n\n for i, elem in enumerate(elements):\n # Check for unique GUIDs\n guid = elem.get('guid')\n if guid in guids:\n result.add_error(f\"elements[{i}].guid\", f\"Duplicate GUID: {guid}\")\n guids.add(guid)\n\n # Check for required properties\n if not elem.get('ifc_type'):\n result.add_warning(f\"elements[{i}]\", \"Element missing IFC type\")\n\n if not elem.get('name'):\n result.add_warning(f\"elements[{i}]\", \"Element missing name\")\n\n # Check geometry\n if not elem.get('geometry') and not elem.get('location'):\n result.add_warning(f\"elements[{i}]\", \"Element has no geometry or location\")\n\n # Check for valid quantities\n for qty_name in ['area', 'volume', 'length']:\n if qty_name in elem and elem[qty_name] \u003c 0:\n result.add_error(f\"elements[{i}].{qty_name}\", f\"Negative {qty_name} value\", elem[qty_name])\n\n def _validate_properties(self, property_sets: list, result: ValidationResult):\n for pset in property_sets:\n pset_name = pset.get('name', 'Unknown')\n\n # Check for empty property sets\n if not pset.get('properties'):\n result.add_warning(f\"property_set[{pset_name}]\", \"Property set has no properties\")\n\n # Check property values\n for prop in pset.get('properties', []):\n if prop.get('value') is None:\n result.add_info(f\"property_set[{pset_name}].{prop.get('name')}\", \"Property has null value\")\n\n def _validate_spatial(self, spatial: dict, result: ValidationResult):\n # Check for proper hierarchy\n if not spatial.get('site'):\n result.add_warning(\"spatial_structure\", \"No site defined\")\n if not spatial.get('building'):\n result.add_warning(\"spatial_structure\", \"No building defined\")\n if not spatial.get('levels') or len(spatial.get('levels', [])) == 0:\n result.add_warning(\"spatial_structure\", \"No levels/floors defined\")\n```\n\n### Field Data Validation\n\n```python\nclass FieldDataValidator:\n \"\"\"Validate field/site data inputs.\"\"\"\n\n def validate(self, field_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Daily report validation\n if field_data.get('type') == 'daily_report':\n self._validate_daily_report(field_data, result)\n\n # Inspection data\n if field_data.get('type') == 'inspection':\n self._validate_inspection(field_data, result)\n\n # Progress data\n if field_data.get('type') == 'progress':\n self._validate_progress(field_data, result)\n\n return result\n\n def _validate_daily_report(self, data: dict, result: ValidationResult):\n required = ['date', 'weather', 'workforce']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Daily report missing '{field}'\")\n\n # Validate workforce\n if 'workforce' in data:\n total = sum(w.get('count', 0) for w in data['workforce'])\n if total == 0:\n result.add_warning(\"workforce\", \"No workers reported on-site\")\n if total > 500:\n result.add_warning(\"workforce\", f\"Unusually high workforce count: {total}\")\n\n # Validate date\n if 'date' in data:\n try:\n report_date = datetime.fromisoformat(data['date']) if isinstance(data['date'], str) else data['date']\n if report_date > datetime.now():\n result.add_error(\"date\", \"Report date is in the future\")\n except ValueError:\n result.add_error(\"date\", \"Invalid date format\")\n\n def _validate_inspection(self, data: dict, result: ValidationResult):\n required = ['inspection_type', 'date', 'inspector', 'result']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Inspection missing '{field}'\")\n\n # Check result value\n valid_results = ['pass', 'fail', 'conditional', 'not_applicable']\n if data.get('result') and data['result'].lower() not in valid_results:\n result.add_warning(\"result\", f\"Non-standard inspection result: {data['result']}\")\n\n def _validate_progress(self, data: dict, result: ValidationResult):\n # Check percentage values\n if 'percent_complete' in data:\n pct = data['percent_complete']\n if pct \u003c 0 or pct > 100:\n result.add_error(\"percent_complete\", f\"Invalid percentage: {pct}\", suggestion=\"Must be 0-100\")\n\n # Check for regression (if previous value available)\n if 'previous_percent' in data and 'percent_complete' in data:\n if data['percent_complete'] \u003c data['previous_percent']:\n result.add_warning(\"percent_complete\", \"Progress decreased from previous report\",\n f\"{data['previous_percent']}% -> {data['percent_complete']}%\")\n```\n\n## Usage Examples\n\n```python\n# Validate a cost estimate\nestimate = {\n 'project_name': 'Office Building',\n 'estimate_date': '2026-01-15',\n 'gross_area': 50000,\n 'line_items': [\n {'description': 'Concrete', 'csi_code': '03 30 00', 'quantity': 5000, 'unit_cost': 150},\n {'description': 'Steel', 'csi_code': '05 12 00', 'quantity': 200, 'unit_cost': 2500},\n ],\n 'total': 1250000\n}\n\nvalidator = CostEstimateValidator()\nresult = validator.validate(estimate)\nprint(result.to_report())\n\n# Validate before processing\nif result.is_valid:\n process_estimate(estimate)\nelse:\n print(\"Fix errors before processing\")\n for error in result.errors:\n print(f\" - {error.field}: {error.message}\")\n```\n\n## Integration with DDC Pipeline\n\n```python\n# Validate all inputs before pipeline execution\ndef validate_pipeline_inputs(inputs: dict) -> bool:\n validators = {\n 'estimate': CostEstimateValidator(),\n 'schedule': ScheduleValidator(),\n 'bim_data': BIMDataValidator(),\n 'field_data': FieldDataValidator()\n }\n\n all_valid = True\n for input_type, data in inputs.items():\n if input_type in validators:\n result = validators[input_type].validate(data)\n if not result.is_valid:\n print(f\"\\n{input_type.upper()} VALIDATION FAILED:\")\n print(result.to_report())\n all_valid = False\n\n return all_valid\n```\n\n## Resources\n\n- **Data Quality Best Practices**: Validate early, validate often\n- **Construction Data Standards**: CSI, IFC, COBie specifications\n- **Error Handling**: Always provide actionable suggestions\n---","attachment_filenames":["claw.json","instructions.md"],"attachments":[{"filename":"claw.json","content":"{\n \"name\": \"input-validation\",\n \"version\": \"2.0.0\",\n \"description\": \"Validate construction data inputs before processing: cost estimates, schedules, BIM data, field reports. Catch errors early with domain-specific rules.\",\n \"author\": \"datadrivenconstruction\",\n \"license\": \"MIT\",\n \"permissions\": [\n \"filesystem\"\n ],\n \"entry\": \"instructions.md\",\n \"tags\": [\n \"construction\",\n \"estimation\",\n \"BIM\",\n \"cost-management\",\n \"scheduling\"\n ],\n \"models\": [\n \"claude-*\",\n \"gpt-*\"\n ],\n \"minOpenClawVersion\": \"0.8.0\"\n}","content_type":"application/json; charset=utf-8","language":"json","size":541,"content_sha256":"1102a76a82b9282f6d635a31056e0c845006436b9be260e0f376b6d383a670c6"},{"filename":"instructions.md","content":"You are a construction industry assistant specializing in construction project management.\n\nValidate construction data inputs before processing: cost estimates, schedules, BIM data, field reports. Catch errors early with domain-specific rules.\n\nWhen the user asks to create cost estimates or analyze costs:\n1. Gather the required input data from the user\n2. Process the data using the methods described in SKILL.md\n3. Present results in a clear, structured format\n4. Offer follow-up analysis or export options\n\n## Input Format\n- The user provides project data, file paths, or parameters as described in SKILL.md\n- Accept data in common formats: CSV, Excel, JSON, or direct input\n\n## Output Format\n- Present results in structured tables when applicable\n- Include summary statistics and key findings\n- Offer export to Excel/CSV/JSON when relevant\n\n## Key Reference\n- See SKILL.md for detailed implementation code, classes, and methods\n- Follow the patterns and APIs defined in the skill documentation\n\n## Constraints\n- Only use data provided by the user or referenced in the skill\n- Validate inputs before processing\n- Report errors clearly with suggested fixes\n- Follow construction industry standards and best practices\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1220,"content_sha256":"ef5bb8ec520fadb5f0e1739800eeb5a40368f55e9135dae3db5bf1d82a180783"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"Input Validation for Construction Data","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Overview","type":"text"}]},{"type":"paragraph","content":[{"text":"Validate incoming construction data before processing to catch errors early. Domain-specific validation rules for estimates, schedules, BIM exports, and field data.","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Validation Framework","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Core Validator Class","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Callable, Optional\nfrom enum import Enum\nimport re\nfrom datetime import datetime\n\nclass ValidationSeverity(Enum):\n ERROR = \"error\" # Must fix, blocks processing\n WARNING = \"warning\" # Should review, allows processing\n INFO = \"info\" # FYI, no action needed\n\n@dataclass\nclass ValidationIssue:\n field: str\n message: str\n severity: ValidationSeverity\n value: Any = None\n suggestion: str = None\n\n@dataclass\nclass ValidationResult:\n is_valid: bool\n issues: List[ValidationIssue] = field(default_factory=list)\n\n def add_error(self, field: str, message: str, value: Any = None, suggestion: str = None):\n self.issues.append(ValidationIssue(field, message, ValidationSeverity.ERROR, value, suggestion))\n self.is_valid = False\n\n def add_warning(self, field: str, message: str, value: Any = None, suggestion: str = None):\n self.issues.append(ValidationIssue(field, message, ValidationSeverity.WARNING, value, suggestion))\n\n def add_info(self, field: str, message: str, value: Any = None):\n self.issues.append(ValidationIssue(field, message, ValidationSeverity.INFO, value))\n\n @property\n def errors(self) -> List[ValidationIssue]:\n return [i for i in self.issues if i.severity == ValidationSeverity.ERROR]\n\n @property\n def warnings(self) -> List[ValidationIssue]:\n return [i for i in self.issues if i.severity == ValidationSeverity.WARNING]\n\n def to_report(self) -> str:\n lines = [\"VALIDATION REPORT\", \"=\" * 50]\n lines.append(f\"Status: {'PASSED' if self.is_valid else 'FAILED'}\")\n lines.append(f\"Errors: {len(self.errors)}, Warnings: {len(self.warnings)}\")\n lines.append(\"\")\n\n for issue in self.issues:\n icon = \"❌\" if issue.severity == ValidationSeverity.ERROR else \"⚠️\" if issue.severity == ValidationSeverity.WARNING else \"ℹ️\"\n lines.append(f\"{icon} [{issue.field}] {issue.message}\")\n if issue.suggestion:\n lines.append(f\" Suggestion: {issue.suggestion}\")\n\n return \"\\n\".join(lines)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Cost Estimate Validation","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"class CostEstimateValidator:\n \"\"\"Validate cost estimate inputs.\"\"\"\n\n # Typical cost ranges per CSI division ($/SF)\n TYPICAL_RANGES = {\n '03': (15, 45), # Concrete\n '04': (8, 25), # Masonry\n '05': (12, 35), # Metals\n '06': (5, 20), # Wood/Plastics\n '07': (8, 30), # Thermal/Moisture\n '08': (15, 50), # Openings\n '09': (10, 40), # Finishes\n '22': (8, 25), # Plumbing\n '23': (12, 40), # HVAC\n '26': (10, 35), # Electrical\n }\n\n def validate(self, estimate_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Required fields\n self._validate_required_fields(estimate_data, result)\n\n # Line item validation\n if 'line_items' in estimate_data:\n self._validate_line_items(estimate_data['line_items'], result)\n\n # Total validation\n self._validate_totals(estimate_data, result)\n\n # Cost range validation\n if 'gross_area' in estimate_data:\n self._validate_cost_ranges(estimate_data, result)\n\n return result\n\n def _validate_required_fields(self, data: dict, result: ValidationResult):\n required = ['project_name', 'estimate_date', 'line_items', 'total']\n for field in required:\n if field not in data or data[field] is None:\n result.add_error(field, f\"Required field '{field}' is missing\")\n\n def _validate_line_items(self, items: list, result: ValidationResult):\n for i, item in enumerate(items):\n # Check for negative values\n if item.get('quantity', 0) \u003c 0:\n result.add_error(f\"line_items[{i}].quantity\", \"Quantity cannot be negative\", item.get('quantity'))\n\n if item.get('unit_cost', 0) \u003c 0:\n result.add_error(f\"line_items[{i}].unit_cost\", \"Unit cost cannot be negative\", item.get('unit_cost'))\n\n # Check for missing descriptions\n if not item.get('description'):\n result.add_warning(f\"line_items[{i}].description\", \"Line item missing description\")\n\n # Check for valid CSI code\n if item.get('csi_code'):\n if not re.match(r'^\\d{2}\\s?\\d{2}\\s?\\d{2}

Input Validation for Construction Data Overview Validate incoming construction data before processing to catch errors early. Domain-specific validation rules for estimates, schedules, BIM exports, and field data. Validation Framework Core Validator Class Cost Estimate Validation Schedule Validation BIM Data Validation Field Data Validation Usage Examples Integration with DDC Pipeline Resources - Data Quality Best Practices : Validate early, validate often - Construction Data Standards : CSI, IFC, COBie specifications - Error Handling : Always provide actionable suggestions ---

, item['csi_code']):\n result.add_warning(f\"line_items[{i}].csi_code\", f\"Invalid CSI code format: {item['csi_code']}\", suggestion=\"Use format: XX XX XX\")\n\n # Check for zero amounts\n amount = item.get('quantity', 0) * item.get('unit_cost', 0)\n if amount == 0:\n result.add_warning(f\"line_items[{i}]\", \"Line item has zero amount\")\n\n def _validate_totals(self, data: dict, result: ValidationResult):\n if 'line_items' not in data or 'total' not in data:\n return\n\n calculated = sum(\n item.get('quantity', 0) * item.get('unit_cost', 0)\n for item in data['line_items']\n )\n\n declared = data['total']\n variance = abs(calculated - declared)\n\n if variance > 0.01:\n result.add_error(\"total\", f\"Total mismatch: calculated {calculated:.2f}, declared {declared:.2f}\", variance)\n\n def _validate_cost_ranges(self, data: dict, result: ValidationResult):\n gross_area = data['gross_area']\n\n for item in data.get('line_items', []):\n csi_div = item.get('csi_code', '')[:2]\n if csi_div in self.TYPICAL_RANGES:\n amount = item.get('quantity', 0) * item.get('unit_cost', 0)\n cost_per_sf = amount / gross_area if gross_area > 0 else 0\n\n low, high = self.TYPICAL_RANGES[csi_div]\n if cost_per_sf \u003c low * 0.5 or cost_per_sf > high * 2:\n result.add_warning(\n f\"line_items[{item.get('description', 'Unknown')}]\",\n f\"Cost ${cost_per_sf:.2f}/SF outside typical range ${low}-${high}/SF for Division {csi_div}\",\n cost_per_sf,\n \"Review unit costs and quantities\"\n )","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Schedule Validation","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"class ScheduleValidator:\n \"\"\"Validate schedule/planning inputs.\"\"\"\n\n def validate(self, schedule_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Required fields\n self._validate_required_fields(schedule_data, result)\n\n # Task validation\n if 'tasks' in schedule_data:\n self._validate_tasks(schedule_data['tasks'], result)\n self._validate_dependencies(schedule_data['tasks'], result)\n self._validate_resources(schedule_data['tasks'], result)\n\n return result\n\n def _validate_required_fields(self, data: dict, result: ValidationResult):\n required = ['project_name', 'start_date', 'tasks']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Required field '{field}' is missing\")\n\n def _validate_tasks(self, tasks: list, result: ValidationResult):\n task_ids = set()\n\n for i, task in enumerate(tasks):\n # Check for duplicate IDs\n task_id = task.get('id')\n if task_id in task_ids:\n result.add_error(f\"tasks[{i}].id\", f\"Duplicate task ID: {task_id}\")\n task_ids.add(task_id)\n\n # Check dates\n start = task.get('start_date')\n end = task.get('end_date')\n\n if start and end:\n try:\n start_dt = datetime.fromisoformat(start) if isinstance(start, str) else start\n end_dt = datetime.fromisoformat(end) if isinstance(end, str) else end\n\n if end_dt \u003c start_dt:\n result.add_error(f\"tasks[{i}]\", f\"End date before start date\", f\"{start} -> {end}\")\n\n # Check for unrealistic durations\n duration = (end_dt - start_dt).days\n if duration > 365:\n result.add_warning(f\"tasks[{i}]\", f\"Task duration exceeds 1 year ({duration} days)\")\n if duration == 0 and task.get('type') != 'milestone':\n result.add_warning(f\"tasks[{i}]\", \"Task has zero duration but is not marked as milestone\")\n\n except ValueError as e:\n result.add_error(f\"tasks[{i}]\", f\"Invalid date format: {e}\")\n\n # Check for missing duration\n if not task.get('duration') and not (start and end):\n result.add_error(f\"tasks[{i}]\", \"Task missing duration or start/end dates\")\n\n def _validate_dependencies(self, tasks: list, result: ValidationResult):\n task_ids = {t.get('id') for t in tasks}\n task_dict = {t.get('id'): t for t in tasks}\n\n for task in tasks:\n predecessors = task.get('predecessors', [])\n for pred_id in predecessors:\n # Check predecessor exists\n if pred_id not in task_ids:\n result.add_error(f\"tasks[{task.get('id')}].predecessors\", f\"Predecessor '{pred_id}' does not exist\")\n continue\n\n # Check for logical sequence (if dates available)\n pred = task_dict.get(pred_id)\n if pred and pred.get('end_date') and task.get('start_date'):\n pred_end = datetime.fromisoformat(pred['end_date']) if isinstance(pred['end_date'], str) else pred['end_date']\n task_start = datetime.fromisoformat(task['start_date']) if isinstance(task['start_date'], str) else task['start_date']\n\n if task_start \u003c pred_end:\n result.add_error(\n f\"tasks[{task.get('id')}]\",\n f\"Task starts before predecessor '{pred_id}' ends\",\n f\"Pred ends: {pred_end}, Task starts: {task_start}\"\n )\n\n def _validate_resources(self, tasks: list, result: ValidationResult):\n # Check for resource over-allocation by date\n resource_usage = {}\n\n for task in tasks:\n resources = task.get('resources', [])\n start = task.get('start_date')\n end = task.get('end_date')\n\n if not (resources and start and end):\n continue\n\n # Simplified: just check if any resource assigned to multiple tasks\n for resource in resources:\n res_id = resource.get('id') or resource.get('name')\n if res_id not in resource_usage:\n resource_usage[res_id] = []\n resource_usage[res_id].append({\n 'task': task.get('id'),\n 'start': start,\n 'end': end,\n 'allocation': resource.get('allocation', 100)\n })\n\n # Check allocations\n for res_id, assignments in resource_usage.items():\n if len(assignments) > 1:\n # Simple overlap check\n total_allocation = sum(a['allocation'] for a in assignments)\n if total_allocation > 100:\n result.add_warning(\n f\"resource[{res_id}]\",\n f\"Resource may be over-allocated ({total_allocation}%)\",\n suggestion=\"Check for overlapping assignments\"\n )","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"BIM Data Validation","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"class BIMDataValidator:\n \"\"\"Validate BIM export data (IFC, COBie, etc.).\"\"\"\n\n def validate(self, bim_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Check element data\n if 'elements' in bim_data:\n self._validate_elements(bim_data['elements'], result)\n\n # Check property sets\n if 'property_sets' in bim_data:\n self._validate_properties(bim_data['property_sets'], result)\n\n # Check spatial structure\n if 'spatial_structure' in bim_data:\n self._validate_spatial(bim_data['spatial_structure'], result)\n\n return result\n\n def _validate_elements(self, elements: list, result: ValidationResult):\n guids = set()\n\n for i, elem in enumerate(elements):\n # Check for unique GUIDs\n guid = elem.get('guid')\n if guid in guids:\n result.add_error(f\"elements[{i}].guid\", f\"Duplicate GUID: {guid}\")\n guids.add(guid)\n\n # Check for required properties\n if not elem.get('ifc_type'):\n result.add_warning(f\"elements[{i}]\", \"Element missing IFC type\")\n\n if not elem.get('name'):\n result.add_warning(f\"elements[{i}]\", \"Element missing name\")\n\n # Check geometry\n if not elem.get('geometry') and not elem.get('location'):\n result.add_warning(f\"elements[{i}]\", \"Element has no geometry or location\")\n\n # Check for valid quantities\n for qty_name in ['area', 'volume', 'length']:\n if qty_name in elem and elem[qty_name] \u003c 0:\n result.add_error(f\"elements[{i}].{qty_name}\", f\"Negative {qty_name} value\", elem[qty_name])\n\n def _validate_properties(self, property_sets: list, result: ValidationResult):\n for pset in property_sets:\n pset_name = pset.get('name', 'Unknown')\n\n # Check for empty property sets\n if not pset.get('properties'):\n result.add_warning(f\"property_set[{pset_name}]\", \"Property set has no properties\")\n\n # Check property values\n for prop in pset.get('properties', []):\n if prop.get('value') is None:\n result.add_info(f\"property_set[{pset_name}].{prop.get('name')}\", \"Property has null value\")\n\n def _validate_spatial(self, spatial: dict, result: ValidationResult):\n # Check for proper hierarchy\n if not spatial.get('site'):\n result.add_warning(\"spatial_structure\", \"No site defined\")\n if not spatial.get('building'):\n result.add_warning(\"spatial_structure\", \"No building defined\")\n if not spatial.get('levels') or len(spatial.get('levels', [])) == 0:\n result.add_warning(\"spatial_structure\", \"No levels/floors defined\")","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Field Data Validation","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"class FieldDataValidator:\n \"\"\"Validate field/site data inputs.\"\"\"\n\n def validate(self, field_data: Dict[str, Any]) -> ValidationResult:\n result = ValidationResult(is_valid=True)\n\n # Daily report validation\n if field_data.get('type') == 'daily_report':\n self._validate_daily_report(field_data, result)\n\n # Inspection data\n if field_data.get('type') == 'inspection':\n self._validate_inspection(field_data, result)\n\n # Progress data\n if field_data.get('type') == 'progress':\n self._validate_progress(field_data, result)\n\n return result\n\n def _validate_daily_report(self, data: dict, result: ValidationResult):\n required = ['date', 'weather', 'workforce']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Daily report missing '{field}'\")\n\n # Validate workforce\n if 'workforce' in data:\n total = sum(w.get('count', 0) for w in data['workforce'])\n if total == 0:\n result.add_warning(\"workforce\", \"No workers reported on-site\")\n if total > 500:\n result.add_warning(\"workforce\", f\"Unusually high workforce count: {total}\")\n\n # Validate date\n if 'date' in data:\n try:\n report_date = datetime.fromisoformat(data['date']) if isinstance(data['date'], str) else data['date']\n if report_date > datetime.now():\n result.add_error(\"date\", \"Report date is in the future\")\n except ValueError:\n result.add_error(\"date\", \"Invalid date format\")\n\n def _validate_inspection(self, data: dict, result: ValidationResult):\n required = ['inspection_type', 'date', 'inspector', 'result']\n for field in required:\n if field not in data:\n result.add_error(field, f\"Inspection missing '{field}'\")\n\n # Check result value\n valid_results = ['pass', 'fail', 'conditional', 'not_applicable']\n if data.get('result') and data['result'].lower() not in valid_results:\n result.add_warning(\"result\", f\"Non-standard inspection result: {data['result']}\")\n\n def _validate_progress(self, data: dict, result: ValidationResult):\n # Check percentage values\n if 'percent_complete' in data:\n pct = data['percent_complete']\n if pct \u003c 0 or pct > 100:\n result.add_error(\"percent_complete\", f\"Invalid percentage: {pct}\", suggestion=\"Must be 0-100\")\n\n # Check for regression (if previous value available)\n if 'previous_percent' in data and 'percent_complete' in data:\n if data['percent_complete'] \u003c data['previous_percent']:\n result.add_warning(\"percent_complete\", \"Progress decreased from previous report\",\n f\"{data['previous_percent']}% -> {data['percent_complete']}%\")","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Usage Examples","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# Validate a cost estimate\nestimate = {\n 'project_name': 'Office Building',\n 'estimate_date': '2026-01-15',\n 'gross_area': 50000,\n 'line_items': [\n {'description': 'Concrete', 'csi_code': '03 30 00', 'quantity': 5000, 'unit_cost': 150},\n {'description': 'Steel', 'csi_code': '05 12 00', 'quantity': 200, 'unit_cost': 2500},\n ],\n 'total': 1250000\n}\n\nvalidator = CostEstimateValidator()\nresult = validator.validate(estimate)\nprint(result.to_report())\n\n# Validate before processing\nif result.is_valid:\n process_estimate(estimate)\nelse:\n print(\"Fix errors before processing\")\n for error in result.errors:\n print(f\" - {error.field}: {error.message}\")","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Integration with DDC Pipeline","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# Validate all inputs before pipeline execution\ndef validate_pipeline_inputs(inputs: dict) -> bool:\n validators = {\n 'estimate': CostEstimateValidator(),\n 'schedule': ScheduleValidator(),\n 'bim_data': BIMDataValidator(),\n 'field_data': FieldDataValidator()\n }\n\n all_valid = True\n for input_type, data in inputs.items():\n if input_type in validators:\n result = validators[input_type].validate(data)\n if not result.is_valid:\n print(f\"\\n{input_type.upper()} VALIDATION FAILED:\")\n print(result.to_report())\n all_valid = False\n\n return all_valid","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Resources","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Data Quality Best Practices","type":"text","marks":[{"type":"strong"}]},{"text":": Validate early, validate often","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Construction Data Standards","type":"text","marks":[{"type":"strong"}]},{"text":": CSI, IFC, COBie specifications","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Error Handling","type":"text","marks":[{"type":"strong"}]},{"text":": Always provide actionable suggestions","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}}]},"metadata":{"date":"2026-06-05","name":"input-validation","author":"@skillopedia","source":{"stars":155,"repo_name":"ddc_skills_for_ai_agents_in_construction","origin_url":"https://github.com/datadrivenconstruction/ddc_skills_for_ai_agents_in_construction/blob/HEAD/4_DDC_Curated/Data-Validation/input-validation/SKILL.md","repo_owner":"datadrivenconstruction","body_sha256":"9573b9d6d7605116b79fd9db89d4a46caadde7776c3df2332e1bd0191d0b91b6","cluster_key":"dec0154ee4927acddb217364fd53ffe8eef6db04535814275f8ff20e5b9d3ee9","clean_bundle":{"format":"clean-skill-bundle-v1","source":"datadrivenconstruction/ddc_skills_for_ai_agents_in_construction/4_DDC_Curated/Data-Validation/input-validation/SKILL.md","attachments":[{"id":"3bd5bc7e-9a22-5eca-9ab0-bee5905fa76a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3bd5bc7e-9a22-5eca-9ab0-bee5905fa76a/attachment.json","path":"claw.json","size":541,"sha256":"1102a76a82b9282f6d635a31056e0c845006436b9be260e0f376b6d383a670c6","contentType":"application/json; charset=utf-8"},{"id":"e1ea3390-56e0-5b6d-a2fc-e31fcef4cbe0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e1ea3390-56e0-5b6d-a2fc-e31fcef4cbe0/attachment.md","path":"instructions.md","size":1220,"sha256":"ef5bb8ec520fadb5f0e1739800eeb5a40368f55e9135dae3db5bf1d82a180783","contentType":"text/markdown; charset=utf-8"}],"bundle_sha256":"6ce47d57428a6c3e96b5b44903d681f1ccabc594185830bdae9fcb6e5eb0bdc7","attachment_count":2,"text_attachments":2,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"4_DDC_Curated/Data-Validation/input-validation/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"data-analytics","category_label":"Data"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"data-analytics","homepage":"https://datadrivenconstruction.io","metadata":{"openclaw":{"os":["darwin","linux","win32"],"emoji":"✔️","homepage":"https://datadrivenconstruction.io","requires":{"bins":["python3"]}}},"import_tag":"clean-skills-v1","description":"Validate construction data inputs before processing: cost estimates, schedules, BIM data, field reports. Catch errors early with domain-specific rules."}},"renderedAt":1782979368139}

Input Validation for Construction Data Overview Validate incoming construction data before processing to catch errors early. Domain-specific validation rules for estimates, schedules, BIM exports, and field data. Validation Framework Core Validator Class Cost Estimate Validation Schedule Validation BIM Data Validation Field Data Validation Usage Examples Integration with DDC Pipeline Resources - Data Quality Best Practices : Validate early, validate often - Construction Data Standards : CSI, IFC, COBie specifications - Error Handling : Always provide actionable suggestions ---