Linux system administration requires mastery of essential commands for efficient server management. This comprehensive guide covers must-know commands for system administrators and demonstrates how ArgoFusion SSH's batch execution capabilities can dramatically improve operational efficiency.
1. Essential System Monitoring Commands
System monitoring forms the foundation of effective server administration:
Process and Performance Monitoring
# System resource monitoring
htop # Interactive process viewer
top -u username # Show processes for specific user
ps aux | grep nginx # Find specific processes
pstree -p # Show process tree with PIDs
# Memory usage analysis
free -h # Human-readable memory usage
vmstat 1 5 # Virtual memory statistics
cat /proc/meminfo # Detailed memory information
# CPU monitoring
lscpu # CPU architecture info
cat /proc/cpuinfo # Detailed CPU information
iostat -x 1 5 # I/O statistics
sar -u 1 10 # CPU utilization over time
ArgoFusion Batch System Monitoring
# ArgoFusion batch execution for system monitoring (from group_run.py)
class SystemMonitoringCommands:
    """Batch system monitoring across server groups.

    Holds named suites of shell commands and fans them out over groups of
    hosts via SSH. The helpers ``execute_ssh_command`` and
    ``generate_monitoring_report`` are provided elsewhere in the project.
    """

    def __init__(self):
        # Keys of this dict are the valid monitoring_type values accepted
        # by execute_monitoring_suite.
        self.monitoring_commands = {
            'system_overview': [
                'uptime',
                'free -h',
                'df -h',
                'who',
                'last | head -10'
            ],
            'performance_check': [
                'top -b -n 1 | head -20',
                'iostat -x 1 1',
                'vmstat 1 1',
                'sar -u 1 1'
            ],
            'security_audit': [
                'last | grep -v "^$" | head -20',
                'sudo grep "Failed password" /var/log/auth.log | tail -10',
                'netstat -tulpn | grep LISTEN',
                'ps aux | grep -E "(ssh|ftp|http)"'
            ]
        }

    async def _run_commands_on_host(self, host, commands):
        """Run each command on one host, collecting per-command results.

        A failing command is recorded with status 'error' and does not stop
        the remaining commands on that host.
        """
        host_results = []
        for command in commands:
            try:
                result = await self.execute_ssh_command(
                    host, command, timeout=30
                )
                host_results.append({
                    'command': command,
                    'status': 'success',
                    'output': result['output'],
                    'execution_time': result['execution_time']
                })
            except Exception as e:
                host_results.append({
                    'command': command,
                    'status': 'error',
                    'error': str(e)
                })
        return host_results

    async def execute_monitoring_suite(self, host_groups, monitoring_type):
        """Execute a monitoring command suite across multiple host groups.

        Args:
            host_groups: mapping of group name -> list of host dicts; each
                host dict must carry at least a 'hostname' key.
            monitoring_type: key into self.monitoring_commands.

        Returns:
            dict with the monitoring type, execution timestamp, raw
            per-group/per-host results, and the generated report.

        Raises:
            ValueError: if monitoring_type names no known command suite.
        """
        try:
            commands = self.monitoring_commands.get(monitoring_type, [])
            if not commands:
                raise ValueError(f"Unknown monitoring type: {monitoring_type}")
            results = {}
            for group_name, hosts in host_groups.items():
                # Bug fix: the original claimed concurrent execution but
                # iterated hosts strictly sequentially; gather now runs all
                # hosts in the group at the same time.
                per_host = await asyncio.gather(
                    *(self._run_commands_on_host(host, commands)
                      for host in hosts)
                )
                results[group_name] = {
                    host['hostname']: host_results
                    for host, host_results in zip(hosts, per_host)
                }
            # Generate monitoring report
            report = await self.generate_monitoring_report(
                results, monitoring_type
            )
            return {
                'monitoring_type': monitoring_type,
                'execution_time': datetime.now(),
                'results': results,
                'report': report
            }
        except Exception as e:
            logger.error(f"Batch monitoring execution failed: {str(e)}")
            raise
2. File System Management Commands
File system operations are critical for maintaining server storage:
Disk Usage and Management
# Disk space monitoring
df -h # Disk usage by filesystem
du -sh /var/log/* # Directory sizes
du -h --max-depth=1 / # Top-level directory sizes
find / -size +100M # Find files larger than 100MB
# File system operations
lsblk # List block devices
mount | column -t # Show mounted filesystems
fdisk -l # List disk partitions
lsof +D /path # List open files in directory
# File permissions and ownership
chmod 755 /path/to/file # Set file permissions
chown user:group /path/to/file # Change ownership
find /path -perm 777 # Find files with specific permissions
getfacl /path/to/file # Get file ACL
ArgoFusion Automated File System Maintenance
# Automated file system maintenance with ArgoFusion
class FileSystemMaintenanceManager:
    """Automated file system maintenance across server groups.

    Maintenance suites are named shell-command lists executed host by host
    over SSH. Session, validation, and reporting helpers
    (``create_maintenance_session``, ``pre_maintenance_validation``, etc.)
    are provided elsewhere in the project.
    """

    def __init__(self):
        # Keys are the valid maintenance_type values accepted by
        # execute_maintenance_workflow / execute_host_maintenance.
        self.maintenance_tasks = {
            'disk_cleanup': [
                'sudo apt-get autoremove -y',
                'sudo apt-get autoclean',
                'sudo journalctl --vacuum-time=30d',
                'find /tmp -type f -atime +7 -delete',
                'find /var/log -name "*.log" -type f -mtime +30 -delete'
            ],
            'log_rotation': [
                'sudo logrotate -f /etc/logrotate.conf',
                'sudo systemctl reload rsyslog',
                'du -sh /var/log/*'
            ],
            'disk_health_check': [
                'df -h',
                'du -sh /var/log /tmp /home',
                'sudo smartctl -H /dev/sda',
                'iostat -x 1 3'
            ]
        }

    async def execute_maintenance_workflow(self, server_groups, maintenance_type):
        """Execute a maintenance workflow across server groups.

        Args:
            server_groups: mapping of group name -> list of host dicts.
            maintenance_type: key into self.maintenance_tasks.

        Returns:
            The maintenance report dict (also persisted via
            store_maintenance_record).

        Raises:
            RuntimeError: if pre-maintenance validation fails.
        """
        try:
            # Pre-maintenance validation
            pre_check = await self.pre_maintenance_validation(server_groups)
            if not pre_check['passed']:
                raise RuntimeError(f"Pre-maintenance check failed: {pre_check['reason']}")
            # Execute maintenance tasks
            maintenance_results = {}
            for group_name, hosts in server_groups.items():
                group_results = []
                for host in hosts:
                    host_result = await self.execute_host_maintenance(
                        host, maintenance_type
                    )
                    group_results.append(host_result)
                maintenance_results[group_name] = group_results
            # Post-maintenance verification
            verification_results = await self.post_maintenance_verification(
                server_groups, maintenance_results
            )
            # Generate maintenance report
            report = {
                'maintenance_type': maintenance_type,
                'executed_at': datetime.now(),
                'pre_check': pre_check,
                'maintenance_results': maintenance_results,
                'verification_results': verification_results,
                'success_rate': self.calculate_success_rate(maintenance_results)
            }
            # Store maintenance record
            await self.store_maintenance_record(report)
            return report
        except Exception as e:
            logger.error(f"Maintenance workflow failed: {str(e)}")
            raise

    async def execute_host_maintenance(self, host, maintenance_type):
        """Execute maintenance tasks on an individual host.

        Per-command failures are recorded and execution continues unless
        the failed command is critical. Returns a 'completed' or 'failed'
        result dict; never raises.
        """
        try:
            commands = self.maintenance_tasks.get(maintenance_type, [])
            results = []
            # Create maintenance session
            ssh_client = await self.create_maintenance_session(host)
            # Bug fix: the original only released the session on the success
            # path; an unexpected exception leaked the SSH session. The
            # finally clause now guarantees cleanup.
            try:
                for command in commands:
                    try:
                        result = await self.execute_maintenance_command(
                            ssh_client, command, timeout=300
                        )
                        results.append({
                            'command': command,
                            'status': 'success',
                            'output': result['output'],
                            'execution_time': result['execution_time']
                        })
                    except Exception as e:
                        results.append({
                            'command': command,
                            'status': 'error',
                            'error': str(e)
                        })
                        # Continue with other commands unless critical failure
                        if self.is_critical_command(command):
                            break
            finally:
                # Cleanup maintenance session
                await self.cleanup_maintenance_session(ssh_client)
            return {
                'hostname': host['hostname'],
                'status': 'completed',
                'results': results,
                'completion_time': datetime.now()
            }
        except Exception as e:
            logger.error(f"Host maintenance failed for {host['hostname']}: {str(e)}")
            return {
                'hostname': host['hostname'],
                'status': 'failed',
                'error': str(e)
            }
3. Network Administration Commands
Network management is crucial for server connectivity and security:
Network Diagnostics and Configuration
# Network interface management
ip addr show # Show IP addresses
ip route show # Display routing table
ip link show # Show network interfaces
ethtool eth0 # Ethernet interface details
# Network connectivity testing
ping -c 4 google.com # Test connectivity
traceroute google.com # Trace network path
mtr google.com # Network diagnostic tool
nslookup domain.com # DNS lookup
# Port and connection monitoring
netstat -tulpn # Show listening ports
ss -tulpn # Modern netstat alternative
lsof -i :80 # Show processes using port 80
nmap localhost # Port scan localhost
# Firewall management
sudo ufw status # Ubuntu firewall status
sudo iptables -L # List iptables rules
sudo firewall-cmd --list-all # CentOS/RHEL firewall
4. Service and Process Management
Managing services and processes efficiently across multiple servers:
SystemD Service Management
# Service management
sudo systemctl start service_name # Start service
sudo systemctl stop service_name # Stop service
sudo systemctl restart service_name # Restart service
sudo systemctl reload service_name # Reload configuration
sudo systemctl enable service_name # Enable at boot
sudo systemctl disable service_name # Disable at boot
# Service status and logs
systemctl status service_name # Service status
journalctl -u service_name # Service logs
journalctl -f # Follow system logs
systemctl list-units --failed # Show failed units
# Process management
kill -9 PID # Force kill process
killall process_name # Kill all instances
pkill -f pattern # Kill by pattern
jobs # Show background jobs
nohup command & # Run command in background
ArgoFusion Service Management Automation
# Automated service management with ArgoFusion
class ServiceManagementAutomation:
    """Automated service management across server infrastructure.

    Services are addressed in named logical groups; operations are applied
    to every service of a group on every target host. SSH/status helpers
    (``execute_ssh_command``, ``get_service_status``, etc.) are provided
    elsewhere in the project.
    """

    def __init__(self):
        # Logical service groups addressable by execute_service_operation.
        self.service_groups = {
            'web_services': ['nginx', 'apache2', 'httpd'],
            'database_services': ['mysql', 'postgresql', 'redis'],
            'monitoring_services': ['prometheus', 'grafana', 'node_exporter'],
            'security_services': ['fail2ban', 'ufw', 'ssh']
        }

    async def execute_service_operation(self, operation, service_group, target_hosts):
        """Apply one operation to every service of a group on each host.

        Services that are not installed on a host are skipped; per-service
        failures (including an unsupported operation) are recorded in the
        result rather than aborting the run.
        """
        try:
            services = self.service_groups.get(service_group, [])
            if not services:
                raise ValueError(f"Unknown service group: {service_group}")
            # Dispatch table; looked up inside the per-service try so an
            # unsupported operation is recorded as a per-service error,
            # matching the error-handling style of the other failures.
            dispatch = {
                'restart': self.restart_service_safely,
                'status': self.get_service_status,
                'start': self.start_service,
                'stop': self.stop_service,
            }
            operation_results = {}
            for host in target_hosts:
                per_service = []
                for svc in services:
                    try:
                        existence = await self.check_service_exists(host, svc)
                        if not existence['exists']:
                            per_service.append({
                                'service': svc,
                                'status': 'skipped',
                                'reason': 'Service not installed'
                            })
                            continue
                        handler = dispatch.get(operation)
                        if handler is None:
                            raise ValueError(f"Unsupported operation: {operation}")
                        outcome = await handler(host, svc)
                        per_service.append({
                            'service': svc,
                            'status': 'success',
                            'result': outcome
                        })
                    except Exception as e:
                        per_service.append({
                            'service': svc,
                            'status': 'error',
                            'error': str(e)
                        })
                operation_results[host['hostname']] = per_service
            return {
                'operation': operation,
                'service_group': service_group,
                'executed_at': datetime.now(),
                'results': operation_results
            }
        except Exception as e:
            logger.error(f"Service operation failed: {str(e)}")
            raise

    async def restart_service_safely(self, host, service):
        """Restart a service with pre/post health checks.

        If the service does not come back up, attempt to restore it to its
        previous state and report failure in the result dict.
        """
        try:
            status_before = await self.get_service_status(host, service)
            # Stop, pause for graceful shutdown, then start again.
            await self.execute_ssh_command(
                host, f"sudo systemctl stop {service}", timeout=30
            )
            await asyncio.sleep(2)
            await self.execute_ssh_command(
                host, f"sudo systemctl start {service}", timeout=30
            )
            await asyncio.sleep(5)  # allow service to initialize
            status_after = await self.get_service_status(host, service)
            if status_after['active']:
                return {
                    'operation': 'restart',
                    'service': service,
                    'pre_status': status_before,
                    'post_status': status_after,
                    'success': True
                }
            # Restart failed: put the service back the way we found it.
            if status_before['active']:
                await self.execute_ssh_command(
                    host, f"sudo systemctl start {service}", timeout=30
                )
            return {
                'operation': 'restart',
                'service': service,
                'success': False,
                'error': 'Service failed to start after restart'
            }
        except Exception as e:
            logger.error(f"Safe service restart failed: {str(e)}")
            raise
5. Security and User Management
Security administration commands for maintaining server security:
User and Permission Management
# User management
sudo useradd -m -s /bin/bash username # Add user with home directory
sudo usermod -aG sudo username # Add user to sudo group
sudo passwd username # Change user password
sudo deluser username # Delete user
id username # Show user information
# Group management
sudo groupadd groupname # Create group
sudo usermod -aG groupname username # Add user to group
groups username # Show user's groups
getent group groupname # Show group members
# File permissions and security
find / -perm -4000 2>/dev/null # Find SUID files
find / -perm -2000 2>/dev/null # Find SGID files
find /home -name ".*" -ls # Find hidden files
sudo chage -l username # Show password aging info
6. Log Analysis and Troubleshooting
Effective log analysis is essential for troubleshooting and monitoring:
Log Management Commands
# System logs
sudo tail -f /var/log/syslog # Follow system log
sudo grep "error" /var/log/syslog # Search for errors
journalctl -xe # Recent systemd logs
journalctl -f -u service_name # Follow service logs
# Web server logs
sudo tail -f /var/log/nginx/access.log # Follow nginx access log
sudo tail -f /var/log/nginx/error.log # Follow nginx error log
awk '{print $1}' /var/log/nginx/access.log | sort | uniq -c | sort -nr | head -10
# Security logs
sudo grep "Failed password" /var/log/auth.log
sudo grep "sudo" /var/log/auth.log
last | head -20 # Recent logins
lastb | head -10 # Failed login attempts
ArgoFusion Automated Log Analysis
# Automated log analysis across server groups
class LogAnalysisEngine:
    """Automated log analysis and alerting system.

    Runs grep-based pattern searches against well-known log files across
    server groups over SSH. Reporting/alerting helpers
    (``generate_log_analysis_report``, ``send_critical_alerts``, etc.) are
    provided elsewhere in the project.
    """

    def __init__(self):
        # Regex pattern suites; keys are the valid analysis_type values
        # accepted by analyze_logs_across_servers. Patterns are passed to
        # `grep -E` inside single quotes, so they must not contain
        # single-quote characters.
        self.log_patterns = {
            'security_events': [
                r'Failed password for .* from (\d+\.\d+\.\d+\.\d+)',
                r'Invalid user .* from (\d+\.\d+\.\d+\.\d+)',
                r'sudo: .* : command not allowed'
            ],
            'system_errors': [
                r'ERROR.*',
                r'FATAL.*',
                r'kernel: .*error.*',
                r'Out of memory'
            ],
            'service_issues': [
                r'systemd.*failed',
                r'nginx.*error',
                r'mysql.*error'
            ]
        }

    async def analyze_logs_across_servers(self, server_groups, analysis_type, time_range):
        """Analyze logs across multiple server groups.

        Args:
            server_groups: mapping of group name -> list of host dicts.
            analysis_type: key into self.log_patterns.
            time_range: opaque value forwarded to per-host analysis and the
                report generator.

        Returns:
            dict with the per-group results, a report, and any critical
            issues found (alerts are dispatched for critical issues).

        Raises:
            ValueError: if analysis_type names no known pattern suite.
        """
        try:
            patterns = self.log_patterns.get(analysis_type, [])
            if not patterns:
                raise ValueError(f"Unknown analysis type: {analysis_type}")
            analysis_results = {}
            for group_name, hosts in server_groups.items():
                group_results = []
                for host in hosts:
                    host_analysis = await self.analyze_host_logs(
                        host, patterns, time_range
                    )
                    group_results.append(host_analysis)
                analysis_results[group_name] = group_results
            # Generate analysis report
            report = await self.generate_log_analysis_report(
                analysis_results, analysis_type, time_range
            )
            # Check for critical issues
            critical_issues = await self.identify_critical_issues(analysis_results)
            if critical_issues:
                await self.send_critical_alerts(critical_issues)
            return {
                'analysis_type': analysis_type,
                'time_range': time_range,
                'results': analysis_results,
                'report': report,
                'critical_issues': critical_issues
            }
        except Exception as e:
            logger.error(f"Log analysis failed: {str(e)}")
            raise

    async def analyze_host_logs(self, host, patterns, time_range):
        """Analyze well-known log files on a single host.

        Bug fix: 'files_analyzed' previously counted only files that
        produced matches; it now counts log files that existed and were
        actually scanned. Returns an error dict instead of raising.
        """
        try:
            log_files = [
                '/var/log/syslog',
                '/var/log/auth.log',
                '/var/log/nginx/error.log',
                '/var/log/mysql/error.log'
            ]
            host_results = {
                'hostname': host['hostname'],
                'matches': {},
                'summary': {}
            }
            files_scanned = 0
            for log_file in log_files:
                # Check if log file exists
                check_cmd = f"test -f {log_file} && echo 'exists' || echo 'missing'"
                check_result = await self.execute_ssh_command(host, check_cmd)
                if 'missing' in check_result['output']:
                    continue
                files_scanned += 1
                file_matches = []
                for pattern in patterns:
                    # Search for pattern in log file, capped at the most
                    # recent 100 matching lines.
                    search_cmd = f"grep -E '{pattern}' {log_file} | tail -100"
                    try:
                        search_result = await self.execute_ssh_command(
                            host, search_cmd, timeout=60
                        )
                        if search_result['output'].strip():
                            matches = search_result['output'].strip().split('\n')
                            file_matches.extend([{
                                'pattern': pattern,
                                'match': match,
                                'timestamp': self.extract_timestamp(match)
                            } for match in matches])
                    except Exception as e:
                        logger.warning(f"Pattern search failed: {str(e)}")
                        continue
                if file_matches:
                    host_results['matches'][log_file] = file_matches
            # Generate summary statistics
            host_results['summary'] = {
                'total_matches': sum(len(matches) for matches in host_results['matches'].values()),
                'files_analyzed': files_scanned,
                'most_frequent_pattern': self.get_most_frequent_pattern(host_results['matches'])
            }
            return host_results
        except Exception as e:
            logger.error(f"Host log analysis failed: {str(e)}")
            return {
                'hostname': host['hostname'],
                'error': str(e)
            }
7. Performance Optimization Commands
Commands for optimizing server performance and resource utilization:
System Tuning and Optimization
# CPU and memory optimization
echo 'vm.swappiness=10' | sudo tee -a /etc/sysctl.conf
sudo sysctl -p # Apply sysctl changes
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
# Network optimization
sudo ethtool -K eth0 gro on # Enable Generic Receive Offload
sudo ethtool -K eth0 tso on # Enable TCP Segmentation Offload
echo 'net.core.rmem_max = 16777216' | sudo tee -a /etc/sysctl.conf
# I/O optimization
echo deadline | sudo tee /sys/block/sda/queue/scheduler
sudo hdparm -tT /dev/sda # Test disk performance
iostat -x 1 10 # Monitor I/O performance
Conclusion
Mastering Linux system administration commands is essential for effective server management. ArgoFusion SSH amplifies this expertise by enabling batch execution across server groups, automated workflows, and comprehensive monitoring capabilities.
Key advantages of using ArgoFusion for Linux administration:
- Batch Execution - Run commands across multiple servers simultaneously
- Automated Workflows - Create repeatable maintenance procedures
- Real-time Monitoring - Track command execution and results
- Error Handling - Graceful handling of failures with rollback capabilities
- Audit Trail - Complete logging of all administrative actions
Supercharge Your Linux Administration
Transform your Linux administration workflow with ArgoFusion SSH:
- Command Templates - Pre-built templates for common administrative tasks
- Batch Operations - Execute commands across server groups with one click
- Automated Scheduling - Schedule maintenance tasks with CRON integration
- Real-time Output - Monitor command execution across all servers
- Error Recovery - Automated rollback and error handling
Try the demo or start your free trial to experience efficient Linux administration.