Quick Reference

One-page cheat sheet for common log analysis tasks.

📋 Common Imports

import json                                    # JSON parsing
import re                                      # Regular expressions
import numpy as np                             # Percentiles
from datetime import datetime, timedelta       # Time handling
from collections import defaultdict, Counter   # Advanced dicts
import string                                  # string.punctuation

🔍 Parsing Quick Reference

TaskCode
Parse web log (simple)data = line.strip().split()
ip, status = data[0], int(data[-3])  # NOTE: field positions depend on your log format — verify against a sample line
Parse web log (regex)pattern = r'(?P<IP>\d+\.\d+\.\d+\.\d+).*'
match.groupdict()
Parse JSON lineevent = json.loads(line)
value = event.get("key", default)
Parse delimitedfields = line.strip().split(":")
username, uid = fields[0], fields[2]
Extract bracketedstart = line.find('[')
timestamp = line[start+1:line.find(']')]
Safe nested JSONpod = event.get("obj", {}).get("name", "unknown")

🔢 Counting Quick Reference

TaskBest ApproachCode
Count occurrencesdict.get()counts[key] = counts.get(key, 0) + 1
Multiple countersdefaultdict(int)counts = defaultdict(int)
counts[key] += 1
Top N itemsCounterCounter(items).most_common(N)
Unique items onlysetunique = set()
unique.add(item)
Combine countsCountertotal = count1 + count2

Counting Decision Tree

Need counts?
    → Need top N?             → Use Counter(items).most_common(N)
    → Need sorted output?     → Use sorted(dict.items(), key=lambda x: x[1], reverse=True)
    → Multiple counters?      → Use defaultdict(int)
    → Single simple counter?  → Use dict.get(key, 0) + 1

🎯 Filtering & Grouping Quick Reference

TaskBest ApproachCode
Simple filterList comprehensionerrors = [log for log in logs if log['status'] >= 400]
Complex filterLoop with continueif not (cond1 and cond2): continue
Group by keydefaultdict(list)groups[key].append(item)
First occurrenceDict membershipif key not in dict: dict[key] = value
Unique with filterSet comprehension{item for item in items if condition}

📊 Statistics Quick Reference

TaskCode
Simple averageavg = sum(values) / len(values) if values else 0
Filtered averagetotal = sum(x for x in values if condition)
count = sum(1 for x in values if condition)
avg = total / count if count > 0 else 0
Percentilesimport numpy as np
p95 = np.percentile(values, 95)
Min/Max valuemin(values), max(values)
Find max itemmax(items, key=lambda x: x['field'])
Success rate %rate = (success / total) * 100 if total > 0 else 0

⏱️ Time & Date Quick Reference

TaskCode
Parse ISO timestampdt = datetime.fromisoformat(timestamp_str)
Parse custom formatdt = datetime.strptime(ts, "%d/%b/%Y:%H:%M:%S")
Duration betweenduration = end_time - start_time # timedelta
Average durationtotal = timedelta(0)
total += duration
avg = total / count
Sort by timestampsorted(logs, key=lambda x: datetime.fromisoformat(x['ts']))
Get seconds from timedeltaduration.total_seconds()

📝 Common Task Recipes

Task: Count requests per IP

from collections import defaultdict
ip_counts = defaultdict(int)
for log in logs:
    ip_counts[log['ip']] += 1

Task: Get top 3 most requested endpoints

from collections import Counter
endpoints = [log['endpoint'] for log in logs]
for endpoint, count in Counter(endpoints).most_common(3):
    print(f"{endpoint}: {count}")

Task: Calculate success rate (2xx vs errors)

success = sum(1 for log in logs if 200 <= log['status'] < 300)
total = len(logs)
success_rate = round((success / total) * 100, 2) if total > 0 else 0
print(f"Success rate: {success_rate}%")

Task: Average response size

total_bytes = sum(log['bytes'] for log in logs)
avg_bytes = round(total_bytes / len(logs), 2) if logs else 0

Task: Average latency for successful requests only

successes = [log for log in logs if 200 <= log['status'] < 300]
avg_latency = sum(s['latency'] for s in successes) / len(successes) if successes else 0

Task: P95 and P99 latency for specific endpoint

import numpy as np
latencies = [log['latency'] for log in logs if log['endpoint'] == '/api/checkout']
if latencies:
    p95 = round(np.percentile(latencies, 95), 2)
    p99 = round(np.percentile(latencies, 99), 2)
    print(f"P95: {p95}, P99: {p99}")

Task: Find slowest request

slowest = max(logs, key=lambda x: x['latency'])
print(f"Slowest: {slowest}")

Task: Count unique IPs

unique_ips = {log['ip'] for log in logs}
print(f"Unique IPs: {len(unique_ips)}")

Task: Group logs by status code

from collections import defaultdict
by_status = defaultdict(list)
for log in logs:
    by_status[log['status']].append(log)

Task: Track unique pods with warnings

pods_with_warnings = set()
for event in events:
    if event.get('type') == 'Warning' and event.get('kind') == 'Pod':
        pods_with_warnings.add(event['pod_name'])

Task: Parse log levels and count them

from collections import Counter
import re

pattern = re.compile(r'\[(\w+)\]')
levels = Counter()

with open('app.log') as f:
    for line in f:
        matches = pattern.findall(line)  # call the compiled pattern directly
        for match in matches:
            levels[match.upper()] += 1

Task: Average session duration (login to logout)

from datetime import datetime, timedelta

# Assuming sorted logs
active_sessions = {}
total_duration = timedelta(0)
session_count = 0

for log in sorted_logs:
    user = log['user_id']
    action = log['action']

    if action == 'login' and user not in active_sessions:
        active_sessions[user] = datetime.fromisoformat(log['timestamp'])

    elif action == 'logout' and user in active_sessions:
        start = active_sessions[user]
        end = datetime.fromisoformat(log['timestamp'])
        total_duration += (end - start)
        session_count += 1
        del active_sessions[user]

avg_duration = total_duration / session_count if session_count > 0 else timedelta(0)

Task: Find duplicate UIDs

from collections import defaultdict

uid_users = defaultdict(list)
with open('passwd.txt') as f:
    for line in f:
        if not line.strip() or line.startswith('#'):
            continue
        fields = line.split(':')
        uid_users[fields[2]].append(fields[0])

# Print duplicates
for uid, users in uid_users.items():
    if len(users) > 1:
        print(f"Duplicate UID {uid}: {users}")

Task: Count normal vs warning events

import json
from collections import Counter

event_types = Counter()
with open('events.json') as f:
    for line in f:
        event = json.loads(line)
        event_types[event.get('type', 'Unknown')] += 1

print(event_types)  # Counter({'Normal': 150, 'Warning': 45, ...})

Task: Word frequency (top 10)

import re
from collections import Counter

with open('text.txt') as f:
    text = f.read().lower()
    words = re.findall(r'\b\w+\b', text)
    word_counts = Counter(words)

for word, count in word_counts.most_common(10):
    print(f"{word}: {count}")

🚨 Common Pitfalls & Solutions

ProblemBadGood
Division by zeroavg = total / countavg = total / count if count > 0 else 0
Empty linesdata = line.split()cleaned = line.strip()
if not cleaned: continue
Missing JSON keyevent["key"]event.get("key", default)
Nested JSONevent["obj"]["name"]event.get("obj", {}).get("name")
find() returns -1timestamp = line[start:end]try:
start = line.index('[')
except ValueError: continue
Case sensitivityif word == "ERROR"if word.upper() == "ERROR"
Default on max()max(filtered_list)max(filtered_list, default=None)

💡 Performance Tips

TipWhy
Use set for membershipO(1) vs O(n) for list
Compile regex outside looppattern = re.compile(r'...')
List comprehension > append loopFaster and cleaner
Process line-by-line for huge filesMemory efficient
Cache .get() resultsobj = event.get("involvedObject", {})
Use defaultdict for groupingAvoids repeated initialization

🎨 Code Patterns Comparison

Counting Pattern

# Basic dict                          # defaultdict                    # Counter
counts = {}                           from collections import          from collections import Counter
for item in items:                    defaultdict                      counts = Counter(items)
    counts[item] = counts.get(item,   counts = defaultdict(int)        # Get top 3:
                    0) + 1            for item in items:               counts.most_common(3)
                                          counts[item] += 1

Filtering Pattern

# List comprehension (simple)         # Loop (complex)
results = [x for x in items           results = []
           if x > 10]                 for x in items:
                                          if complex_condition(x):
                                              results.append(x)

Grouping Pattern

# Manual dict                         # defaultdict(list)
groups = {}                           from collections import defaultdict
for item in items:                    groups = defaultdict(list)
    key = item['category']            for item in items:
    if key not in groups:                 groups[item['category']].append(item)
        groups[key] = []
    groups[key].append(item)

🔧 Essential One-Liners

# Skip empty lines and comments
if not line.strip() or line.startswith('#'): continue

# Safe nested JSON access
pod_name = event.get("involvedObject", {}).get("name", "unknown")

# Check if status is 2xx
if 200 <= status < 300:

# Convert list to Counter and get top 3
Counter(items).most_common(3)

# Filter and transform in one line
error_ips = [log['ip'] for log in logs if log['status'] >= 400]

# Count items matching condition
count = sum(1 for log in logs if condition)

# Group by key using dict comprehension (if keys known)
groups = {key: [x for x in items if x['key'] == key] for key in unique_keys}

# Sort dict by value descending
sorted(my_dict.items(), key=lambda x: x[1], reverse=True)

# Get unique values from nested structure
unique = {event.get('obj', {}).get('name') for event in events}

# Replace punctuation with spaces
text = ''.join(c if c not in string.punctuation else ' ' for c in text)

# Better: using regex
words = re.findall(r'\b\w+\b', text.lower())

📖 Decision: Which Approach?

“Should I use dict, defaultdict, or Counter?”

Counting one thing?
    → dict with .get(key, 0) + 1

Multiple counters?
    → defaultdict(int)

Need top N?
    → Counter with .most_common(N)

Need arithmetic (+, -, &, |)?
    → Counter

“Should I use list comprehension or loop?”

Simple one-line condition?
    → List comprehension

Complex multi-line logic?
    → Loop with if/continue

Creating new list?
    → List comprehension

Modifying in place or side effects?
    → Loop

“Should I load all or process line-by-line?”

Need to sort by timestamp?
    → Load all → sort → process

File is huge (GB+)?
    → Process line-by-line

Need to reference other lines?
    → Load all

Single-pass aggregation?
    → Process line-by-line

🎯 Quick Syntax Lookup

# Dictionary operations
d.get(key, default)              # Safe access with default
d.setdefault(key, default)       # Get or set default
d.items()                        # Key-value pairs
d.keys()                         # All keys
d.values()                       # All values

# String operations
s.strip()                        # Remove whitespace
s.split(delimiter)               # Split on delimiter
s.find(substring)                # Index of substring (-1 if not found)
s.index(substring)               # Index or ValueError
s.startswith(prefix)             # Boolean check
s.upper() / s.lower()            # Case conversion

# List operations
len(lst)                         # Length
lst.append(item)                 # Add to end
lst.extend(items)                # Add multiple
sorted(lst, key=func)            # Sort by function
sorted(lst, reverse=True)        # Sort descending
max(lst, key=func)               # Max by function
min(lst, key=func)               # Min by function
sum(lst)                         # Sum all values

# Set operations
s.add(item)                      # Add item
len(s)                           # Count unique
item in s                        # Membership (O(1))
s1 & s2                          # Intersection
s1 | s2                          # Union
s1 - s2                          # Difference

# Counter operations
Counter(items)                   # Count from iterable
c.most_common(N)                 # Top N items
c1 + c2                          # Combine counts
c.update(items)                  # Add more items

# defaultdict operations
defaultdict(int)                 # Auto-init to 0
defaultdict(list)                # Auto-init to []
defaultdict(set)                 # Auto-init to set()
Last updated on