# fhem-extract/scripts/diagnose_file_log.py

import re
import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError, Timeout, RequestException

# Base URL of the FHEM web interface; request paths are appended to it
# when building log URLs.
FHEM_URL_BASE = "https://fhem.auwiesen2.de/fhem"

# CSRF token placed in the request headers below (replace with your actual
# token).
# NOTE(review): this is a hard-coded, credential-like value committed to the
# source — consider loading it from the environment or a config file.
CSRF_TOKEN = "csrf_611440676390392"

# HTTP headers passed along with the requests made by this script; the CSRF
# token travels in the "X-FHEM-csrfToken" header.
HEADERS = {
    "X-FHEM-csrfToken": CSRF_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html"
}

# Module-level requests session shared by the HTTP calls in this script.
session = requests.Session()

def fetch_device_log(device_id, year):
    """Download a device's log page for one year and return its entries.

    The page is requested through the shared module session, parsed as
    HTML, and the text of the ``<pre class="log">`` element is split into
    lines.  Only lines that begin with a ``YYYY-MM-DD_HH:MM:SS`` timestamp
    are kept.

    Args:
        device_id: Device identifier used in both the ``dev`` and ``file``
            parts of the URL.
        year: Year used to build the ``<device_id>-<year>.log`` file name.

    Returns:
        A list of timestamped log lines (possibly empty), or ``None`` when
        the request fails or the page contains no log element.  Failures
        are reported on stdout rather than raised.
    """
    target = (
        f"{FHEM_URL_BASE}/FileLog_logWrapper"
        f"&dev=FileLog_{device_id}&type=text&file={device_id}-{year}.log"
    )

    # Only the network round trip is guarded; parsing happens afterwards.
    try:
        reply = session.get(target, headers=HEADERS, timeout=10)
        reply.raise_for_status()
        page = reply.text
    except (HTTPError, Timeout) as err:
        print(f"Error fetching log for device {device_id}: {err}")
        return None
    except RequestException as req_err:
        print(f"An error occurred: {req_err}")
        return None

    log_block = BeautifulSoup(page, 'html.parser').find('pre', class_='log')
    if not log_block:
        print(f"No log data found for device {device_id} in year {year}.")
        return None

    # Keep only the lines that start with the expected timestamp format.
    starts_with_stamp = re.compile(r'^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}').match
    return list(filter(starts_with_stamp, log_block.text.strip().splitlines()))

# Proposed SQL column types ordered from narrowest to widest; a column is
# widened whenever a later value does not fit the type chosen so far.
_SQL_TYPE_RANK = {"INT": 0, "FLOAT": 1, "VARCHAR(255)": 2}

# "<timestamp> <device> <rest of line>", compiled once instead of per line.
_LOG_LINE_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}) (MA_[a-zA-Z0-9]+) (.*)"
)
_INT_VALUE_RE = re.compile(r"-?[0-9]+")
_FLOAT_VALUE_RE = re.compile(r"-?[0-9]+(\.[0-9]+)?")


def _infer_sql_type(value):
    """Return the narrowest proposed SQL type that can hold *value*."""
    if _INT_VALUE_RE.fullmatch(value):
        return "INT"
    if _FLOAT_VALUE_RE.fullmatch(value):
        return "FLOAT"
    return "VARCHAR(255)"


def analyze_log_format(log_lines):
    """Analyze the log lines to propose a table structure.

    Each line is expected to look like
    ``<YYYY-MM-DD_HH:MM:SS> <MA_device> <key:value> <key:value> ...``.
    Lines that do not match are ignored, as are tokens that do not contain
    exactly one ``:``.

    The type of every parameter is inferred from *all* of its observed
    values, not just the first one: a column starts as ``INT`` and is
    widened to ``FLOAT`` or ``VARCHAR(255)`` as soon as a value requires
    it.  Negative integers and decimals are recognised as numeric.

    Args:
        log_lines: Iterable of log line strings, or a falsy value.

    Returns:
        ``None`` if ``log_lines`` is empty or ``None``; otherwise a dict
        mapping column name to SQL type.  It always starts with the fixed
        ``device_id`` and ``timestamp`` columns, followed by the
        parameters in order of first appearance.
    """
    if not log_lines:
        return None

    parameter_patterns = {}

    for line in log_lines:
        match = _LOG_LINE_RE.match(line)
        if not match:
            continue

        for param in match.group(3).split():
            if param.count(":") != 1:
                continue
            key, _, value = param.partition(":")

            candidate = _infer_sql_type(value)
            current = parameter_patterns.get(key)
            # Widen only; a column that has already seen text stays text.
            if current is None or _SQL_TYPE_RANK[candidate] > _SQL_TYPE_RANK[current]:
                parameter_patterns[key] = candidate

    # Propose table structure
    table_structure = {
        "device_id": "VARCHAR(255)",
        "timestamp": "DATETIME"
    }
    table_structure.update(parameter_patterns)

    return table_structure

def print_table_structure(table_structure):
    """Write the proposed schema to stdout as a CREATE TABLE statement.

    Args:
        table_structure: Mapping of column name to SQL type; one column
            definition is printed per entry, in the mapping's iteration
            order, followed by a composite primary key on
            ``(device_id, timestamp)``.
    """
    print("Proposed Table Structure:", "CREATE TABLE device_logs (", sep="\n")

    for name, sql_type in table_structure.items():
        print("    {} {},".format(name, sql_type))

    print("    PRIMARY KEY (device_id, timestamp)", ");", sep="\n")

def main(device_id, year=2024):
    """Fetch one device's log, preview it, and print a proposed schema.

    Args:
        device_id: Identifier of the device whose log is diagnosed.
        year: Year of the log file to fetch (defaults to 2024).

    All results and failure messages are written to stdout.
    """
    entries = fetch_device_log(device_id, year)
    if not entries:
        print("No log data retrieved.")
        return

    # Preview: at most the first 100 entries, numbered from 1.
    print(f"First 100 lines of the log for device {device_id}:\n")
    for number, entry in enumerate(entries[:100], start=1):
        print(f"{number}: {entry}")

    schema = analyze_log_format(entries)
    if not schema:
        print("No valid log data found to analyze.")
        return

    print("\nAnalyzing log format...")
    print_table_structure(schema)

if __name__ == "__main__":
    # When run as a script, diagnose a single example device.
    main(device_id="MA_030e8b3e5bc3")