Files
fhem-extract/scripts/diagnose_file_log.py
2024-08-20 21:24:02 +00:00

117 lines
3.9 KiB
Python

import re
import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError, Timeout, RequestException
# Root URL of the FHEM web interface; every request URL is built from it.
FHEM_URL_BASE = "https://fhem.auwiesen2.de/fhem"

# CSRF token sent with each request (substitute the token of your own server).
CSRF_TOKEN = "csrf_611440676390392"

# Default request headers; the CSRF token travels in X-FHEM-csrfToken.
HEADERS = {
    "X-FHEM-csrfToken": CSRF_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html",
}

# One shared HTTP session used for all requests made by this script.
session = requests.Session()
def fetch_device_log(device_id, year):
    """Download the FileLog page of one device/year and return its log lines.

    The page is requested through the shared ``session`` with ``HEADERS`` and
    a 10 second timeout. The text of the ``<pre class="log">`` element is
    split into lines and only lines that begin with a
    ``YYYY-MM-DD_HH:MM:SS`` timestamp are kept.

    Args:
        device_id: Device name used in both the FileLog name and file name.
        year: Year used in the log file name.

    Returns:
        A list of timestamped log lines (possibly empty), or ``None`` when
        the request fails or the page has no ``<pre class="log">`` element.
        A message is printed in each ``None`` case.
    """
    log_url = (
        f"{FHEM_URL_BASE}/FileLog_logWrapper"
        f"&dev=FileLog_{device_id}&type=text&file={device_id}-{year}.log"
    )

    try:
        response = session.get(log_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        page_html = response.text
    except (HTTPError, Timeout) as err:
        print(f"Error fetching log for device {device_id}: {err}")
        return None
    except RequestException as req_err:
        print(f"An error occurred: {req_err}")
        return None

    # The log text lives in a <pre class="log"> element of the returned page.
    log_block = BeautifulSoup(page_html, 'html.parser').find('pre', class_='log')
    if not log_block:
        print(f"No log data found for device {device_id} in year {year}.")
        return None

    # Drop everything that is not a timestamped entry.
    starts_with_timestamp = re.compile(r'^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}').match
    return [
        entry
        for entry in log_block.text.strip().splitlines()
        if starts_with_timestamp(entry)
    ]
# "<timestamp> <device> <event text>" for devices whose name starts with "MA_".
_LOG_LINE_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}) (MA_[a-zA-Z0-9]+) (.*)"
)
# Optionally signed ASCII integers / decimals; anything else is text.
_INT_VALUE_RE = re.compile(r"[+-]?[0-9]+")
_FLOAT_VALUE_RE = re.compile(r"[+-]?[0-9]+\.[0-9]+")
# Ordering used to widen a column: INT fits in FLOAT, both fit in VARCHAR.
_TYPE_RANK = {"INT": 0, "FLOAT": 1, "VARCHAR(255)": 2}


def _classify_value(value):
    """Return the narrowest SQL type name able to store *value*."""
    if _INT_VALUE_RE.fullmatch(value):
        return "INT"
    if _FLOAT_VALUE_RE.fullmatch(value):
        return "FLOAT"
    return "VARCHAR(255)"


def _iter_readings(tokens):
    """Yield ``(key, value)`` pairs found in whitespace-split event tokens."""
    position = 0
    while position < len(tokens):
        token = tokens[position]
        position += 1
        # Only tokens with exactly one colon are key/value candidates; this
        # skips bare words and time-like values such as "12:30:00".
        if token.count(":") != 1:
            continue
        key, value = token.split(":")
        if not key:
            # ":5" has no name and would produce an unnamed column.
            continue
        # "key: value" arrives as two tokens; take the following bare token
        # as the value instead of recording an empty string.
        if not value and position < len(tokens) and ":" not in tokens[position]:
            value = tokens[position]
            position += 1
        yield key, value


def analyze_log_format(log_lines):
    """Propose a table structure (column name -> SQL type) for log lines.

    Each line is expected to look like
    ``YYYY-MM-DD_HH:MM:SS MA_<id> <readings>``; lines that do not match are
    ignored. Readings may be written as ``key:value`` or as ``key:`` followed
    by a separate value token. A column's type is the widest type needed by
    any of its values (``INT`` < ``FLOAT`` < ``VARCHAR(255)``), so a reading
    that is ``21`` on one line and ``-2.5`` on another becomes ``FLOAT``.
    Signed numbers are recognised as numeric.

    Args:
        log_lines: Iterable of log line strings, or a falsy value.

    Returns:
        ``None`` when *log_lines* is empty or ``None``; otherwise a dict that
        starts with ``device_id`` and ``timestamp`` followed by one entry per
        reading in first-seen order.
    """
    if not log_lines:
        return None

    column_types = {}
    for line in log_lines:
        match = _LOG_LINE_RE.match(line)
        if match is None:
            continue
        for key, value in _iter_readings(match.group(3).split()):
            inferred = _classify_value(value)
            known = column_types.get(key)
            # Widen only; never narrow a type once a wider value was seen.
            if known is None or _TYPE_RANK[inferred] > _TYPE_RANK[known]:
                column_types[key] = inferred

    # Fixed columns come first, followed by the discovered readings.
    table_structure = {
        "device_id": "VARCHAR(255)",
        "timestamp": "DATETIME",
    }
    table_structure.update(column_types)
    return table_structure
def print_table_structure(table_structure):
    """Write the proposed table as a CREATE TABLE statement to stdout.

    Args:
        table_structure: Mapping of column name to SQL type; one column
            line is printed per entry, in the mapping's iteration order.
    """
    print("Proposed Table Structure:")
    print("CREATE TABLE device_logs (")
    for name, sql_type in table_structure.items():
        print(" {} {},".format(name, sql_type))
    # The key clause is the last item, so it carries no trailing comma.
    print(" PRIMARY KEY (device_id, timestamp)")
    print(");")
def main(device_id, year=2024):
    """Fetch a device log, preview it and print a proposed table layout.

    Args:
        device_id: Device whose log is fetched.
        year: Year of the log file to fetch (defaults to 2024).
    """
    log_lines = fetch_device_log(device_id, year)
    if not log_lines:
        print("No log data retrieved.")
        return

    # Preview: at most the first 100 entries, numbered from 1.
    print(f"First 100 lines of the log for device {device_id}:\n")
    for number, entry in enumerate(log_lines[:100], start=1):
        print(f"{number}: {entry}")

    table_structure = analyze_log_format(log_lines)
    if not table_structure:
        print("No valid log data found to analyze.")
        return

    print("\nAnalyzing log format...")
    print_table_structure(table_structure)
if __name__ == "__main__":
    # When run as a script, diagnose the log of one sample device.
    example_device_id = "MA_030e8b3e5bc3"
    main(device_id=example_device_id)