Source code for util.formatting.json_parsing

import json
import re


# Fenced code block, optionally tagged ``json``; group 1 is the fence body.
_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL)

# Control characters that are stripped outright (TAB, LF and CR are kept).
_CONTROL_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F]")

# One token per match: either a complete string literal, or a comma that is
# followed only by whitespace and a closing brace/bracket. Matching strings as
# whole tokens keeps the trailing-comma rule from firing inside string content.
_STRING_OR_TRAILING_COMMA_RE = re.compile(
    r'(?P<string>"[^"\\]*(?:\\.[^"\\]*)*")|,(?P<closer>\s*[}\]])'
)

# A valid JSON escape sequence, or (second alternative) a stray backslash.
_ESCAPE_RE = re.compile(r'\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4})|\\')


def _repair_escape(match: re.Match) -> str:
    """Keep a valid escape sequence as is; double a stray backslash."""
    sequence = match.group(0)
    return sequence if len(sequence) > 1 else r"\\"


def _repair_string_literal(literal: str) -> str:
    """Make one quoted string literal valid JSON without re-escaping it."""
    repaired = _ESCAPE_RE.sub(_repair_escape, literal)
    # Raw line feeds become "<br>"; raw CR/TAB are illegal inside JSON strings,
    # so they are turned into their escape sequences.
    return repaired.replace("\n", "<br>").replace("\r", r"\r").replace("\t", r"\t")


def _repair_token(match: re.Match) -> str:
    """Rewrite one token: fix a string literal or drop a trailing comma."""
    literal = match.group("string")
    if literal is None:
        # Trailing comma: keep only the whitespace and the closing bracket.
        return match.group("closer")
    return _repair_string_literal(literal)


def parse_json_from_markdown(markdown_text: str) -> dict:
    """
    Parse JSON from markdown text.

    The first fenced code block (with or without a ``json`` tag) is used if
    one is present; otherwise the entire text is parsed. Before parsing, the
    text is repaired as follows:

    * control characters other than tab, line feed and carriage return are
      removed;
    * trailing commas before ``}`` or ``]`` are removed (commas inside string
      literals are left untouched);
    * raw newline characters inside string literals are replaced with
      ``<br>``, raw tabs / carriage returns are escaped;
    * backslashes that do not start a valid JSON escape are doubled so they
      are read as literal backslashes.

    Escape sequences already present in the input (``\\"``, ``\\\\``, ``\\n``,
    ``\\uXXXX`` ...) are decoded normally by the JSON parser.

    Args:
        markdown_text (str): Markdown text.

    Returns:
        The value produced by ``json.loads`` for the repaired text.

    Raises:
        json.JSONDecodeError: If JSON parsing fails.
    """
    # Extract JSON block if present, otherwise use the entire text
    fenced = _FENCED_BLOCK_RE.search(markdown_text)
    json_text = fenced.group(1) if fenced else markdown_text
    json_text = _CONTROL_CHARS_RE.sub("", json_text).strip()

    # Single pass over string literals and trailing commas
    json_text = _STRING_OR_TRAILING_COMMA_RE.sub(_repair_token, json_text)

    try:
        return json.loads(json_text)
    except json.JSONDecodeError as e:
        print(f"Error while parsing JSON: {e}")
        print(f"Markdown text: {markdown_text}")
        raise
# Characters accepted when the caller does not supply its own allow-list.
DEFAULT_ALLOWED_CHARS = "".join(
    (
        # ASCII letters, digits, whitespace and "!"
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \t\n\r!",
        # Remaining ASCII punctuation
        "\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        # Accented / special Latin letters, lowercase
        "äöüßñáéíóúàâæçèêëîïôœùûãõ",
        # Accented / special Latin letters, uppercase
        "ÄÖÜÑÁÉÍÓÚÀÂÆÇÈÊËÎÏÔŒÙÛÃÕ",
        # Mathematical symbols
        "±×÷∞∑√πΔ∫≈≠≤≥",
        # Greek alphabet, lowercase
        "αβγδεζηθικλμνξοπρστυφχψω",
        # Greek alphabet, uppercase
        "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
    )
)
[docs] def has_invalid_characters(text: str, allowed_chars: str | None = None) -> bool: allowed = allowed_chars or DEFAULT_ALLOWED_CHARS return any(c not in allowed for c in text)