How can I import JSON data with arrays into SQLite tables using Python?

I’m learning how to work with SQLite databases and JSON parsing in Python. I’ve successfully created a three-table database structure, but I’m stuck on importing data from a JSON file that contains nested arrays.

My main challenge is handling the multiple products within each order. I need to generate unique product IDs for each item while maintaining the relationship with the parent order.

Here’s my database setup code:

import sqlite3
import json

def setup_database(database_path):
    """Open (or create) the SQLite database file and return the connection."""
    connection = None
    try:
        connection = sqlite3.connect(database_path)
    except sqlite3.Error as err:
        print(f"Database error: {err}")
    return connection

def build_table(connection, sql_statement):
    """Run a single CREATE TABLE statement against an open connection."""
    try:
        cursor = connection.cursor()
        cursor.execute(sql_statement)
    except sqlite3.Error as err:
        print(f"Table creation error: {err}")

def initialize_database():
    db_path = "orders_database.db"
    
    products_table = """CREATE TABLE IF NOT EXISTS products (
                        order_number INTEGER,
                        product_id INTEGER PRIMARY KEY,
                        product_name TEXT,
                        cost REAL);"""
    
    billing_table = """CREATE TABLE IF NOT EXISTS billing (
                       order_reference INTEGER,
                       transaction_date TEXT,
                       subtotal_amount REAL,
                       tax_amount REAL,
                       final_total REAL);"""
    
    payment_info_table = """CREATE TABLE IF NOT EXISTS payment_info (
                            order_reference INTEGER,
                            payment_type TEXT,
                            account_digits TEXT,
                            postal_code TEXT,
                            customer_name TEXT,
                            payment_method TEXT);"""
    
    db_connection = setup_database(db_path)

    if db_connection:
        build_table(db_connection, products_table)
        build_table(db_connection, billing_table)
        build_table(db_connection, payment_info_table)
        print("Tables created successfully")
        db_connection.close()  # release the file handle once setup is done
    else:
        print("Failed to establish database connection")

if __name__ == "__main__":
    initialize_database()

Here’s a sample of my JSON structure:

{
  "transactions": [
    {
      "products": [
        {
          "product_name": "latte", 
          "cost": 3.50
        },
        {
          "product_name": "cappuccino",
          "cost": 2.75
        }
      ],
      "billing": {
        "transaction_date": "2021-04-15 14:30",
        "subtotal_amount": 6.25,
        "tax_amount": 0.44,
        "final_total": 6.69
      },
      "payment_info": {
        "payment_type": "mastercard",
        "account_digits": "8934",
        "postal_code": "90210",
        "customer_name": "Sarah Chen",
        "payment_method": "debit_card"
      }
    }
  ]
}

How do I properly iterate through the products array and assign incremental IDs while inserting into my database tables?

You need to manage a global product ID counter while looping through nested arrays. I hit the same issue when processing inventory data with multiple items per transaction. Keep your product ID counter outside the transaction loop and increment it for each product insertion. Here’s what worked for me:

def import_json_data(connection, json_file_path):
    with open(json_file_path, 'r') as file:
        data = json.load(file)

    cursor = connection.cursor()
    product_id_counter = 1  # unique across every order in the file

    # enumerate(..., 1) numbers the orders starting from 1
    for order_index, transaction in enumerate(data['transactions'], 1):
        # One billing row per order
        billing = transaction['billing']
        cursor.execute(
            """INSERT INTO billing (order_reference, transaction_date,
                                    subtotal_amount, tax_amount, final_total)
               VALUES (?, ?, ?, ?, ?)""",
            (order_index, billing['transaction_date'], billing['subtotal_amount'],
             billing['tax_amount'], billing['final_total']))

        # One payment_info row per order
        payment = transaction['payment_info']
        cursor.execute(
            """INSERT INTO payment_info (order_reference, payment_type, account_digits,
                                         postal_code, customer_name, payment_method)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (order_index, payment['payment_type'], payment['account_digits'],
             payment['postal_code'], payment['customer_name'], payment['payment_method']))

        # One products row per item; the counter never resets between orders
        for product in transaction['products']:
            cursor.execute(
                """INSERT INTO products (order_number, product_id, product_name, cost)
                   VALUES (?, ?, ?, ?)""",
                (order_index, product_id_counter, product['product_name'], product['cost']))
            product_id_counter += 1

    # Commit once at the end so the whole import lands atomically
    connection.commit()

Two key things: use enumerate(data['transactions'], 1) to generate order numbers starting from 1, and keep product_id_counter outside all loops so every product gets a unique ID across the entire dataset.
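
For completeness, here's roughly how you'd wire it together, assuming import_json_data lives in the same module as your setup code. The filename orders.json is just a placeholder for whatever your actual JSON file is called:

if __name__ == "__main__":
    initialize_database()                      # create the tables first
    conn = setup_database("orders_database.db")
    if conn:
        import_json_data(conn, "orders.json")  # placeholder filename
        conn.close()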

Good point! But if you run this script multiple times, won't product_id_counter always start from 1 and create duplicates? You'd probably want to grab the max existing product_id first, then start your counter from there.

Exactly what Zack said! I had the same issue with my import script. Query the database first with cursor.execute("SELECT MAX(product_id) FROM products"), then start your counter from that number + 1. No more duplicate key errors.
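
Something like this, as a rough sketch (next_product_id is just a helper name I made up; the one gotcha is that MAX returns NULL, which comes back as None in Python, when the table is empty, so fall back to 0):

def next_product_id(connection):
    # Resume numbering after the highest existing product_id.
    # MAX(product_id) is NULL (None in Python) on an empty table,
    # so fall back to 0, which makes the first ID 1.
    cursor = connection.cursor()
    cursor.execute("SELECT MAX(product_id) FROM products")
    max_id = cursor.fetchone()[0]
    return (max_id or 0) + 1

Then initialize the counter with product_id_counter = next_product_id(connection) instead of hardcoding 1.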