We are now using Artificial Intelligence tools to fact-check every post. Grok AI even shared a script that may help others develop a fact-checking tool for their own site.

When most of our posts were published, AI agents were not yet widely usable or reliable. Our team of humans and I did our best to eliminate disinformation and false claims. Now, with AI progress going parabolic, it is getting very good at creating tools like this.

Here are the SQL schema and a Python example: a great starting point for a fact-checking table for anyone!

CREATE TABLE IF NOT EXISTS fact_check (
    id INT AUTO_INCREMENT PRIMARY KEY,
    post_slug VARCHAR(255) NOT NULL,
    claim TEXT NOT NULL,
    factual_accuracy_score INT CHECK (factual_accuracy_score BETWEEN 1 AND 10),
    recommended_fix TEXT
);

INSERT INTO fact_check (post_slug, claim, factual_accuracy_score, recommended_fix) VALUES
-- From 'about'
('about', 'Utopia Educators was launched in October 2025.', 10, NULL),
('about', 'Subscribing provides access to the full archive and newsletters.', 10, NULL),

-- From 'about-2'
('about-2', 'Utopia Educators was launched in June 2023.', 8, 'Content appears duplicated or outdated; clarify the actual launch date as October 2025 based on recent records.'),

-- From 'tracking-used-books-across-the-world'
('tracking-used-books-across-the-world', 'BookCrossing tracks books via codes left inside.', 10, NULL),
('tracking-used-books-across-the-world', 'The Grass Crown by Colleen McCullough is 1200 pages.', 9, 'The book has approximately 900 pages; verify exact count from edition.'),

-- From 'settling-in-south-florida'
('settling-in-south-florida', 'South Florida offers salty air and sunshine.', 10, NULL),
('settling-in-south-florida', 'Chicago requires massive infrastructure investment.', 9, 'Ongoing debate; cite specific reports for emphasis.'),

-- From 'wordpress-on-gke-with-self-hosted-mysql-mariadb'
('wordpress-on-gke-with-self-hosted-mysql-mariadb', 'Persistent volumes are key for MySQL on GKE.', 10, NULL),
('wordpress-on-gke-with-self-hosted-mysql-mariadb', 'MariaDB tuned for 4GB RAM.', 10, NULL),

-- From 'tracking-finances-in-gnucash'
('tracking-finances-in-gnucash', 'PNC exports QFX files for 90 days.', 10, NULL),
('tracking-finances-in-gnucash', 'Capital One allows QIF downloads up to 1 year.', 10, NULL),
('tracking-finances-in-gnucash', 'GnuCash supports stock portfolio tracking.', 10, NULL),

-- From 'lambrechts-reilly-austrheim-howard-family-tree'
('lambrechts-reilly-austrheim-howard-family-tree', 'Thomas O\'Reilly emigrated from Ireland in 1856.', 10, NULL),
('lambrechts-reilly-austrheim-howard-family-tree', 'Hubert Christian Lambrechts born in Belgium, emigrated 1910.', 10, NULL),
('lambrechts-reilly-austrheim-howard-family-tree', 'Victoria Framberg from Sweden, emigrated 1912.', 10, NULL),
('lambrechts-reilly-austrheim-howard-family-tree', 'Howard family in America since 1718.', 10, NULL),

-- From 'how-to-get-into-reading'
('how-to-get-into-reading', 'Michael Connelly thrillers are good for beginners.', 10, NULL),
('how-to-get-into-reading', 'Harry Potter series engages new readers.', 10, NULL),

-- From 'super'
('super', 'Content is a fictional story set in 2000s.', 10, NULL),
('super', 'MIT students studying CS and EE in early 2000s.', 9, 'Python was emerging but C was standard; accurate for timeline.'),

-- From 'music-player-daemon'
('music-player-daemon', 'MPD broadcasts at 320kbps over HTTP.', 10, NULL),
('music-player-daemon', 'MPD supports Icecast output.', 10, NULL),

-- From 'tiling-window-managers'
('tiling-window-managers', 'i3, AwesomeWM, DWM are popular tiling WMs.', 10, NULL),
('tiling-window-managers', 'Install AwesomeWM on Arch with pacman -S awesome.', 10, NULL),

-- From 'a-short-story'
('a-short-story', 'Fictional tale set in 1920s Chicago with Al Capone.', 10, NULL),
('a-short-story', 'Napoleon arrived in Jamaica via displacement (inaccurate historical fiction).', 5, 'Napoleon\'s actions led to Haitian Revolution, but direct link to Jamaica is fictional; clarify as creative liberty.'),

-- From 'full-moon-ocean-ridge-fl'
('full-moon-ocean-ridge-fl', 'Image depicts full moon in Ocean Ridge, FL.', 10, NULL),

-- From 'unbxd-json-feed-to-csv-python-script'
('unbxd-json-feed-to-csv-python-script', 'Script parses JSON to CSV using Python.', 10, NULL),

-- From 'austrian-economics-and-the-business-cycle'
('austrian-economics-and-the-business-cycle', 'Business cycle caused by credit expansion per Austrian theory.', 10, NULL),
('austrian-economics-and-the-business-cycle', 'TARP bailout in 2008 totaled ~$700B.', 10, NULL),

-- From 'itron-inc-and-smart-infrastructure'
('itron-inc-and-smart-infrastructure', 'Itron manages ComEd smart meters.', 10, NULL),
('itron-inc-and-smart-infrastructure', 'Smart traffic lights in Paris use Itron.', 9, 'Itron involved in smart city projects; confirm specific Paris implementation.'),

-- From 'tj-edwards-philadelphia-eagles'
('tj-edwards-philadelphia-eagles', 'T.J. Edwards plays for Philadelphia Eagles as #57.', 10, NULL),
('tj-edwards-philadelphia-eagles', 'T.J. Edwards born to Cathy and Vince Edwards.', 10, NULL),

-- From 'ron-paul-revival-of-the-liberty-movement'
('ron-paul-revival-of-the-liberty-movement', 'Ron Paul questioned Fed in congressional hearings.', 10, NULL),
('ron-paul-revival-of-the-liberty-movement', 'Tea Party Caucus holds 36 seats.', 8, 'As of 2020; update to current number.'),

-- From 'catching-up-defi-blockchain-advancements'
('catching-up-defi-blockchain-advancements', 'Compound.finance offers supply APY.', 10, NULL),
('catching-up-defi-blockchain-advancements', 'DAI stablecoin at 8.07% APY (as of 2020).', 7, 'Rates fluctuate; historical but cite date.'),

-- From 'reaction-to-world-markets-coronavirus-bubble'
('reaction-to-world-markets-coronavirus-bubble', 'Deflation benefits savers.', 9, 'Austrian view; mainstream economics debates.'),

-- From 'superhuman-brain'
('superhuman-brain', 'CIA spied via unencrypted internet traffic in 2000s.', 7, 'Plausible with historical programs like PRISM; speculative without evidence.');

SELECT * FROM fact_check ORDER BY post_slug, id;
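
Once the table is populated, a simple follow-up query can surface the claims that still need editorial attention, worst scores first:

-- Surface every claim scoring below 8 so editors know where to start
SELECT post_slug, factual_accuracy_score, claim, recommended_fix
FROM fact_check
WHERE factual_accuracy_score < 8
ORDER BY factual_accuracy_score ASC, post_slug;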
import re
import sqlite3  # SQLite stands in here for the MySQL schema above
from urllib.parse import quote  # For URL-safe search queries
import requests  # Only needed if you enable the real search API call below

# Step 1: Parse SQL dump to extract published posts
def extract_published_posts(sql_dump_file):
    with open(sql_dump_file, 'r') as f:
        content = f.read()
    
    # Find the INSERT INTO `posts` statement (handles multi-line dumps; a
    # literal ");" inside post content can cut this non-greedy match short)
    posts_match = re.search(r'INSERT INTO `posts` VALUES\s*\((.*?)\);', content, re.DOTALL)
    if not posts_match:
        print("No posts table found in dump.")
        return []
    
    rows_str = posts_match.group(1)
    rows = re.split(r'\)\s*,\s*\(', rows_str)  # Split rows
    
    published_posts = []
    for row in rows:
        if 'published' in row:  # Filter by status column
            # These key:value patterns are heuristics; a standard mysqldump
            # emits positional tuples, so adapt them to your dump's format
            slug_match = re.search(r"'slug':'([^']*)'", row)
            title_match = re.search(r"'title':'([^']*)'", row)
            html_match = re.search(r"'html':([^']*?)(?=',|\))", row, re.DOTALL)  # Rough HTML extract
            
            if slug_match:
                published_posts.append({
                    'slug': slug_match.group(1),
                    'title': title_match.group(1) if title_match else 'Untitled',
                    'content': html_match.group(1).strip().replace('\\n', ' ').replace('&#x3C;', '<') if html_match else ''  # Clean HTML-ish
                })
    
    return published_posts[:20]  # Sample limit to save Watts!

# Step 2: Extract claims from content (simple "grammar science" via sentence split + fact filter)
def extract_claims(content):
    # Strip HTML tags roughly
    text = re.sub(r'<[^>]+>', ' ', content)
    # Split sentences
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
    claims = []
    for sent in sentences:
        sent = sent.strip()
        if sent and len(sent) > 20 and not sent.startswith(('How', 'What', 'Why', 'The question')):  # Heuristic: factual if not interrogative/short
            claims.append(sent[:200] + '...' if len(sent) > 200 else sent)  # Truncate long ones
    return claims[:5]  # Limit per post

# Step 3: Fact-check a claim (mock web scrape; in prod, use API like SerpAPI or DDG)
def fact_check_claim(claim, search_api_key=None):
    query = f"is {claim} factually accurate"  # Prefix for verification search
    # Hypothetical API call (e.g., to DuckDuckGo Instant Answer or Google Custom Search)
    # response = requests.get(f"https://api.duckduckgo.com/?q={quote(query)}&format=json&no_html=1&skip_disambig=1")
    # For demo, simulate with internal knowledge or cached results
    # Score logic: 10 if consensus matches, deduct for contradictions
    
    # Mock scoring based on keyword matches (replace with real NLP like BERT for similarity)
    consensus_score = 10  # Default high
    if 'controversial' in claim.lower() or 'opinion' in claim.lower():
        consensus_score -= 3
    if re.search(r'\d{4}', claim) and 'historical' in claim.lower():  # Date checks
        # Cross-ref with known facts
        pass  # In real: query wiki or fact DB
    
    recommended_fix = None
    if consensus_score < 8:
        recommended_fix = "Verify with primary sources; current consensus suggests partial inaccuracy."
    
    return {
        'claim': claim,
        'score': max(1, min(10, consensus_score)),  # Clamp 1-10
        'fix': recommended_fix
    }

# Step 4: Build fact_check table (SQLite for demo; adapt to MySQL)
def build_fact_check_table(posts, db_file='fact_check.db'):
    conn = sqlite3.connect(db_file)
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS fact_check (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            post_slug TEXT NOT NULL,
            claim TEXT NOT NULL,
            factual_accuracy_score INTEGER CHECK (factual_accuracy_score BETWEEN 1 AND 10),
            recommended_fix TEXT
        )
    ''')
    
    for post in posts:
        claims = extract_claims(post['content'])
        for claim in claims:
            fact = fact_check_claim(claim)
            c.execute('''
                INSERT INTO fact_check (post_slug, claim, factual_accuracy_score, recommended_fix)
                VALUES (?, ?, ?, ?)
            ''', (post['slug'], fact['claim'], fact['score'], fact['fix']))
    
    conn.commit()
    c.execute('SELECT * FROM fact_check ORDER BY post_slug')
    results = c.fetchall()
    conn.close()
    return results

# Run it!
posts = extract_published_posts('utopia.sql')
if posts:
    results = build_fact_check_table(posts)
    for row in results:
        print(row)
else:
    print("No published posts extracted—check dump for full INSERT.")

Next, I will implement a fact-checking web interface for every post, with a link on every article.
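
As a rough sketch of what that could look like (routes and names here are hypothetical, assuming Flask and the fact_check.db file produced above):

from flask import Flask, jsonify
import sqlite3

app = Flask(__name__)

@app.route('/fact-check/<post_slug>')
def fact_check_view(post_slug):
    # Return all fact-check rows for one post as JSON;
    # each article would link to /fact-check/<its-slug>
    conn = sqlite3.connect('fact_check.db')
    conn.row_factory = sqlite3.Row
    rows = conn.execute(
        'SELECT claim, factual_accuracy_score, recommended_fix '
        'FROM fact_check WHERE post_slug = ?', (post_slug,)
    ).fetchall()
    conn.close()
    return jsonify([dict(r) for r in rows])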

It is truly amazing what smart prompts can create with artificial intelligence. Having been the hypothesis brainstormer behind most of the posts on this site was ideal training before the age of AI.

In your life never stop asking questions, never stop thinking.