import os
import glob
import re

# Order-type sub-folders of ``base_dir`` to scan, in the order they appear
# in the report.
folders = ['alibaba_orders', 'alipay_orders', 'dollar_orders', 'western', 'western 2', 'wester_orders', 'requests']
# Directory that contains the folders listed above.
base_dir = r"d:\Work\sama-bag-main\admin"
# Destination of the generated Markdown report (overwritten on every run).
output_file = r"C:\Users\Abdo Tony\.gemini\antigravity-ide\brain\892f4f73-418b-4360-986a-9e44bc954f4c\artifacts\analysis_results.md"

# File names looked up inside each folder; missing ones are skipped.
files_to_check = ['print.php', 'requestes.php', 'receipt_document.php', 'document.php', 'small_print.php']

# Patterns are compiled once instead of being looked up inside the loops.
# ``\b`` after ``th`` keeps the header pattern from also matching ``<thead>``,
# which would otherwise swallow everything up to the first ``</th>``.
_TH_PATTERN = re.compile(r'<th\b[^>]*>(.*?)</th>', re.DOTALL | re.IGNORECASE)
# Text of the first <span> following a <div class="data-title"> opening tag
# (e.g. the Arabic "we received from Mr." label in receipt_document.php).
_DATA_TITLE_PATTERN = re.compile(
    r'<div class="data-title">.*?<span>(.*?)</span>', re.DOTALL | re.IGNORECASE
)
_TAG_PATTERN = re.compile(r'<[^>]+>')


def _clean_fragment(fragment: str) -> str:
    """Strip markup tags from *fragment* and collapse whitespace runs.

    Newlines, carriage returns, tabs and repeated spaces all become a single
    space so the result always fits on one Markdown bullet line.
    """
    return " ".join(_TAG_PATTERN.sub('', fragment).split())


def _extract(pattern, content: str) -> list:
    """Return the cleaned, non-empty capture of every *pattern* match."""
    cleaned = (_clean_fragment(raw) for raw in pattern.findall(content))
    return [text for text in cleaned if text]


def extract_table_headers(content: str) -> list:
    """Return the visible text of every ``<th>`` cell found in *content*."""
    return _extract(_TH_PATTERN, content)


def extract_data_titles(content: str) -> list:
    """Return the span text of every ``data-title`` div found in *content*."""
    return _extract(_DATA_TITLE_PATTERN, content)


def _write_bullets(out, heading: str, items: list) -> None:
    """Write *heading* followed by one ``- item`` line per entry.

    Nothing is written when *items* is empty, so the report never contains a
    heading without entries.
    """
    if not items:
        return
    out.write(heading)
    out.writelines(f"- {item}\n" for item in items)


def write_file_analysis(out, filepath: str) -> None:
    """Append the headers and data titles found in *filepath* to *out*.

    The file is decoded as UTF-8 and undecodable bytes are dropped.
    """
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    _write_bullets(out, "**Table Headers (th):**\n", extract_table_headers(content))
    _write_bullets(out, "\n**Data Titles (span):**\n", extract_data_titles(content))


def main() -> None:
    """Scan every configured folder/file pair and write the Markdown report."""
    with open(output_file, 'w', encoding='utf-8') as out:
        out.write("# Analysis of Receipt Data by Order Type\n\n")
        for folder in folders:
            # The folder heading is emitted even when the folder is missing.
            out.write(f"## {folder}\n")
            folder_path = os.path.join(base_dir, folder)
            if not os.path.exists(folder_path):
                continue

            for filename in files_to_check:
                filepath = os.path.join(folder_path, filename)
                # isfile (not exists): a directory with this name cannot be read.
                if not os.path.isfile(filepath):
                    continue
                out.write(f"### {filename}\n")
                write_file_analysis(out, filepath)
                out.write("\n")

    print(f"Analysis saved to {output_file}")


if __name__ == "__main__":
    main()