GSP-329: Integrate With Machine Learning APIs

GSP-329: Integrate With Machine Learning APIs

Overview

Run the following commands in Cloud Shell:

# Create the service account used by the script and build its full email once.
export SANAME=challenge
SA_EMAIL="${SANAME}@${DEVSHELL_PROJECT_ID}.iam.gserviceaccount.com"
gcloud iam service-accounts create "$SANAME"

# Grant the service account the BigQuery and Cloud Storage admin roles on the project.
gcloud projects add-iam-policy-binding "$DEVSHELL_PROJECT_ID" \
  --member="serviceAccount:${SA_EMAIL}" --role=roles/bigquery.admin
gcloud projects add-iam-policy-binding "$DEVSHELL_PROJECT_ID" \
  --member="serviceAccount:${SA_EMAIL}" --role=roles/storage.admin

# Create a key file and point the client libraries at it.
gcloud iam service-accounts keys create sa-key.json --iam-account "$SA_EMAIL"
export GOOGLE_APPLICATION_CREDENTIALS="${PWD}/sa-key.json"

# Download the script template from the bucket named after the project.
gsutil cp "gs://${DEVSHELL_PROJECT_ID}/analyze-images.py" .
  • Open the Cloud Shell Editor and replace the contents of the "analyze-images.py" file with the following script:
  """Extract text from images in a Cloud Storage bucket and load it into BigQuery.

  For every object in the bucket whose name ends with ``jpg`` or ``png`` the
  script:

    1. runs Vision API text detection on the image bytes,
    2. writes the detected text to ``<name>.txt`` in the same bucket,
    3. translates non-English text to English with the Translation API,
    4. queues a (text, locale, translated text, file name) row for BigQuery.

  Usage: python3 analyze-images.py [PROJECT_NAME] [BUCKET_NAME]

  Dataset: image_classification_dataset
  Table name: image_text_detail
  """
  import os
  import sys

  # Import Google Cloud Library modules
  from google.cloud import bigquery, language, storage, translate_v2, vision, vision_v1

  # Fail early, with a non-zero exit status, when credentials are not usable.
  credentials_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
  if credentials_path is None:
      print("The GOOGLE_APPLICATION_CREDENTIALS environment variable is not defined.\n")
      sys.exit(1)
  if not os.path.exists(credentials_path):
      print("The GOOGLE_APPLICATION_CREDENTIALS file does not exist.\n")
      sys.exit(1)

  if len(sys.argv) < 3:
      print('You must provide parameters for the Google Cloud project ID and Storage bucket')
      print('python3 ' + sys.argv[0] + ' [PROJECT_NAME] [BUCKET_NAME]')
      sys.exit(1)

  project_name = sys.argv[1]
  bucket_name = sys.argv[2]

  # Set up our GCS, BigQuery, and Natural Language clients
  storage_client = storage.Client()
  bq_client = bigquery.Client(project=project_name)
  nl_client = language.LanguageServiceClient()

  # Set up client objects for the vision and translate_v2 API Libraries.
  # They are created once here and reused for every image.
  vision_client = vision.ImageAnnotatorClient()
  translate_client = translate_v2.Client()

  # Setup the BigQuery dataset and table objects
  dataset_ref = bq_client.dataset('image_classification_dataset')
  dataset = bigquery.Dataset(dataset_ref)
  table_ref = dataset.table('image_text_detail')
  table = bq_client.get_table(table_ref)

  # Rows to be inserted into the BigQuery table:
  # (original text, locale, translated text, file name)
  rows_for_bq = []

  # Get a list of the files in the Cloud Storage Bucket
  bucket = storage_client.bucket(bucket_name)
  files = bucket.list_blobs()

  print('Processing image files from GCS. This will take a few minutes..')

  # Process files from Cloud Storage and save the result to send to BigQuery
  for image_blob in files:
      # Only image objects are analysed; the .txt results written below are skipped.
      if not image_blob.name.endswith(('jpg', 'png')):
          continue

      file_content = image_blob.download_as_string()

      # Detect text in the image
      image = vision_v1.types.Image(content=file_content)
      response = vision_client.text_detection(image=image)

      if response.error.message:
          print('Vision API error for ' + image_blob.name + ': ' + response.error.message)
          continue

      # Images without any detected text have no annotations to index into.
      if len(response.text_annotations) == 0:
          continue

      # The first annotation carries the text and locale that are stored.
      annotation = response.text_annotations[0]
      desc = annotation.description
      locale = annotation.locale

      # Save the detected text in <filename>.txt to cloud storage.
      # splitext removes only the final extension, so names containing
      # additional dots keep the rest of their name.
      text_blob_name = os.path.splitext(image_blob.name)[0] + '.txt'
      text_blob = bucket.blob(text_blob_name)
      text_blob.upload_from_string(desc, content_type='text/plain')

      # English text is stored as-is; everything else is translated to English.
      if locale == 'en':
          translated_text = desc
      else:
          translation = translate_client.translate(desc, target_language='en')
          translated_text = translation['translatedText']
      print(translated_text)

      rows_for_bq.append((desc, locale, translated_text, image_blob.name))

  # Write original text, locale and translated text to BQ
  if rows_for_bq:
      print('Writing Vision API image data to BigQuery...')
      errors = bq_client.insert_rows(table, rows_for_bq)
      # An explicit check (rather than assert) still runs under "python -O".
      if errors:
          print('BigQuery insert failed: ' + str(errors))
          sys.exit(1)
  else:
      print('No text was detected in any image; nothing to write to BigQuery.')
  • In Cloud Shell, run the script, passing the project ID as both the project name and the bucket name:
# Arguments: project name, then bucket name (here both are the project ID).
python3 analyze-images.py "$DEVSHELL_PROJECT_ID" "$DEVSHELL_PROJECT_ID"
  • Go to Navigation Menu -> BigQuery and run the following query:
-- Count the rows per detected locale, most frequent locale first.
SELECT locale, COUNT(locale) AS lcount
FROM image_classification_dataset.image_text_detail
GROUP BY locale
ORDER BY lcount DESC

Congratulations, you're all done with the lab 😄